First, we will add the code to download all user-labeled images from the production server:
import json
import os
import random
import shutil
import tempfile
from datetime import datetime

import requests
import tensorflow as tf
# NOTE(review): scipy.misc.imread/imsave were deprecated in SciPy 1.0 and
# removed in SciPy 1.2, so this module needs an older SciPy (with Pillow).
from scipy.misc import imread, imsave
from tqdm import tqdm

import nets, models, datasets


def ensure_folder_exists(folder_path):
    """Create ``folder_path`` if nothing exists at that path and return it.

    Missing parent directories are created as well.

    Args:
        folder_path: Path of the directory to create.

    Returns:
        ``folder_path`` unchanged, so the call can be used inline.
    """
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    return folder_path


def _download_as_jpeg(source_url, file_name, target_file):
    """Download ``source_url`` and re-save the decoded image as ``target_file``.

    The raw bytes go to a temporary file first, which is always removed,
    even when decoding or saving fails.
    """
    response = requests.get(source_url)
    response.raise_for_status()

    # Keep the original extension on the temporary file, as the previous
    # "/tmp/<file_name>" path did.
    suffix = os.path.splitext(file_name)[1]
    fd, temp_file_path = tempfile.mkstemp(suffix=suffix)
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(response.content)
        pixels = imread(temp_file_path)
        imsave(target_file, pixels)
    finally:
        os.remove(temp_file_path)


def download_user_data(url, user_dir, train_ratio=0.8):
    """Download all user-labeled images from the server into ``user_dir``.

    Fetches ``<url>/user-labels``, which must return a JSON list of objects
    with the keys ``"url"``, ``"label"`` and ``"name"``. Each image is
    randomly assigned to the train or test split and stored as
    ``<user_dir>/<trainval|test>/<name>/<file name>.jpg``. Images whose
    target file already exists are not downloaded again.

    ``trainval.txt`` and ``test.txt`` in ``user_dir`` are rewritten on every
    call with one ``"<label> <image path>"`` record per line.

    NOTE: the split is drawn again on every call, so an image downloaded on
    an earlier run may end up in the other split as well.

    Args:
        url: Base URL of the server, without a trailing slash.
        user_dir: Directory that receives the images and the two list files.
        train_ratio: Probability that an image is put into the train split.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get("%s/user-labels" % url)
    response.raise_for_status()
    data = json.loads(response.text)

    user_dir = ensure_folder_exists(user_dir)
    train_folder = ensure_folder_exists(os.path.join(user_dir, "trainval"))
    test_folder = ensure_folder_exists(os.path.join(user_dir, "test"))

    train_list_path = os.path.join(user_dir, 'trainval.txt')
    test_list_path = os.path.join(user_dir, 'test.txt')

    # The context manager guarantees both list files are flushed and closed,
    # even if a download fails half-way through.
    with open(train_list_path, 'w') as train_file, \
            open(test_list_path, 'w') as test_file:
        for entry in data:
            is_train = random.random() < train_ratio
            image_url = entry["url"]
            file_name = image_url.split("/")[-1]
            label = entry["label"]
            name = entry["name"]

            split_folder = train_folder if is_train else test_folder
            target_folder = ensure_folder_exists(
                os.path.join(split_folder, name))
            target_file = os.path.join(target_folder, file_name) + ".jpg"

            if not os.path.exists(target_file):
                _download_as_jpeg(
                    "%s%s" % (url, image_url), file_name, target_file)
                print("Save file: %s" % target_file)

            # One record per line; without the newline all entries would
            # run together on a single line.
            label_path = "%s %s\n" % (label, target_file)
            if is_train:
                train_file.write(label_path)
            else:
                test_file.write(label_path)
In download_user_data, we call the /user-labels endpoint to get the list of user-labeled images. The JSON has the following format:
[ { "id": 1, "label": 0, "name": "Abyssinian", "url": "/uploads/2017-05-23_14-56-45_Abyssinian-cat.jpeg" }, { "id": 2, "label": 32, "name": "Siamese", "url": "/uploads/2017-05-23_14-57-33_fat-Siamese-cat.jpeg" } ]
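In this JSON, label is the label that the user has chosen, name is the corresponding class name (used as the name of the folder the image is stored in), and url is the path, relative to the server URL, from which the image can be downloaded. For every image, we will download it into a temporary file and use imread and imsave from scipy to make sure that the image is in JPEG format. We also create a trainval.txt and a test.txt file that list the label and file path of every image, as in the training dataset.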