diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..ba8cc10 --- /dev/null +++ b/config.ini @@ -0,0 +1,17 @@ +# Use this file to describe a custom dataset. +# Dataset images must be organized inside folders named after their class label (i.e., 0 to 9 for CIFAR10). + +[CUSTOMDATASETINFO] +path = ../perturbed_datasets/ +# percentage of the dataset to be loaded +loadDatasetPercentage = 100 +numOfClasses = 10 + +# unless set to no/false, a normalization transform using the mean/std below is added to the data loader +doNormalize = no +mean = (0.4914, 0.4822, 0.4465) +std = (0.2023, 0.1994, 0.2010) + +imgHeight = 32 +imgWidth = 32 +testsetSize = 10000 diff --git a/src/injections.py b/src/injections.py index 3893554..e6d7d8b 100644 --- a/src/injections.py +++ b/src/injections.py @@ -44,8 +44,6 @@ def rand_neurons_batch(pfi_model, layer, shape, maxval, batchsize, function=-1): ) - - if __name__ == "__main__": # Read in cmd line args @@ -117,12 +115,21 @@ def rand_neurons_batch(pfi_model, layer, shape, maxval, batchsize, function=-1): # init PyTorchFI baseC = 3 - if "IMAGENET" in getDataset(): + if "imagenet" in getDataset().lower(): baseH = 224 baseW = 224 - elif "CIFAR" in getDataset(): + elif "cifar" in getDataset().lower(): baseH = 32 baseW = 32 + elif "custom" in getDataset().lower(): + # Read config.ini file + config_object = ConfigParser() + config_object.read("../config.ini") + + # Get the dataset details + dataset_info = config_object["CUSTOMDATASETINFO"] + baseH = int(dataset_info["imgHeight"]) + baseW = int(dataset_info["imgWidth"]) exp_bits = getBitwidth() - getRadix() - 1 # also INT for fixed point mantissa_bits = getRadix() # also FRAC for fixed point @@ -170,7 +177,10 @@ def rand_neurons_batch(pfi_model, layer, shape, maxval, batchsize, function=-1): pbar.update(samples) # prep images + # try: images, labels, img_ids, index = dataiter.next() + # except StopIteration: + # break if getCUDA_en(): labels = labels.cuda() images = images.cuda() diff --git a/src/profile.py
b/src/profile.py index 848b6ec..9aae81e 100644 --- a/src/profile.py +++ b/src/profile.py @@ -127,7 +127,15 @@ def save_data_df(path, file_name, data): elif "CIFAR" in getDataset(): baseH = 32 baseW = 32 - + elif "custom" in getDataset().lower(): + # Read config.ini file + config_object = ConfigParser() + config_object.read("../config.ini") + + # Get the dataset details + dataset_info = config_object["CUSTOMDATASETINFO"] + baseH = int(dataset_info["imgHeight"]) + baseW = int(dataset_info["imgWidth"]) goldeneye_model = goldeneye( model, diff --git a/src/split_data.py b/src/split_data.py index fd828f2..4e69ace 100644 --- a/src/split_data.py +++ b/src/split_data.py @@ -4,14 +4,17 @@ Randomizes and returns two lists. Split is between 0-1, and refers to the size of the rank set. Example, .8 means 80/20 split ''' + + def gen_sets(golden_indices, split): total = len(golden_indices) - split_index = int(total*split) + split_index = int(total * split) randomized = random.sample(golden_indices, total) rank_set = randomized[0:split_index] - test_set = randomized[split_index : ] + test_set = randomized[split_index:] return rank_set, test_set + if __name__ == '__main__': # Read in cmd line args check_args(sys.argv[1:]) @@ -30,7 +33,6 @@ def gen_sets(golden_indices, split): bitwidth_fp = getBitwidth() quant_en = False - name = getDNN() + "_" + getDataset() + "_real" + getPrecision() + "_sim" + format + "_bw" + str(bitwidth_fp) \ + "_r" + str(getRadix()) + "_bias" + str(getBias()) @@ -39,27 +41,36 @@ def gen_sets(golden_indices, split): golden_data = load_file(netProfilePath + "golden_data") split_ratio = .8 - # generate an Analysis Set (AS) and Deployment Set (DS) - if "IMAGENET" in getDataset(): images_base = list(range(0,50000)) - elif "CIFAR" in getDataset(): images_base = list(range(0,10000)) - + if getDataset().upper() == "IMAGENET": + images_base = list(range(0, 50000)) + elif getDataset().upper() in ("CIFAR10", "CIFAR100"): + images_base = list(range(0, 10000)) + elif "custom" in
getDataset().lower(): + # Read config.ini file + config_object = ConfigParser() + config_object.read("../config.ini") + # find dataset size + dataset_info = config_object["CUSTOMDATASETINFO"] + testset_size = int(dataset_info["testsetSize"]) + dataset_percentage = int(dataset_info["loadDatasetPercentage"]) + images_base = list(range(0, int(testset_size * (dataset_percentage / 100)))) random.seed(9001) - analysis_set, deployment_set= gen_sets(images_base, split_ratio) + analysis_set, deployment_set = gen_sets(images_base, split_ratio) save_data(outPath, "analysis_set", analysis_set) save_data(outPath, "deployment_set", deployment_set) - random.seed() #back to randomness + random.seed() # back to randomness # generate a list from the correct images in AS and DS # Also drop images where top2diff is 0 ASgoodImgs = [] DSgoodImgs = [] for i in analysis_set: - if golden_data[i][0] == golden_data[i][1] and golden_data[i][3] > 0 : + if golden_data[i][0] == golden_data[i][1] and golden_data[i][3] > 0: ASgoodImgs.append(i) for i in deployment_set: - if golden_data[i][0] == golden_data[i][1] and golden_data[i][3] > 0 : + if golden_data[i][0] == golden_data[i][1] and golden_data[i][3] > 0: DSgoodImgs.append(i) save_data(outPath, "rank_set_good", ASgoodImgs) @@ -68,24 +79,24 @@ def gen_sets(golden_indices, split): # CSVs of imgs f = open(outPath + "AS.csv", "w+") for i in range(len(analysis_set)): - outputString = "%d\n" %(analysis_set[i]) + outputString = "%d\n" % (analysis_set[i]) f.write(outputString) f.close() f = open(outPath + "DS.csv", "w+") for i in range(len(deployment_set)): - outputString = "%d\n" %(deployment_set[i]) + outputString = "%d\n" % (deployment_set[i]) f.write(outputString) f.close() f = open(outPath + "AS_good.csv", "w+") for i in range(len(ASgoodImgs)): - outputString = "%d\n" %(ASgoodImgs[i]) + outputString = "%d\n" % (ASgoodImgs[i]) f.write(outputString) f.close() f = open(outPath + "DS_good.csv", "w+") for i in range(len(DSgoodImgs)): - outputString = "%d\n"
%(DSgoodImgs[i]) + outputString = "%d\n" % (DSgoodImgs[i]) f.write(outputString) f.close() diff --git a/src/util.py b/src/util.py index c4682a7..f24698f 100644 --- a/src/util.py +++ b/src/util.py @@ -10,12 +10,15 @@ import timm import numpy as np from num_sys_class import * -from othermodels import resnet, vgg, cifar10_nn +from othermodels import resnet, vgg, cifar10_nn +from sklearn.model_selection import StratifiedKFold + +from configparser import ConfigParser ''' Environment Variables ''' -DATASETS = os.environ['ML_DATASETS'] +DATASETS = os.environ.get('ML_DATASETS', '') ''' Helper functions to parse input @@ -177,27 +180,84 @@ def check_args(args=None): # assert (quantize_in) +class StratifiedBatchSampler: + """Stratified batch sampling + Provides equal representation of target classes in each batch + """ + + def __init__(self, y, batch_size, shuffle=True): + if torch.is_tensor(y): + y = y.cpu().numpy() + assert len(y.shape) == 1, 'label array must be 1D' + n_batches = int(len(y) / batch_size) + self.skf = StratifiedKFold(n_splits=n_batches, shuffle=shuffle) + self.X = torch.randn(len(y), 1).cpu().numpy() + self.y = y + self.shuffle = shuffle + + def __iter__(self): + if self.shuffle: + self.skf.random_state = torch.randint(0, int(1e8), size=()).item() + for train_idx, test_idx in self.skf.split(self.X, self.y): + yield test_idx + + def __len__(self): + return self.skf.get_n_splits() + + def getBatchsize(): return batchsize_in + + def getDNN(): return dnn_in + + def getDataset(): return dataset_in + + def getFormat(): return format_in + + def getPrecision(): return precision_in + + def getOutputDir(): return output_in + + def getCUDA_en(): return cuda_in + + def getInjections(): if injections_in != -1 and injectionsLoc_in == 0: print("Warning: No injection location.
Please include \"-I\" flag with value.") return injections_in + + def getInjectionsLocation(): return injectionsLoc_in + + def getRadix(): return radix_in + + def getBitwidth(): return bitwidth_in + + def getBias(): return bias_in + + def getTraining_en(): return training_in + + def getWorkers(): return workers_in + + def getQuantize_en(): return quantize_in + + # def getQuantizeBits(): return QUANTIZE_BITS # def getSingleBitFlip_en(): return singlebitflip_in def getVerbose(): return verbose_in + + def getDebug(): return debug_in @@ -220,6 +280,46 @@ def str2bool(v): else: raise argparse.ArgumentTypeError('Boolean value expected.') + +def parseTupleFromString(set_str): + """ + Parse a tuple of floats from a string. For example, given "(1, 2.5, 3)", it returns the tuple (1.0, 2.5, 3.0). + """ + set_str = set_str.strip()[1:-1] # remove brackets + nums = set_str.split(",") + + parsed_nums = list() + for num in nums: + parsed_nums.append(float(num.strip())) + + return tuple(parsed_nums) + + +def parseConfig(): + # Read config.ini file + config_object = ConfigParser() + config_object.read("../config.ini") + + config = {} + + # Get the dataset details + dataset_info = config_object["CUSTOMDATASETINFO"] + + config["numOfClasses"] = int(dataset_info["numOfClasses"]) + config["loadDatasetPercentage"] = int(dataset_info["loadDatasetPercentage"]) + config["path"] = dataset_info["path"] + if dataset_info["doNormalize"].lower() != "no" and dataset_info["doNormalize"].lower() != "false": + config["doNormalize"] = True + config["mean"] = parseTupleFromString(dataset_info["mean"]) + config["std"] = parseTupleFromString(dataset_info["std"]) + else: + config["doNormalize"] = False + config["mean"] = None + config["std"] = None + + return config + + # def getNumSysName(name): # if name == "fp32": # return num_fp32 @@ -232,57 +332,105 @@ def str2bool(v): # returns the number of classes for common datasets def getNumClasses(dataset): - if(dataset == 'CIFAR10'): + dataset = dataset.lower() + if
(dataset == 'cifar10'): return 10 - elif(dataset == 'CIFAR100'): + elif (dataset == 'cifar100'): return 100 - elif(dataset == 'IMAGENET'): + elif (dataset == 'imagenet'): return 1000 + elif ('custom' in dataset): + config = parseConfig() + return config["numOfClasses"] def getNetwork(networkName, DATASET): ####### IMAGENET ####### FB_repo = 'facebookresearch/deit:main' - if DATASET == 'IMAGENET': + if "custom" in DATASET.lower(): # custom dataset described in config.ini + if networkName == "resnet18": + MODEL = resnet.resnet18(pretrained=True) + elif networkName == "resnet34": + MODEL = resnet.resnet34(pretrained=True) + elif networkName == "vgg19_bn": + MODEL = vgg.vgg19_bn(pretrained=True) + elif networkName == "cifar10_nn_baseline": + MODEL = cifar10_nn.baseline(pretrained=True, output_size=getNumClasses(DATASET)) + elif networkName == "cifar10_nn_v1": + MODEL = cifar10_nn.v1(pretrained=True, output_size=getNumClasses(DATASET)) + elif networkName == "cifar10_nn_v2": + MODEL = cifar10_nn.v2(pretrained=True, output_size=getNumClasses(DATASET)) + + elif DATASET.lower() == 'imagenet': # Convolution Neural Networks - if networkName == "alexnet": MODEL = models.alexnet(pretrained=True, progress=True) - elif networkName == "vgg11": MODEL = models.vgg11(pretrained=True, progress=True) - elif networkName == "vgg13": MODEL = models.vgg13(pretrained=True, progress=True) - elif networkName == "vgg16": MODEL = models.vgg16(pretrained=True, progress=True) - elif networkName == "vgg19": MODEL = models.vgg19(pretrained=True, progress=True) - elif networkName == "vgg11_bn": MODEL = models.vgg11(pretrained=True, progress=True) - elif networkName == "vgg13_bn": MODEL = models.vgg13(pretrained=True, progress=True) - elif networkName == "vgg16_bn": MODEL = models.vgg16(pretrained=True, progress=True) - elif networkName == "vgg19_bn": MODEL = models.vgg19(pretrained=True, progress=True) - elif networkName == "resnet18": MODEL = models.resnet18(pretrained=True,
progress=True) - elif networkName == "resnet34": MODEL = models.resnet34(pretrained=True, progress=True) - elif networkName == "resnet50": MODEL = models.resnet50(pretrained=True, progress=True) - elif networkName == "resnet101": MODEL = models.resnet101(pretrained=True, progress=True) - elif networkName == "resnet152": MODEL = models.resnet152(pretrained=True, progress=True) - elif networkName == "squeezenet1_0": MODEL = models.squeezenet1_0(pretrained=True, progress=True) - elif networkName == "squeezenet1_1": MODEL = models.squeezenet1_1(pretrained=True, progress=True) - elif networkName == "densenet121": MODEL = models.densenet121(pretrained=True, progress=True) - elif networkName == "densenet169": MODEL = models.densenet169(pretrained=True, progress=True) - elif networkName == "densenet201": MODEL = models.densenet201(pretrained=True, progress=True) - elif networkName == "densenet161": MODEL = models.densenet161(pretrained=True, progress=True) - elif networkName == "inceptionv3": MODEL = models.inception_v3(pretrained=True, progress=True) - elif networkName == "googlenet": MODEL = models.googlenet(pretrained=True, progress=True) - elif networkName == "shufflenet": MODEL = models.shufflenet_v2_x1_0(pretrained=True, progress=True) - elif networkName == "mobilenet": MODEL = models.mobilenet_v2(pretrained=True, progress=True) - elif networkName == "resnext50_32x4d": MODEL = models.resnext50_32x4d(pretrained=True, progress=True) + if networkName == "alexnet": + MODEL = models.alexnet(pretrained=True, progress=True) + elif networkName == "vgg11": + MODEL = models.vgg11(pretrained=True, progress=True) + elif networkName == "vgg13": + MODEL = models.vgg13(pretrained=True, progress=True) + elif networkName == "vgg16": + MODEL = models.vgg16(pretrained=True, progress=True) + elif networkName == "vgg19": + MODEL = models.vgg19(pretrained=True, progress=True) + elif networkName == "vgg11_bn": + MODEL = models.vgg11(pretrained=True, progress=True) + elif networkName == 
"vgg13_bn": + MODEL = models.vgg13(pretrained=True, progress=True) + elif networkName == "vgg16_bn": + MODEL = models.vgg16(pretrained=True, progress=True) + elif networkName == "vgg19_bn": + MODEL = models.vgg19(pretrained=True, progress=True) + elif networkName == "resnet18": + MODEL = models.resnet18(pretrained=True, progress=True) + elif networkName == "resnet34": + MODEL = models.resnet34(pretrained=True, progress=True) + elif networkName == "resnet50": + MODEL = models.resnet50(pretrained=True, progress=True) + elif networkName == "resnet101": + MODEL = models.resnet101(pretrained=True, progress=True) + elif networkName == "resnet152": + MODEL = models.resnet152(pretrained=True, progress=True) + elif networkName == "squeezenet1_0": + MODEL = models.squeezenet1_0(pretrained=True, progress=True) + elif networkName == "squeezenet1_1": + MODEL = models.squeezenet1_1(pretrained=True, progress=True) + elif networkName == "densenet121": + MODEL = models.densenet121(pretrained=True, progress=True) + elif networkName == "densenet169": + MODEL = models.densenet169(pretrained=True, progress=True) + elif networkName == "densenet201": + MODEL = models.densenet201(pretrained=True, progress=True) + elif networkName == "densenet161": + MODEL = models.densenet161(pretrained=True, progress=True) + elif networkName == "inceptionv3": + MODEL = models.inception_v3(pretrained=True, progress=True) + elif networkName == "googlenet": + MODEL = models.googlenet(pretrained=True, progress=True) + elif networkName == "shufflenet": + MODEL = models.shufflenet_v2_x1_0(pretrained=True, progress=True) + elif networkName == "mobilenet": + MODEL = models.mobilenet_v2(pretrained=True, progress=True) + elif networkName == "resnext50_32x4d": + MODEL = models.resnext50_32x4d(pretrained=True, progress=True) # transformers - elif networkName == "vit_base": MODEL = timm.create_model("vit_base_patch16_224", pretrained=True) - elif networkName == "deit_base": MODEL = torch.hub.load(FB_repo, 
'deit_base_patch16_224', pretrained=True) - elif networkName == "deit_tiny": MODEL = torch.hub.load(FB_repo, 'deit_tiny_patch16_224', pretrained=True) + elif networkName == "vit_base": + MODEL = timm.create_model("vit_base_patch16_224", pretrained=True) + elif networkName == "deit_base": + MODEL = torch.hub.load(FB_repo, 'deit_base_patch16_224', pretrained=True) + elif networkName == "deit_tiny": + MODEL = torch.hub.load(FB_repo, 'deit_tiny_patch16_224', pretrained=True) # Error else: sys.exit("Network does not exist") - elif DATASET == 'CIFAR10' or DATASET == 'CIFAR100': + elif "cifar10" in DATASET.lower(): # covers CIFAR100 and custom cifar10, cifar100 datasets if networkName == "resnet18": MODEL = resnet.resnet18(pretrained=True) + elif networkName == "resnet34": + MODEL = resnet.resnet34(pretrained=True) elif networkName == "vgg19_bn": MODEL = vgg.vgg19_bn(pretrained=True) elif networkName == "cifar10_nn_baseline": @@ -304,14 +452,15 @@ def getNetwork(networkName, DATASET): return MODEL + def load_dataset(DATASET, BATCH_SIZE, workers=0, training=False, shuffleIn=False, include_id=True): - if DATASET == 'CIFAR10': + if DATASET.lower() == 'cifar10': transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize((0.4914,0.4822,0.4465), (0.2023,0.1994,0.2010)) - ] - ) + [ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) + ] + ) if include_id: testset = IdCifar10(root='./data', train=training, download=True, transform=transform) @@ -319,16 +468,16 @@ def load_dataset(DATASET, BATCH_SIZE, workers=0, training=False, shuffleIn=False testset = datasets.CIFAR10(root='./data', train=training, download=True, transform=transform) test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, - shuffle=shuffleIn, num_workers=workers, pin_memory=True) + shuffle=shuffleIn, num_workers=workers, pin_memory=True) dataiter = iter(test_loader) - elif DATASET == 'CIFAR100': + elif DATASET.lower() == 
'cifar100': transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize((0.4914,0.4822,0.4465), (0.2023,0.1994,0.2010)) - ] - ) + [ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) + ] + ) if include_id: testset = IdCifar100(root='./data', train=training, download=True, transform=transform) @@ -336,16 +485,16 @@ def load_dataset(DATASET, BATCH_SIZE, workers=0, training=False, shuffleIn=False testset = datasets.CIFAR100(root='./data', train=training, download=True, transform=transform) test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, - shuffle=shuffleIn, num_workers=workers, pin_memory=True) + shuffle=shuffleIn, num_workers=workers, pin_memory=True) dataiter = iter(test_loader) - elif DATASET == 'IMAGENET': + elif DATASET.lower() == 'imagenet': if training == False: valdir = os.path.join(DATASETS + '/imagenet/', 'val') else: valdir = os.path.join(DATASETS + '/imagenet/', 'train') normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) + std=[0.229, 0.224, 0.225]) transform = transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), @@ -353,19 +502,92 @@ def load_dataset(DATASET, BATCH_SIZE, workers=0, training=False, shuffleIn=False normalize, ]) - if include_id: images = IdImageFolder(valdir, transform=transform) - else: images = datasets.ImageFolder(valdir, transform=transform) + if include_id: + images = IdImageFolder(valdir, transform=transform) + else: + images = datasets.ImageFolder(valdir, transform=transform) val_loader = torch.utils.data.DataLoader(images, batch_size=BATCH_SIZE, shuffle=shuffleIn, num_workers=workers, pin_memory=True) dataiter = iter(val_loader) + elif "custom" in DATASET.lower(): + # read config details from config.ini + config = parseConfig() + dataset_percentage = config["loadDatasetPercentage"] + dataset_path = config["path"] + if config["doNormalize"]: + norm_mean = config["mean"] + 
norm_std = config["std"] + dataiter = iter( + load_id_custom_dataset(DATASET, BATCH_SIZE, norm_mean, norm_std, dataset_path=dataset_path, + workers=workers, training=training, shuffleIn=shuffleIn, + include_id=include_id)) + else: + dataiter = iter( + load_id_custom_dataset(DATASET, BATCH_SIZE, norm_mean=None, norm_std=None, dataset_path=dataset_path, + workers=workers, training=training, shuffleIn=shuffleIn, + include_id=include_id)) + return dataiter +def load_id_custom_dataset(DATASET, BATCH_SIZE, norm_mean, norm_std, dataset_path, dataset_percentage=100, workers=0, + training=False, shuffleIn=False, include_id=True): + transform_list = [ + transforms.ToTensor(), + ] + + if norm_mean: # not all datasets are to be normalized + transform_list.append(transforms.Normalize(norm_mean, norm_std)) + + transform = transforms.Compose(transform_list) + + if include_id: + train_dataset = IdImageFolder(root=dataset_path, transform=transform) + else: + train_dataset = datasets.ImageFolder(root=dataset_path, transform=transform) + + # take subsets of dataset + # load full dataset + if dataset_percentage == 100: + data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=shuffleIn, + num_workers=workers, pin_memory=True) + else: # load mentioned percentage of dataset + batch_size = BATCH_SIZE + validation_split = int(dataset_percentage) / 100 + shuffle_dataset = shuffleIn + random_seed = 42 # for reproducibility + + # Creating data indices for training and validation splits: + dataset_size = len(train_dataset) + indices = list(range(dataset_size)) + split = int(np.floor(validation_split * dataset_size)) + if shuffle_dataset: + np.random.seed(random_seed) + np.random.shuffle(indices) + train_indices, val_indices = indices[split:], indices[:split] + + # Creating PT data samplers and loaders: + valid_sampler = torch.utils.data.SubsetRandomSampler(val_indices) + + data_loader = torch.utils.data.DataLoader(train_dataset, + batch_size=BATCH_SIZE, + 
shuffle=False, # a sampler is supplied; DataLoader rejects shuffle=True together with sampler + num_workers=workers, + sampler=valid_sampler, + # batch_sampler=StratifiedBatchSampler(batch_size=BATCH_SIZE, + # y=np.array(train_dataset.targets), + # shuffle=False), + pin_memory=True) + + return data_loader + + # total_data refers to the total size of the data_loader, for all images desired +# DATASET, BATCH_SIZE, workers=0, training=False, shuffleIn=False, include_id=True, data_dir="./custom_data/"): def load_custom_dataset(NETWORK, DATASET, BATCH_SIZE, good_images, total_data, - workers = 0, random=True, replacement=True, single=False, singleIndex=0): + workers=0, random=True, replacement=True, single=False, singleIndex=0): if random: if replacement: if single == False: @@ -378,55 +600,72 @@ def load_custom_dataset(NETWORK, DATASET, BATCH_SIZE, good_images, total_data, else: custom_sampler = get_custom_sampler_full(good_images) - if DATASET == 'CIFAR10': - transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize((0.4914,0.4822,0.4465), (0.2023,0.1994,0.2010)) - ] - ) - - testset = IdCifar10(root='./data', train=False, - download=True, transform=transform) - test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, - sampler=custom_sampler, num_workers=workers, pin_memory=True) - dataiter = iter(test_loader) - - if DATASET == 'CIFAR100': - transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize((0.4914,0.4822,0.4465), (0.2023,0.1994,0.2010)) - ] - ) - - testset = IdCifar100(root='./data', train=False, - download=True, transform=transform) - test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, - sampler=custom_sampler, num_workers=workers, pin_memory=True) - dataiter = iter(test_loader) - - if DATASET == 'IMAGENET': + if "custom" in DATASET.lower(): + # read config details from config.ini + config = parseConfig() + dataset_percentage = config["loadDatasetPercentage"] + dataset_path = config["path"] + if config["doNormalize"]: + norm_mean =
config["mean"] + norm_std = config["std"] + else: + norm_mean = None + norm_std = None - valdir = os.path.join(DATASETS + '/imagenet/', 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - images = IdImageFolder(valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])) - val_loader = torch.utils.data.DataLoader(images, batch_size=BATCH_SIZE, - num_workers = workers, sampler=custom_sampler, pin_memory=True) - dataiter = iter(val_loader) + data_loader = load_id_custom_dataset(DATASET, BATCH_SIZE, norm_mean=norm_mean, norm_std=norm_std, + dataset_path=dataset_path, + dataset_percentage=dataset_percentage, workers=workers, include_id=True) + dataiter = iter(data_loader) + + if DATASET.lower() == 'cifar10': + transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) + ] + ) + + testset = IdCifar10(root='./data', train=False, + download=True, transform=transform) + test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, + sampler=custom_sampler, num_workers=workers, pin_memory=True) + dataiter = iter(test_loader) + + if DATASET.lower() == 'cifar100': + transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) + ] + ) + + testset = IdCifar100(root='./data', train=False, + download=True, transform=transform) + test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, + sampler=custom_sampler, num_workers=workers, pin_memory=True) + dataiter = iter(test_loader) + + if DATASET.lower() == 'imagenet': + valdir = os.path.join(DATASETS + '/imagenet/', 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + images = IdImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + 
transforms.ToTensor(), + normalize, + ])) + val_loader = torch.utils.data.DataLoader(images, batch_size=BATCH_SIZE, + num_workers=workers, sampler=custom_sampler, pin_memory=True) + dataiter = iter(val_loader) return dataiter + class IdCifar10(datasets.CIFAR10): def __init__(self, root, train=False, - transform=None, target_transform=None, - download=False): + transform=None, target_transform=None, + download=False): super(datasets.CIFAR10, self).__init__(root) self.transform = transform @@ -436,7 +675,7 @@ def __init__(self, root, train=False, self.download() if not self._check_integrity(): raise RuntimeError('Dataset not found or corrupted.' + - ' You can use download=True to download it') + ' You can use download=True to download it') if self.train: downloaded_list = self.train_list else: @@ -465,28 +704,29 @@ def __init__(self, root, train=False, def __getitem__(self, index): img, target, path = self.data[index], self.targets[index], self.img_names[index] - #img = Image.fromarray(img) + # img = Image.fromarray(img) if self.transform is not None: img = self.transform(img) if self.target_transform is not None: target = self.target_transform(target) return img, target, path, index + class IdCifar100(IdCifar10): base_folder = 'cifar-100-python' url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" filename = "cifar-100-python.tar.gz" tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85' train_list = [ - ['train', '16019d7e3df5f24257cddd939b257f8d'], + ['train', '16019d7e3df5f24257cddd939b257f8d'], ] test_list = [ - ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], + ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], ] meta = { - 'filename': 'meta', - 'key': 'fine_label_names', - 'md5': '7973b15100ade9c7d40fb424638fde48', + 'filename': 'meta', + 'key': 'fine_label_names', + 'md5': '7973b15100ade9c7d40fb424638fde48', } @@ -496,29 +736,38 @@ def __getitem__(self, index): path = self.imgs[index][0] return item[0], item[1], path, index + class 
Custom_Sampler(torch.utils.data.Sampler): def __init__(self, data): self.data = data + def __iter__(self): return iter(self.data) + def __len__(self): return len(self.data) + """ Edit this to make the random selector Input: list of good indices Return: list of indices that will be used to load data """ + + def random_selector(indices, total): return random.choices(indices, k=total) + def single_selector(indices, index, total): single_index = [indices[index]] return random.choices(single_index, k=total) + def random_selector_no_replacement(indices, total): return random.sample(indices, k=total) + def get_custom_sampler(indices, total): # Use random sampling with replacement indices = random_selector(indices, total) @@ -528,6 +777,7 @@ def get_custom_sampler(indices, total): return sampler + def get_custom_sampler_single(indices, index, total): # Use random sampling with replacement indices = single_selector(indices, index, total) @@ -537,6 +787,7 @@ def get_custom_sampler_single(indices, index, total): return sampler + def get_custom_sampler_no_replacement(indices, total): # Use random sampling with replacement indices = random_selector_no_replacement(indices, total) @@ -546,6 +797,7 @@ def get_custom_sampler_no_replacement(indices, total): return sampler + def get_custom_sampler_full(indices): # Create custom sampler sampler = Custom_Sampler(indices) @@ -563,6 +815,7 @@ def getMaxClass(tensor, dim=0): diff_top2 = (top2[0] - top2[1]) * 100 return argmax, conf, diff_top2.item() + def getMaxClass_parallel(tensor_in, dim=0): tensor = F.softmax(tensor_in) argmax = torch.argmax(tensor).item() @@ -571,12 +824,14 @@ def getMaxClass_parallel(tensor_in, dim=0): diff_top2 = (top2[0] - top2[1]) * 100 return argmax, conf, diff_top2.item() + # from https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python def softmax(x): """Compute softmax values for each sets of scores in x.""" e_x = np.exp(x - np.max(x)) return e_x / e_x.sum(axis=0) + def 
diff_top2(data): return (data[0] - data[1]).item() * 100 @@ -584,22 +839,22 @@ def diff_top2(data): ################################################################# ##################### HELPER METHODS FOR I/O #################### ################################################################# -def save_data(path, file_name, data, compress = True): +def save_data(path, file_name, data, compress=True): if not os.path.exists(path): os.makedirs(path) output = path + file_name + ".p" - f = bz2.BZ2File(output + ".bz2","wb") if compress else open(fname,"wb") + f = bz2.BZ2File(output + ".bz2", "wb") if compress else open(output, "wb") cPickle.dump(data, f) f.close() - -def load_file(file_name, compress = True): - f = bz2.BZ2File(file_name + '.p.bz2', "rb") if compress else open(file_name.strip('.p.bz2'),"rb") - fileIn= cPickle.load(f) +def load_file(file_name, compress=True): + f = bz2.BZ2File(file_name + '.p.bz2', "rb") if compress else open(file_name + '.p', "rb") + fileIn = cPickle.load(f) f.close() return fileIn + ################################################################# ################### HELPER METHODS FOR NUMSYS ################### ################################################################# @@ -608,7 +863,7 @@ def getNumSysName(name, bits=16, radix_up=5, radix_down=10, bias=None): if name == "fp32": return num_fp32(), name if name == "INT": - assert(getQuantize_en()) + assert (getQuantize_en()) return num_fp32(), name elif name == "fp16": return num_fp16(), name @@ -627,4 +882,3 @@ def getNumSysName(name, bits=16, radix_up=5, radix_down=10, bias=None): else: sys.exit("Number format not supported") -