diff --git a/chapter01/chapter01.py b/chapter01/chapter01.py
index 3c4488b..e37a3fa 100644
--- a/chapter01/chapter01.py
+++ b/chapter01/chapter01.py
@@ -79,8 +79,6 @@ def is_cat(x): return x[0].isupper()
 
 '''
 
-from fastai.tabular.all import *
-
 path = untar_data(URLs.ADULT_SAMPLE)
 
 dls = TabularDataLoaders.from_csv(path/'adult.csv', path=path, y_names="salary",
@@ -95,8 +93,6 @@ def is_cat(x): return x[0].isupper()
 
 # Exercise 4: Predict number vs categories
 
-from fastai.collab import *
-
 path = untar_data(URLs.ML_SAMPLE)
 dls = CollabDataLoaders.from_csv(path/'ratings.csv')
 learn = collab_learner(dls, y_range=(0.5,5.5)) # Indicate target range
diff --git a/chapter01/white-cat.jpeg b/chapter01/white-cat.jpeg
new file mode 100644
index 0000000..eb1a6b4
Binary files /dev/null and b/chapter01/white-cat.jpeg differ
diff --git a/chapter02/chapter02.py b/chapter02/chapter02.py
index 2e5f405..8285cf2 100644
--- a/chapter02/chapter02.py
+++ b/chapter02/chapter02.py
@@ -1,62 +1,74 @@
+! [ -e /content ] && pip install -Uqq fastbook
+import fastbook
+fastbook.setup_book()
 
-key = 'XXX'
-key = os.environ['AZURE_SEARCH_KEY']
+from fastbook import *
+from fastai.vision.widgets import *
 
 key = os.environ.get('AZURE_SEARCH_KEY', 'X')
 
 search_images_bing
 
 results = search_images_bing(key, 'grizzly bear')
-ims = results.attrgot('content_url')
+ims = results.attrgot('contentUrl')
 len(ims)
 
+ims = ['http://3.bp.blogspot.com/-S1scRCkI3vY/UHzV2kucsPI/AAAAAAAAA-k/YQ5UzHEm9Ss/s1600/Grizzly%2BBear%2BWildlife.jpg']
 dest = 'images/grizzly.jpg'
 download_url(ims[0], dest)
 
 im = Image.open(dest)
 im.to_thumb(128,128)
 
+# Download the images for each search term, each into a separate folder
+
 bear_types = 'grizzly','black','teddy'
 path = Path('bears')
 
-if not path.exists(): path.mkdir()
-for o in bear_types: dest = (path/o)
-    dest.mkdir(exist_ok=True)
-    results = search_images_bing(key, f'{o} bear')
-    download_images(dest, urls=results.attrgot('content_url'))
-
+
+if not path.exists():
+    path.mkdir()
+    for o in bear_types:
+        dest = (path/o)
+        dest.mkdir(exist_ok=True)
+        results = search_images_bing(key, f'{o} bear')
+        download_images(dest, urls=results.attrgot('contentUrl'))
+
 fns = get_image_files(path)
 fns
 
-failed = verify_images(fns)
-
-failed
+# Check and delete corrupt images
+failed = verify_images(fns)
 failed.map(Path.unlink);
-
 class DataLoaders(GetAttr):
     def __init__(self, *loaders): self.loaders = loaders
-    def __getitem__(self, i): return self.loaders[i] train,valid = add_props(lambda i,self: self[i])
+    def __getitem__(self, i): return self.loaders[i]
+    train,valid = add_props(lambda i,self: self[i])
 
 bears = DataBlock(
-    blocks=(ImageBlock, CategoryBlock),
-    get_items=get_image_files,
-    splitter=RandomSplitter(valid_pct=0.2, seed=42),
-    get_y=parent_label,
-    item_tfms=Resize(128))
-
-blocks=(ImageBlock, CategoryBlock)
-get_items=get_image_files
+    # Types for independent and dependent variables
+    # X. Independent = set of images, prediction source
+    # Y. Dependent = categories, target prediction
+    blocks=(ImageBlock, CategoryBlock),
+    # Underlying items, file paths
+    get_items=get_image_files,
-splitter=RandomSplitter(valid_pct=0.2, seed=42)
+    # Random splitter with fixed seed
+    splitter=RandomSplitter(valid_pct=0.2, seed=42),
-get_y=parent_label
+    # Set labels using the file name
+    get_y=parent_label,
-item_tfms=Resize(128)
+    # Item transforms. The full call returns a DataBlock object
+    item_tfms=Resize(128))
 
+# Data source
+# Includes training and validation sets
+# Mini-batches of 64 items at a time in a single tensor
 dls = bears.dataloaders(path)
-
 dls.valid.show_batch(max_n=4, nrows=1)
 
 bears = bears.new(item_tfms=Resize(128, ResizeMethod.Squish))
@@ -67,20 +79,98 @@ def __getitem__(self, i): return self.loaders[i] train,valid = add_props(lambda
 dls = bears.dataloaders(path)
 dls.valid.show_batch(max_n=4, nrows=1)
 
+# Data augmentation: randomly cropped images
 bears = bears.new(item_tfms=RandomResizedCrop(128, min_scale=0.3))
 dls = bears.dataloaders(path)
+
+# Same image, different versions of the crop transform
 dls.train.show_batch(max_n=4, nrows=1, unique=True)
 
+# Batch-level data augmentation
 bears = bears.new(item_tfms=Resize(128), batch_tfms=aug_transforms(mult=2))
 dls = bears.dataloaders(path)
 dls.train.show_batch(max_n=8, nrows=2, unique=True)
 
+# Standard 224x224 pixel size
+# Train the bear classifier
 bears = bears.new(
-    item_tfms=RandomResizedCrop(224, min_scale=0.5),
-    batch_tfms=aug_transforms())
+    item_tfms=RandomResizedCrop(224, min_scale=0.5),
+    batch_tfms=aug_transforms())
 dls = bears.dataloaders(path)
 
-earn = cnn_learner(dls, resnet18, metrics=error_rate)
+# Create learner and fine-tune
+learn = vision_learner(dls, resnet18, metrics=error_rate)
 learn.fine_tune(4)
 
+# Confusion matrix to measure accuracy
+# Diagonal = correct classifications
+
+interp = ClassificationInterpretation.from_learner(learn)
+interp.plot_confusion_matrix()
+
+interp.plot_top_losses(5, nrows=1)
+
+#hide_output
+cleaner = ImageClassifierCleaner(learn)
+cleaner
+
+# Unlink all images selected for deletion
+for idx in cleaner.delete(): cleaner.fns[idx].unlink()
+
+# Move images assigned to a different category
+for idx,cat in cleaner.change(): shutil.move(str(cleaner.fns[idx]), path/cat)
+
+learn.export()
+
+# Check that the exported file exists
+path = Path()
+path.ls(file_exts='.pkl')
+
+# Simulate inference: load the exported Learner
+learn_inf = load_learner(path/'export.pkl')
+
+# Filename to predict
+learn_inf.predict('images/grizzly.jpg')
+
+# Vocab, or stored list of all possible categories
+learn_inf.dls.vocab
+
+# Ipywidgets upload button
+btn_upload = widgets.FileUpload()
+btn_upload
+
+img = PILImage.create(btn_upload.data[-1])
+
+# Display the uploaded image
+out_pl = widgets.Output()
+out_pl.clear_output()
+with out_pl: display(img.to_thumb(128,128))
+out_pl
+
+# Get predictions
+pred,pred_idx,probs = learn_inf.predict(img)
+
+# Display the prediction
+lbl_pred = widgets.Label()
+lbl_pred.value = f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}'
+lbl_pred
+
+# Classification button
+btn_run = widgets.Button(description='Classify')
+btn_run
+
+def on_click_classify(change):
+    img = PILImage.create(btn_upload.data[-1])
+    out_pl.clear_output()
+    with out_pl: display(img.to_thumb(128,128))
+    pred,pred_idx,probs = learn_inf.predict(img)
+    lbl_pred.value = f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}'
+
+btn_run.on_click(on_click_classify)
+
+# Reset the upload widget
+btn_upload = widgets.FileUpload()
+
+VBox([widgets.Label('Select your bear!'),
+      btn_upload, btn_run, out_pl, lbl_pred])
diff --git a/chapter02/teddy-bear.jpg b/chapter02/teddy-bear.jpg
new file mode 100644
index 0000000..77f2aad
Binary files /dev/null and b/chapter02/teddy-bear.jpg differ
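
As a quick sanity check of the exported model outside the notebook, a minimal inference sketch follows. It is not part of the patch above and reuses only calls that already appear in the chapter02 code (load_learner, PILImage.create, Learner.predict); the paths 'export.pkl' and 'images/grizzly.jpg' are assumptions carried over from that code and may differ in your checkout.

# Hypothetical standalone inference script (not part of the patch)
from fastai.vision.all import load_learner, PILImage

learn_inf = load_learner('export.pkl')            # Learner saved by learn.export()
img = PILImage.create('images/grizzly.jpg')       # any test image will do
pred, pred_idx, probs = learn_inf.predict(img)    # (label, class index, probabilities)
print(f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}')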