Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

4.4 - CNN Architectures

Open In Colab
!wget -nc --no-cache -O init.py -q https://raw.githubusercontent.com/rramosp/2021.deeplearning/main/content/init.py
import init; init.init(force_download=False);
/content/init.py:2: SyntaxWarning: invalid escape sequence '\S'
  course_id = '\S*deeplearning\S*'
Loading...
replicating local resources
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
%load_ext tensorboard

from sklearn.datasets import *
from local.lib import mlutils
tf.__version__
Loading...
vgg16 = tf.keras.applications.VGG16()
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
553467096/553467096 ━━━━━━━━━━━━━━━━━━━━ 3s 0us/step
vgg16.summary()
Loading...
Loading...
Loading...
Loading...
Loading...

Get an image and transform it. The model needs an input of the correct shape, here a batch of shape (1, 224, 224, 3), with pixel values rescaled to the 0–255 range.

img_url = 'https://www.autocar.co.uk/sites/autocar.co.uk/files/styles/gallery_slide/public/images/car-reviews/first-drives/legacy/rrswbphev006.jpg?itok=9I7wPblq'
from skimage.io import imread
from skimage.transform import resize
def get_img(img_fname):
    """Load an image and return it as a one-image batch of integer pixels.

    The image is read with ``imread``, resized to 224x224 with three
    channels, min-max rescaled to the integer range 0..255 and given a
    leading batch axis.

    Args:
        img_fname: path (or anything ``imread`` accepts) of the image to load.

    Returns:
        An integer array of shape ``(1, 224, 224, 3)``. If the resized image
        is constant (all pixels equal) an all-zero array is returned.
    """
    img = imread(img_fname)

    if img.ndim == 2:
        # Single-channel image: replicate it so there are three channels.
        img = np.stack([img] * 3, axis=-1)
    elif img.ndim == 3 and img.shape[-1] == 4:
        # Four-channel image (e.g. RGBA): keep the first three channels so
        # that resize is not asked to interpolate across the channel axis.
        img = img[..., :3]

    simg = resize(img, output_shape=(224, 224, 3))

    lowest = np.min(simg)
    span = np.max(simg) - lowest
    if span == 0:
        # A constant image would make the min-max scaling divide by zero and
        # produce NaN before the integer cast; return zeros instead.
        simg = np.zeros(simg.shape, dtype=int)
    else:
        simg = ((simg - lowest) / span * 255).astype(int)

    # Add the leading batch axis: (224, 224, 3) -> (1, 224, 224, 3).
    simg = simg.reshape([1, *simg.shape])
    return simg


simg = get_img('local/imgs/car.jpg')
print(simg.shape, np.min(simg), np.max(simg))
plt.imshow(simg[0])
(1, 224, 224, 3) 0 255
<Figure size 640x480 with 1 Axes>

Make a prediction and decode it.

output = vgg16.predict(simg)
output.shape
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 595ms/step
(1, 1000)
plt.plot(output[0])
<Figure size 640x480 with 1 Axes>
np.argsort(output[0])[::-1][:5]
array([656, 436, 609, 654, 734])
from tensorflow.keras.applications.imagenet_utils import decode_predictions
decode_predictions(output)
[[('n03770679', 'minivan', np.float32(0.7349299)), ('n02814533', 'beach_wagon', np.float32(0.109361306)), ('n03594945', 'jeep', np.float32(0.04911455)), ('n03769881', 'minibus', np.float32(0.033921164)), ('n03977966', 'police_van', np.float32(0.014368715))]]

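We can also use the network as a feature extractor, by taking the output of an intermediate layer (here `fc1`) instead of the final predictions.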

# NOTE: `inputs` is not referenced again in the visible part of this notebook;
# it is kept in case a later cell relies on it.
inputs = vgg16.layers[0]
# Layer whose output is used as the feature vector for each image.
layer = vgg16.get_layer('fc1')

# Build the sub-model from `vgg16.input` (a single tensor) rather than
# `vgg16.inputs` (a list). Calling the sub-model with one array then matches
# the declared input structure, which avoids the Keras UserWarning
# "The structure of `inputs` doesn't match the expected structure".
layer_output_fn = tf.keras.Model(vgg16.input, layer.output)
layer_output_fn(simg)
/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.
Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(1, 224, 224, 3))
  warnings.warn(msg)
<tf.Tensor: shape=(1, 4096), dtype=float32, numpy= array([[ 0. , 0. , 0. , ..., 21.892479, 0. , 0. ]], dtype=float32)>

These features can then be used for other tasks (training other models, comparing images, etc.).

# Load three more sample images through the same `get_img` helper used for `simg`.
simg2 = get_img('local/imgs/motorbike.jpg')
simg3 = get_img('local/imgs/citycar.jpg')
simg4 = get_img('local/imgs/bellaire-house.jpg')
# Show the four images side by side (1 row x 4 columns) with the axes hidden.
# Each array carries a leading batch axis, hence the `[0]` when plotting.
plt.figure(figsize=(15,6))
plt.subplot(141); plt.imshow(simg[0]); plt.axis("off")
plt.subplot(142); plt.imshow(simg2[0]); plt.axis("off")
plt.subplot(143); plt.imshow(simg3[0]); plt.axis("off")
plt.subplot(144); plt.imshow(simg4[0]); plt.axis("off")
(np.float64(-0.5), np.float64(223.5), np.float64(223.5), np.float64(-0.5))
<Figure size 1500x600 with 4 Axes>
feats = layer_output_fn(np.vstack((simg,simg2,simg3,simg4))).numpy()
feats.shape
/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.
Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(4, 224, 224, 3))
  warnings.warn(msg)
(4, 4096)
import itertools
# Pairwise distance matrix between feature vectors: entry (i, j) is the mean
# absolute difference between the features of image i and image j.
n_feats = len(feats)
r = np.zeros((n_feats, n_feats))
for i, feat_i in enumerate(feats):
    for j, feat_j in enumerate(feats):
        r[i, j] = np.mean(np.abs(feat_i - feat_j))
r
array([[0. , 2.55644989, 2.53840065, 2.27824593], [2.55644989, 0. , 3.00784516, 2.43986821], [2.53840065, 3.00784516, 0. , 2.7770021 ], [2.27824593, 2.43986821, 2.7770021 , 0. ]])

And, of course, we can obtain the ImageNet classification for all four images.

preds = vgg16.predict(np.vstack((simg,simg2,simg3,simg4)))
decode_predictions(preds)
1/1 ━━━━━━━━━━━━━━━━━━━━ 2s 2s/step
[[('n03770679', 'minivan', np.float32(0.7349284)), ('n02814533', 'beach_wagon', np.float32(0.109362334)), ('n03594945', 'jeep', np.float32(0.04911469)), ('n03769881', 'minibus', np.float32(0.03392129)), ('n03977966', 'police_van', np.float32(0.014368754))], [('n04482393', 'tricycle', np.float32(0.4998707)), ('n03791053', 'motor_scooter', np.float32(0.11783641)), ('n03785016', 'moped', np.float32(0.06644621)), ('n03534580', 'hoopskirt', np.float32(0.04639038)), ('n02769748', 'backpack', np.float32(0.042827234))], [('n02930766', 'cab', np.float32(0.64287394)), ('n04252077', 'snowmobile', np.float32(0.046685826)), ('n03459775', 'grille', np.float32(0.044866074)), ('n03445924', 'golfcart', np.float32(0.040118206)), ('n02701002', 'ambulance', np.float32(0.03328232))], [('n02859443', 'boathouse', np.float32(0.40657878)), ('n02825657', 'bell_cote', np.float32(0.17474882)), ('n03028079', 'church', np.float32(0.08881704)), ('n03930313', 'picket_fence', np.float32(0.07153446)), ('n02793495', 'barn', np.float32(0.043169715))]]