Build your own PyTorch - 3: Training a Neural Network with self-made AD software¶

In the previous tutorials, we built an automatic differentiation framework that dynamically constructs computation graphs and performs automatic backpropagation through arbitrary computation graphs. To illustrate how this lets us train machine learning models, we now put the framework into practice by training a neural network to classify MNIST digits.
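
As a quick refresher, this is roughly how the framework from the previous parts is used end to end: we build a graph by operating on variable and constant nodes, then ask compute_backprop for the gradient of the output with respect to every variable. (A minimal sketch; the toy expression and the names w, b, x are made up for illustration, but the calls are the same ones we use throughout this notebook.)

import numpy as np
import compgraph as cg
from autodiff.backprop import compute_backprop

# toy forward pass: y = w · x + b; the graph is built dynamically as we go
w = cg.VariableNode.create_using(np.array([2.0, -1.0]), name='w')
b = cg.VariableNode.create_using(np.array(0.5), name='b')
x = cg.ConstantNode.create_using(np.array([1.0, 3.0]))
y = cg.dot(x, w) + b

# backward pass: gradients come back in a dict keyed by variable name
grads = compute_backprop(y)
print(grads['w'])  # dy/dw = x -> [1., 3.]
print(grads['b'])  # dy/db = 1.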

In [1]:
from sklearn.utils import shuffle
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm, trange
import matplotlib.pyplot as plt

1. Load Data¶

Let's load the MNIST dataset of 28×28 grayscale digit images:

In [2]:
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
label_binarizer = LabelBinarizer()

# transform all grayscale values to the range [0, 1],
# 0 being black and 1 being white
X_scaled = X / 255

# transform the categorical target labels into one-hot (one-vs-all) vectors
y_binarized = label_binarizer.fit_transform(y)

# split the data into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_binarized, test_size=0.2, random_state=42)

print("Number of train samples: ", X_train.shape[0])
print("Number of test samples: ", X_test.shape[0])
Number of train samples:  56000
Number of test samples:  14000

Let's plot a few random examples:

In [3]:
n_figures = 4
fig, axs = plt.subplots(1, n_figures, figsize=(6 * n_figures, 6))

# pick a few random training images and show each with its label
random_idx = np.random.permutation(len(X_train))[:n_figures]
for idx in range(n_figures):
    axs[idx].imshow(X_train[random_idx[idx]].reshape(28, 28))
    # recover the digit from the one-hot encoded label
    true_label = np.arange(0, 10)[y_train[random_idx[idx]].astype('bool')].item()
    axs[idx].set_title(f"Label = {true_label}")

2. Build neural network¶

We build a fully connected neural network with one hidden layer of 64 units.
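
Written out, the forward pass is

$$h = \mathrm{ReLU}(x W_1 + b_1), \qquad \text{logits} = h W_2 + b_2,$$

with $W_1 \in \mathbb{R}^{784 \times 64}$, $b_1 \in \mathbb{R}^{64}$, $W_2 \in \mathbb{R}^{64 \times 10}$ and $b_2 \in \mathbb{R}^{10}$. In code: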

In [4]:
import compgraph as cg
from autodiff.backprop import compute_backprop

def relu(x):
    # element-wise ReLU built from the framework's `where` primitive
    return cg.where(x > 0, x, 0)

# weights use He initialization (std = sqrt(2 / fan_in)), biases start at zero
l1_weights = cg.VariableNode.create_using(np.random.normal(scale=np.sqrt(2./784), size=(784, 64)), name='l1_w')
l1_bias = cg.VariableNode.create_using(np.zeros(64), name='l1_b')
l2_weights = cg.VariableNode.create_using(np.random.normal(scale=np.sqrt(2./64), size=(64, 10)), name='l2_w')
l2_bias = cg.VariableNode.create_using(np.zeros(10), name='l2_b')


def nn(x):
    # hidden layer: 784 -> 64 with ReLU
    l1_activations = relu(cg.dot(x, l1_weights) + l1_bias)
    # output layer: 64 -> 10, returning unnormalized logits
    l2_activations = cg.dot(l1_activations, l2_weights) + l2_bias
    
    return l2_activations

3. Visualize the computation graph¶

Let's visualize a forward pass through the neural network as a computation graph:

In [5]:
random_batch_x = cg.ConstantNode.create_using(np.random.normal(size=(10,784)))
output = nn(random_batch_x)
cg.build_and_visualize_graph(output)
Out[5]:
<networkx.classes.digraph.DiGraph at 0x7f41f0656550>

4. Run training¶

Let's run the training loop, using our compute_backprop function to obtain the gradients.
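
For each example with one-hot label $y$ and logits $z$, the softmax cross-entropy loss is

$$\ell(z, y) = -\sum_{c=1}^{10} y_c \log \mathrm{softmax}(z)_c,$$

which cg.softmax_cross_entropy aggregates over the mini-batch. Every parameter $\theta$ is then updated by plain stochastic gradient descent,

$$\theta \leftarrow \theta - \eta \, \frac{\partial \mathcal{L}}{\partial \theta},$$

with learning rate $\eta = 0.01$ and batch size 32: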

In [6]:
LEARNING_RATE = 0.01
BATCH_SIZE = 32
ITERATIONS = 50000

last1000_losses = []
progress_bar = trange(ITERATIONS)
training_set_pointer = 0
loss_list = []

for i in progress_bar:
    # take the next mini-batch of images and one-hot labels
    batch_x = X_train[training_set_pointer:training_set_pointer + BATCH_SIZE]
    batch_y = y_train[training_set_pointer:training_set_pointer + BATCH_SIZE]
    
    if training_set_pointer + BATCH_SIZE >= len(y_train):
        # if the training set is consumed, start from the beginning
        training_set_pointer = 0
    else:
        training_set_pointer += BATCH_SIZE
    
    # forward pass: build the computation graph and evaluate the loss
    logits = nn(batch_x)
    loss = cg.softmax_cross_entropy(logits, batch_y)
    last1000_losses.append(loss)
    
    progress_bar.set_description(
        "Avg. Loss (Last 1k Iterations): {:.5f}".format(np.mean(last1000_losses))
    )
    
    # keep only the most recent losses for the running average
    if len(last1000_losses) == 1000:
        last1000_losses.pop(0)
    
    # backward pass: gradients of the loss w.r.t. all variables, keyed by name
    grads = compute_backprop(loss)
    
    # vanilla SGD update of all parameters
    l1_weights -= LEARNING_RATE * grads['l1_w']
    l2_weights -= LEARNING_RATE * grads['l2_w']
    l1_bias -= LEARNING_RATE * grads['l1_b']
    l2_bias -= LEARNING_RATE * grads['l2_b']
    
    loss_list.append(loss.item())
Avg. Loss (Last 1k Iterations): 0.09458: 100%|██████████| 50000/50000 [04:45<00:00, 175.23it/s]

We see that the loss converges as expected.
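
To make the trend visible, we smooth the per-iteration losses with a running mean over a window of $N = 1000$ iterations,

$$\bar{\ell}_t = \frac{1}{N} \sum_{k=0}^{N-1} \ell_{t+k},$$

which is what the convolution with a length-$N$ box kernel below computes: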

In [13]:
fig, ax = plt.subplots()
N = 1000
# running mean of the loss over a window of N iterations
running_mean = np.convolve(loss_list, np.ones(N)/N, mode='valid')
ax.plot(running_mean)
ax.set_xlabel("iteration")
ax.set_ylabel("loss")
Out[13]:
Text(0, 0.5, 'loss')

5. Evaluate the model¶

Finally, let's evaluate the model on the held-out test set. We see that we achieve decent accuracy on MNIST.
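
To turn the logits into class probabilities we implement a numerically stabilized softmax,

$$\mathrm{softmax}(z)_c = \frac{e^{z_c - \max_j z_j}}{\sum_k e^{z_k - \max_j z_j}},$$

where subtracting the row-wise maximum leaves the result unchanged but avoids overflow in the exponentials. The predicted class is then the argmax over these probabilities: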

In [14]:
def softmax(x, axis):
    # subtract the maximum for numerical stability before exponentiating
    x_max = cg.max(x, axis=axis, keepdims=True)
    exp_op = cg.exp(x - x_max)
    return exp_op / cg.sum(exp_op, axis=axis, keepdims=True)

logits = nn(X_test)
probabilities = softmax(logits, axis=-1)
# the predicted class is the one with the highest probability
predicted_labels = np.argmax(probabilities, axis=-1)
true_labels = np.argmax(y_test, axis=-1)
accuracy = np.mean(predicted_labels == true_labels)

print("Accuracy: {:.2f}%".format(accuracy * 100))
Accuracy: 96.41%