In [None]:

%%capture

import import_ipynb
from neural_networks import *


In [None]:

def tensor(delta, x):
	"""Return the outer product of `delta` and `x`, reading None entries as 0.

	Parameters
	----------
	delta, x : arrays supporting boolean-mask assignment and `.copy()`.
		Entries equal to None are treated as 0 in the product.

	Returns
	-------
	`outer(delta, x)`, i.e. one row per entry of `delta` and one column
	per entry of `x`.

	The arguments themselves are left untouched: the None -> 0 substitution
	is done on copies, so the caller's arrays keep their None placeholders.
	"""
	# copy first; writing zeros straight into the arguments would silently
	# alter arrays owned by the caller
	x = x.copy()
	delta = delta.copy()
	x[x == None] = 0
	delta[delta == None] = 0
	return outer(delta, x)


In [None]:

# one training sample: the source components followed by the target components
source, target = array([ 1.5, 2.5 ]), array([ 0.427, -0.288 ])
vector = hstack((source, target))
vector


In [None]:

def num_inputs(w):
	"""Return `sum(inputs(w))`.

	NOTE(review): `inputs` is provided by `neural_networks`; presumably it
	marks the input nodes of `w`, which makes this their count -- confirm.
	"""
	total = sum(inputs(w))
	return total
def num_outputs(w):
	"""Return `sum(outputs(w))`.

	NOTE(review): `outputs` is provided by `neural_networks`; presumably it
	marks the output nodes of `w`, which makes this their count -- confirm.
	"""
	total = sum(outputs(w))
	return total
def num_biases(w):
	"""Return `sum(biases(w))`.

	NOTE(review): `biases` is provided by `neural_networks`; presumably it
	marks the bias nodes of `w`, which makes this their count -- confirm.
	"""
	total = sum(biases(w))
	return total
def num_neurons(w):
	"""Return `sum(neurons(w))`.

	NOTE(review): `neurons` is provided by `neural_networks`; presumably it
	marks the neuron nodes of `w`, which makes this their count -- confirm.
	"""
	total = sum(neurons(w))
	return total

# Display the four totals for the network `w` as one tuple.
# NOTE(review): `w` is not assigned in the cells above; presumably it is
# brought in by `from neural_networks import *` -- confirm.
num_inputs(w), num_outputs(w), num_biases(w), num_neurons(w)


In [None]:

def single_weight_gradient(vector, w, case = 'square'):
	"""Compute the loss and the weight gradient for a single sample.

	Parameters
	----------
	vector : one sample; its leading `num_inputs(w)` entries are read as
		the source and its trailing `num_outputs(w)` entries as the target.
	w : weight array of the network.
	case : forwarded unchanged to `inject_target` and `J`.

	Returns
	-------
	An array stacking two layers along a new first axis: layer 0 is the
	scalar loss repeated over `shape(w)`, layer 1 is `tensor(delta, x)`.
	"""
	n_src, n_tgt = num_inputs(w), num_outputs(w)
	source = vector[:n_src]
	target = vector[-n_tgt:]

	# forward sweep: seed the network with the source, then propagate
	xm, xv = inject_source(source, w)
	xm, xv = forward_prop(xm, xv, w)

	# backward sweep: seed with the target, then propagate back
	err = inject_target(xm, target, w, case = case)
	err = backward_prop(xm, xv, err, w)

	# loss is evaluated on the entries of xm selected by outputs(w)
	scalar_loss = J(xm[outputs(w)], target, w, case = case)

	# per the original note, the gradient has the same shape as w
	grad = tensor(err, xv)
	# repeat the scalar loss over shape(w) so both layers stack into one array
	loss_layer = full(shape(w), scalar_loss)
	return array([loss_layer, grad])


In [None]:

# update only weights on edges

def update_weights(w, gradJ, learning_rate):
	"""Take one gradient-descent step on `w` in place and return `w`.

	Only the entries selected by `edges(w)` are modified; each is lowered
	by `learning_rate` times the matching entry of `gradJ`.
	"""
	# selection is computed once, before anything is written back into w
	on_edges = edges(w)
	w[on_edges] -= learning_rate * gradJ[on_edges]
	return w


In [None]:

def single_sample_training(vector, w, lr, iters, random = 'no', case = 'square'):
	"""Run gradient descent on one sample.

	Parameters
	----------
	vector : the sample handed to `single_weight_gradient`.
	w : network weights, passed to `initial_weights` to get the start point.
	lr : learning rate used for every update.
	iters : maximum number of iterations.
	random : forwarded to `initial_weights`.
	case : forwarded to `single_weight_gradient`.

	Returns
	-------
	(W, trajectory) : the final weights and the list of recorded losses.
	An iteration whose loss is close to zero still applies its update,
	then stops the loop; that loss is not recorded.
	"""
	trajectory = [ ]
	W = initial_weights(w, random = random)
	for _ in range(iters):
		loss_layer, gradJ = single_weight_gradient(vector, W, case = case)
		W = update_weights(W, gradJ, lr)
		# every entry of the loss layer holds the same scalar
		loss = loss_layer[0,0]
		if isclose(0,loss):
			break
		trajectory.append(loss)
	return W, trajectory


In [None]:

# learning rate and iteration cap for one single-sample training run
lr, iters = .045, 100
wstar, trajectory = single_sample_training(vector, w, lr = lr, iters = iters)


In [None]:

from matplotlib.pyplot import *

# Compare the loss trajectory of single-sample training across learning rates.
for lr in [.025,.035, .045,.047]:
	wstar, trajectory = single_sample_training(vector, w, lr, iters)
	n = len(trajectory)
	# legend entry: number of recorded iterations, then the learning rate
	label = f'{n}, {lr}'
	plot(range(n), trajectory, label = label)

title('single sample loss function decay')
# label the axes and explain the legend entries so the figure reads on its own
xlabel('iteration')
ylabel('loss')
grid()
legend(title = 'iterations, learning rate')
show()


In [None]:

def batch_weight_gradient(dataset, w, case = 'square'):
	"""Average `single_weight_gradient` over every sample of `dataset`.

	Parameters
	----------
	dataset : iterable of samples, each passed to `single_weight_gradient`.
	w : network weights.
	case : forwarded to `single_weight_gradient`.

	Returns
	-------
	The mean over samples of the stacked (loss layer, gradient) arrays,
	so the result has the same layout as a single-sample result.
	"""
	per_sample = [ ]
	for sample in dataset:
		per_sample.append(single_weight_gradient(sample, w, case = case))
	# axis 0 indexes the samples
	return mean(array(per_sample), axis = 0)


In [None]:

def batch_sample_training(dataset, w, lr, epochs, random = 'no', case = 'square'):
	"""Run full-batch gradient descent over `dataset`.

	Parameters
	----------
	dataset : samples handed to `batch_weight_gradient` each epoch.
	w : network weights, passed to `initial_weights` to get the start point.
	lr : learning rate used for every update.
	epochs : maximum number of epochs.
	random : forwarded to `initial_weights`.
	case : forwarded to `batch_weight_gradient`.

	Returns
	-------
	(W, trajectory) : the final weights and the list of recorded losses.
	An epoch whose loss is close to zero still applies its update, then
	stops the loop; that loss is not recorded.
	"""
	trajectory = [ ]
	W = initial_weights(w, random = random)
	for _ in range(epochs):
		loss_layer, gradJ = batch_weight_gradient(dataset, W, case = case)
		W = update_weights(W, gradJ, lr)
		# every entry of the loss layer holds the same scalar
		loss = loss_layer[0,0]
		if isclose(0,loss):
			break
		trajectory.append(loss)
	return W, trajectory


In [None]:

# three samples; after stacking, each row is a source pair followed by a target pair
sources = array([
	[ 1.5, 2.5 ],
	[ 1.2, 3.1 ],
	[ 7.1, 8.2 ],
])
targets = array([
	[ 0.427, -0.288 ],
	[ 1.1, -.4 ],
	[ 1.2, 3.4 ],
])
dataset = hstack((sources, targets))

# epoch cap for the batch runs
epochs = 60

# Compare the loss trajectory of full-batch training across learning rates.
for lr in [.05,.06,.07,.08]:
	wstar, trajectory = batch_sample_training(dataset, w, lr, epochs)
	n = len(trajectory)
	# legend entry: number of recorded epochs, then the learning rate
	label = f'{n}, {lr}'
	plot(range(n), trajectory, label = label)

title('batch sample loss function decay')
# label the axes and explain the legend entries so the figure reads on its own
xlabel('epoch')
ylabel('loss')
grid()
legend(title = 'epochs, learning rate')
show()


In [None]:

def stochastic_sample_training(dataset, w, lr, epochs, random = 'no', case = 'square'):
	"""Run stochastic gradient descent: one weight update per sample.

	Parameters
	----------
	dataset : samples, one per row. It is copied on entry, so the caller's
		array keeps its row order.
	w : network weights, passed to `initial_weights` to get the start point.
	lr : base learning rate; epoch number n (counted from 1) uses `lr/n`.
	epochs : maximum number of epochs.
	random : forwarded to `initial_weights`.
	case : forwarded to `single_weight_gradient`.

	Returns
	-------
	(W, trajectory) : the final weights and, per recorded epoch, the mean
	of the per-sample losses. An epoch whose mean loss is close to zero
	stops the loop and is not recorded.
	"""
	# private copy: the shuffle below reorders its argument in place and
	# must not scramble the array owned by the caller
	samples = array(dataset)
	W = initial_weights(w, random = random)
	trajectory = [ ]
	for n in range(1, epochs + 1):
		losses = [ ]
		# fresh sample order every epoch
		rng().shuffle(samples)
		for vector in samples:
			loss, gradJ = single_weight_gradient(vector, W, case = case)
			# step size shrinks as the epoch number grows
			W = update_weights(W, gradJ, lr/n)
			# append scalar loss
			losses.append(loss[0,0])
		loss = mean(losses)
		if isclose(0,loss): break
		trajectory.append(loss)
	return W, trajectory


In [None]:

def minibatch_sample_training(dataset, w, lr, s, epochs, random = 'no', case = 'square'):
	"""Run minibatch gradient descent: one weight update per minibatch.

	Parameters
	----------
	dataset : samples, one per row. It is copied on entry, so the caller's
		array keeps its row order.
	w : network weights, passed to `initial_weights` to get the start point.
	lr : base learning rate; epoch number n (counted from 1) uses `lr/n`.
	s : minibatch size. The last minibatch of an epoch is shorter when `s`
		does not divide the number of samples.
	epochs : maximum number of epochs.
	random : forwarded to `initial_weights`.
	case : forwarded to `batch_weight_gradient`.

	Returns
	-------
	(W, trajectory) : the final weights and, per recorded epoch, the
	unweighted mean of the minibatch losses. An epoch whose mean loss is
	close to zero stops the loop and is not recorded.
	"""
	# private copy: the shuffle below reorders its argument in place and
	# must not scramble the array owned by the caller
	samples = array(dataset)
	N = len(samples)
	# start offsets of the minibatches; identical for every epoch
	starts = arange(0, N, s)
	W = initial_weights(w, random = random)
	trajectory = [ ]
	for n in range(1, epochs + 1):
		losses = [ ]
		# fresh sample order every epoch
		rng().shuffle(samples)
		for start in starts:
			minibatch = samples[start: start + s]
			loss, gradJ = batch_weight_gradient(minibatch, W, case = case)
			# step size shrinks as the epoch number grows
			W = update_weights(W, gradJ, lr/n)
			# append scalar loss
			losses.append(loss[0,0])
		loss = mean(losses)
		if isclose(0,loss): break
		trajectory.append(loss)
	return W, trajectory


In [None]:

# Hard-coded dataset of 10 rows by 3 columns; rebinds the name `dataset`.
# NOTE(review): looks like column 0 is the source and columns 1-2 a one-hot
# target -- every row with a positive first entry carries [1, 0] and every
# row with a negative first entry carries [0, 1]. Confirm the intended layout.
dataset = array([
[ 0.99335999,  1.        ,  0.        ],
[-0.8943543 ,  0.        ,  1.        ],
[ 0.87709524,  1.        ,  0.        ],
[-0.61427175,  0.        ,  1.        ],
[ 0.53202877,  1.        ,  0.        ],
[ 1.10156379,  1.        ,  0.        ],
[ 0.51760267,  1.        ,  0.        ],
[-1.30845517,  0.        ,  1.        ],
[ 0.47808674,  1.        ,  0.        ],
[-1.13024748,  0.        ,  1.        ]])


In [None]:

# noise mean and number of samples
mu = 0
N = 50
# noise standard deviation (values listed by the author: 0.5, 1.0, 2.0, 10.0)
sdev = 0.5
# binomial parameters: a single trial with success probability p
n = 1
p = 0.5

# one 0/1 draw per sample
labels = rng().binomial(n,p,N)
# shift is +1 where the draw is 1 and -1 where it is 0
offsets = 2*labels - 1
# source = shift plus normal noise, so its mean is +1 or -1 according to the draw
sources = offsets + rng().normal(mu,sdev,N)
# one-hot encoded targets: first column is the draw, second its complement
targets = array([labels, 1-labels]).T

# rows: source value followed by the two one-hot target columns
dataset = column_stack((sources, targets))
