# MML Seminar 27 Jan 19, from annoTrask.py 19 Oct 18
# Two-layer neural network that learns XOR by plain gradient descent
# (after A. Trask's "A Neural Network in 11 Lines of Python").
import numpy as np

# Round floats to 3 decimal places in printouts.
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

# Outputs (y's) = fcn(inputs (x's)) using weights (w's).
# XOR operates on all 4 possible truth values, no sampling.
# The final column of 1's absorbs the bias
# (aka linearizing affine transformations, projective coords, homogenizing).
X0 = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])  # X0(4,3): 4 rows of 3-vectors
y0 = np.array([[0, 1, 1, 0]]).T  # XOR targets, y0(4,1) column vector

np.random.seed(1)  # fixed seed for repeatable experiments
W1 = 2 * np.random.random((3, 4)) - 1  # W1(3,4): signed fractions in (-1, 1)
w2 = 2 * np.random.random((4, 1)) - 1  # w2(4,1): signed fractions in (-1, 1)
print("initial W1")
print(W1)
print("initial w2")
print(w2)

for jj in range(600):  # by 600 iterations the trend is obvious; Trask uses 60,000
    # Forward pass: logistic sigmoid at each layer.
    Y1 = 1 / (1 + np.exp(-(np.dot(X0, W1))))  # Y1(4,4) = sigma( X0(4,3) W1(3,4) )
    y2 = 1 / (1 + np.exp(-(np.dot(Y1, w2))))  # y2(4,1) = sigma( Y1(4,4) w2(4,1) )
    # Loss aka energy(y2) = .5|y0 - y2|^2, so -grad(y2) = y0 - y2.
    # dy2/dY1 are Rumelhart's deltas (Baydin's adjoints) used to update weights.
    dy2 = (y0 - y2) * y2 * (1 - y2)          # dy2(4,1); Hadamard (termwise) products
    # Compute BOTH deltas from the same forward-pass weights before updating:
    # the original updated w2 first, so dY1 saw post-update weights.
    dY1 = dy2.dot(w2.T) * Y1 * (1 - Y1)      # dY1(4,4); w2.T is (1,4)
    w2 += Y1.T.dot(dy2)                      # backprop update: dw2(4,1) = Y1.T(4,4) dy2(4,1)
    W1 += X0.T.dot(dY1)                      # backprop update: dW1(3,4) = X0.T(3,4) dY1(4,4)
# endfor

print("final W1=")
print(W1)
print("final w2=")
print(w2)
print("outcome Y1=")
print(Y1)
print("outcome y2=")
print(y2)