# 19oct18 after Andrew Trask's XOR 1 hidden layer code (modified by GF)
# 17oct18 rationalize variable names notation
# 14oct18 continue for MA490 lecture 19oct18
# 12mar18 Once more with feeling from Andrew Trask
#
# Purpose: train a tiny fully-connected network with one hidden layer
# (3 inputs -> 4 hidden units -> 1 output, sigmoid activations) on a
# two-input boolean function using full-batch updates, then print the
# learned weights and the network's outputs.
#
# Notation: capital names are matrices, lower-case names are column vectors.
#   X0 (4,3)  inputs        y0 (4,1)  targets
#   W1 (3,4)  layer-1 wts   w2 (4,1)  layer-2 wts
#   Y1 (4,4)  hidden acts   y2 (4,1)  network output
#
# The print calls use a single parenthesised argument so the script produces
# the same output under Python 2 and Python 3.

import numpy as np  ## C-library for array operations

## round printouts to 3 decimal places
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

N_ITER = 600  ## by 600 obvious trend, Trask uses 60,000


def sigmoid(z):
    """Return the logistic function 1/(1+exp(-z)), applied termwise to z."""
    return 1 / (1 + np.exp(-z))


## X0(4,3) = 4 rows of 3-vectors; the third column is constant 1
X0 = np.array([[0, 0, 1],
               [0, 1, 1],
               [1, 0, 1],
               [1, 1, 1]])

y0 = np.array([[0, 1, 1, 0]]).T    ## XOR, y0(4,1) column vector
#y0 = np.array([[0,1,1,1]]).T      ## OR,  y0(4,1) column vector
#y0 = np.array([[0,0,0,1]]).T      ## AND, y0(4,1) column vector
#y0 = np.array([[1,0,0,1]]).T      ## IFF, y0(4,1) column vector
#y0 = np.array([[1,0,1,1]]).T      ## =>   y0(4,1) column vector

np.random.seed(1)  ## for repeatable experiments

W1 = 2 * np.random.random((3, 4)) - 1  ## W1(3,4) signed fractions
w2 = 2 * np.random.random((4, 1)) - 1  ## w2(4,1) signed fractions

## Alternative initialisations for experiments:
#eps = 0.000000000001  #.0001ok
#W1= 1 - eps*np.random.random((3,4))  ##W1(3,4) nhd of 1
#w2= 1 - eps*np.random.random((4,1))  ##w2(4,1) nhd of 1
#W1 = np.zeros((3,4))  ## all 0
#w2 = np.zeros((4,1))
#W1 = np.ones((3,4))   ## all 1
#w2 = np.ones((4,1))
#W1[2][3]= 1.          ## contaminators
#w2[1]=-1.

print("initial W1")
print(W1)
print("initial w2")
print(w2)

for jj in range(N_ITER):
    ## forward pass
    Y1 = sigmoid(np.dot(X0, W1))  # sigma( X0(4,3) W1(3,4) ) = Y1(4,4)
    y2 = sigmoid(np.dot(Y1, w2))  # sigma( Y1(4,4) w2(4,1) ) = y2(4,1)

    #raw_input()  ## kludge: wait for keypress (Python 2; input() in Python 3)
    #print(jj)    ## watch convergence
    #print(y2)    ## output
    #print(W1)    ## first weight matrix
    #print(w2)    ## second weight vector

    ## backward pass
    dy2 = (y0 - y2) * y2 * (1 - y2)  ## dy2(4,1) arithmetic is termwise
    w2 += Y1.T.dot(dy2)              ## backprop Y1.T(4,4) dy2(4,1) = dw2(4,1)
    # NOTE(review): w2 has already been updated on the line above, so dY1 is
    # computed from the *new* w2. Standard backprop would form dY1 from the
    # pre-update w2 -- confirm whether this ordering is intentional. It is
    # kept as-is here so the numerical results are unchanged.
    dY1 = dy2.dot(w2.T) * Y1 * (1 - Y1)  ## dy2(4,1) w2.T(1,4) = (4,4), termwise
    W1 += X0.T.dot(dY1)              ## backprop X0.T(3,4) dY1(4,4) = dW1(3,4)
#endfor

## Y1 and y2 below are from the last forward pass, i.e. computed with the
## weights as they were *before* the final update.
print("final W1=")
print(W1)
print("final w2=")
print(w2)
print("outcome Y1=")
print(Y1)
print("outcome y2=")
print(y2)