http://iamtrask.github.io/2015/07/12/basic-python-network/
;; Vocabulary in Trask's Python code, decoded somewhat.
# Two-layer (one hidden layer) network trained by backprop, annotated.
# Fixes vs. the pasted notes: '#' comments (';;' is a syntax error in
# Python), Python-3 'range' instead of 'xrange', proper loop indentation,
# and a fixed RNG seed so the run is reproducible (as in Trask's original).

np.random.seed(1)  # reproducible weight initialization

X = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])  # X(4,3): 4 rows of 3-vectors
y = np.array([[0, 1, 1, 0]]).T  # y(1,4).T -> (4,1) column vector of targets

# "synapse" ~ weights, in non-bioenvy jargon
syn0 = 2 * np.random.random((3, 4)) - 1  # syn0(3,4): signed fractions in (-1, 1)
syn1 = 2 * np.random.random((4, 1)) - 1  # syn1(4,1): signed fractions in (-1, 1)

for j in range(60000):  # overkill iteration count, but a matter of taste
    # "lj" is the output of layer j
    l1 = 1 / (1 + np.exp(-np.dot(X, syn0)))   # l1(4,4) = sigma(X(4,3) syn0(3,4))
    l2 = 1 / (1 + np.exp(-np.dot(l1, syn1)))  # l2(4,1) = sigma(l1(4,4) syn1(4,1))
    # "delta" alludes to Rumelhart's backprop quantities.  numpy mixes the
    # matrix product (dot) with the commutative termwise (Hadamard) product (*).
    # y - l2 is MINUS the gradient of the loss .5*|y - l2|^2 w.r.t. l2;
    # note that for b = sigma(a), db/da = b*(1-b).
    l2_delta = (y - l2) * (l2 * (1 - l2))              # backpropped to 2nd layer
    l1_delta = l2_delta.dot(syn1.T) * (l1 * (1 - l1))  # backpropped to 1st layer
    syn1 += l1.T.dot(l2_delta)  # update the second weights
    syn0 += X.T.dot(l1_delta)   # update the first weights
;; To make sense of this obviously "working" code I ended up spending
;; way too much time trying to find more memorable notation,
;; but I can't say I succeeded.