# init repo notebook
!git clone https://github.com/rramosp/ppdl.git > /dev/null 2> /dev/null
!mv -n ppdl/content/init.py ppdl/content/local . 2> /dev/null
!pip install -r ppdl/content/requirements.txt > /dev/null

Symbolic computing for ML#

import sys
import sympy as sy
import numpy as np
import pandas as pd
from scipy.optimize import minimize

Using the sympy computer algebra system (CAS)#

x,y = sy.symbols("x y")
z = x**2 + x*sy.cos(y)
z
\[\displaystyle x^{2} + x \cos{\left(y \right)}\]

we can evaluate the expression by providing concrete values for the symbolic variables

z.subs({x: 2, y: sy.pi/4})
\[\displaystyle \sqrt{2} + 4\]

and obtain numerical approximations of these values

sy.N(z.subs({x: 2, y: sy.pi/4}))
\[\displaystyle 5.41421356237309\]

a derivative can be seen as a function that takes an expression as input and outputs another expression

observe how we compute \(\frac{\partial z}{\partial x}\) and \(\frac{\partial z}{\partial y}\)

z.diff(x)
\[\displaystyle 2 x + \cos{\left(y \right)}\]
z.diff(y)
\[\displaystyle - x \sin{\left(y \right)}\]
r = z.diff(x).subs({x: 2, y: sy.pi/4})
r, sy.N(r)
(sqrt(2)/2 + 4, 4.70710678118655)
r = z.diff(y).subs({x: 2, y: sy.pi/4})
r, sy.N(r)
(-sqrt(2), -1.41421356237310)

EXERCISE: draw the computational graph of \(x^2+x\cos(x)\) and show how to differentiate it mechanically using the graph.
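As a starting point (a sketch, not part of the exercise statement), sympy can show the expression tree that underlies the computational graph; sy.srepr and sy.preorder_traversal are standard sympy utilities for inspecting it:

# inspect the expression tree of x**2 + x*cos(x); each node corresponds to a node of the computational graph
expr_ex = x**2 + x*sy.cos(x)
print(sy.srepr(expr_ex))
for node in sy.preorder_traversal(expr_ex):
    print(type(node).__name__, ":", node)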

More things you can do with sympy (and almost any CAS)

sy.expand((x+2)**2)
\[\displaystyle x^{2} + 4 x + 4\]
sy.factor( x**2-2*x-8 )
\[\displaystyle \left(x - 4\right) \left(x + 2\right)\]
sy.solve( x**2 + 2*x - 8, x)
[-4, 2]
a = sy.symbols("alpha")
sy.solve( a*x**2 + 2*x - 8, x)
[(sqrt(8*alpha + 1) - 1)/alpha, -(sqrt(8*alpha + 1) + 1)/alpha]

differential equations, solving \(\frac{df}{dt}=f(t)+t\)

t, C1 = sy.symbols("t C1")
f = sy.symbols("f", cls=sy.Function)
dydt = f(t)+t
eq = dydt-sy.diff(f(t),t)
yt = sy.dsolve(eq, f(t))
yt
\[\displaystyle f{\left(t \right)} = C_{1} e^{t} - t - 1\]

systems of equations

sy.solve ([x**2+y, 3*y-x])
[{x: -1/3, y: -1/9}, {x: 0, y: 0}]

Sympy to Python and Numpy#

See Sympy Numeric Computation

f = (sy.sin(x) + x**2)/2
f
\[\displaystyle \frac{x^{2}}{2} + \frac{\sin{\left(x \right)}}{2}\]
f.subs({x:10})
\[\displaystyle \frac{\sin{\left(10 \right)}}{2} + 50\]
sy.N(f.subs({x:10}))
\[\displaystyle 49.7279894445553\]
f1 = sy.lambdify(x, f)
f1(10)
49.72798944455531

and a vectorized version

f2 = sy.lambdify(x, f, "numpy")
f2(10)
49.72798944455531
f2(np.array([10,2,3]))
array([49.72798944,  2.45464871,  4.57056   ])

the lambdified version is much faster than subs plus sy.N, and the numpy-vectorized version is faster still when evaluating many inputs at once

%timeit sy.N(f.subs({x:10}))
94.5 µs ± 1.04 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit f1(10)
874 ns ± 4.61 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
%timeit [f1(i) for i in range(1000)]
927 µs ± 24.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit f2(np.arange(1000))
17.3 µs ± 193 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

Using sympy to obtain the gradient for Linear Regression#

d = pd.read_csv("local/data/trilotropicos.csv")

y = d.densidad_escamas.values
X = np.r_[[[1]*len(d), d.longitud.values]].T

t0,t1 = sy.symbols("theta_0 theta_1")
t0,t1
(theta_0, theta_1)

we first build the cost expression with only a few summation terms, so that we can print it out and understand its structure

expr = 0
for i in range(10):
    expr += (X[i,0]*t0+X[i,1]*t1-y[i])**2
expr = expr/len(X)
expr
\[\displaystyle 1.12328511430396 \left(0.0770387750377373 \theta_{0} + 0.165986639851686 \theta_{1} - 1\right)^{2} + 0.607240695997103 \left(0.10477892436869 \theta_{0} + 0.669458663526939 \theta_{1} - 1\right)^{2} + 0.602651428552665 \left(0.105177120160781 \theta_{0} + 0.654540412278117 \theta_{1} - 1\right)^{2} + 0.51848830745638 \left(0.113392645316274 \theta_{0} + 0.629671878754504 \theta_{1} - 1\right)^{2} + 0.517443860930039 \left(0.113507027629466 \theta_{0} + 0.433278461926943 \theta_{1} - 1\right)^{2} + 0.434566146385104 \left(0.12385867365679 \theta_{0} + 0.539317715111529 \theta_{1} - 1\right)^{2} + 0.406910763701377 \left(0.127998470883561 \theta_{0} + 0.754957174416818 \theta_{1} - 1\right)^{2} + 0.37522986777134 \left(0.133292486729438 \theta_{0} + 0.703028112939281 \theta_{1} - 1\right)^{2} + 0.3417126092161 \left(0.13967666858824 \theta_{0} + 0.666190411969756 \theta_{1} - 1\right)^{2} + 0.340928045981869 \left(0.139837292225183 \theta_{0} + 0.608064060196844 \theta_{1} - 1\right)^{2}\]

try to find X[0] and y[0] in the expression above; beware that sympy may simplify and reorder the expression

y[:10]
array([ 8.81002719, 12.98047638,  9.50777126,  7.81259333,  8.81891411,
        8.07371798,  9.54390404,  7.15116822,  7.15939183,  7.50229833])
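it may also help to look at the corresponding rows of X (a small cell added for convenience); note that sympy appears to have factored constants out of each squared term, so what shows up above are reciprocals and ratios of the raw values rather than the values themselves

X[:10]   # bias column and longitud values for the first ten data points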

we can now simplify the expression using sympy's simplification machinery

expr = expr.simplify()
expr
\[\displaystyle 0.0666666666666667 \theta_{0}^{2} + 0.650426111998852 \theta_{0} \theta_{1} - 1.16480350229622 \theta_{0} + 1.6854917041242 \theta_{1}^{2} - 5.55685138209798 \theta_{1} + 5.26845684029594\]

we now build the full cost expression over all data points

def build_linear_regression_cost_expression(X, y):
    # mean squared error of the linear model theta_0 + theta_1*x, as a symbolic expression
    expr_cost = 0
    for i in range(len(X)):
        expr_cost += (X[i,0]*t0 + X[i,1]*t1 - y[i])**2 / len(X)
    expr_cost = expr_cost.simplify()
    return expr_cost
y = d.densidad_escamas.values
X = np.r_[[[1]*len(d), d.longitud.values]].T

expr_cost = build_linear_regression_cost_expression(X,y)
expr_cost
\[\displaystyle 1.0 \theta_{0}^{2} + 9.29990347097981 \theta_{0} \theta_{1} - 18.7021160162671 \theta_{0} + 23.7522453335576 \theta_{1}^{2} - 83.9047262815276 \theta_{1} + 91.2853990775578\]
%timeit build_linear_regression_cost_expression(X,y)
2.27 s ± 10.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
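building the expression term by term and then calling simplify is slow (over two seconds here). As a sketch of a possibly faster alternative, the same cost can be assembled with sympy matrices and expanded instead of simplified; build_cost_expression_matrix is an illustrative name, not part of the original code

def build_cost_expression_matrix(X, y):
    # same mean squared error cost, built with matrix algebra and expand()
    Xs = sy.Matrix(X)                 # numeric design matrix as a sympy Matrix
    ys = sy.Matrix(y)                 # target column vector
    theta = sy.Matrix([t0, t1])
    resid = Xs*theta - ys             # symbolic residuals
    return sy.expand((resid.T*resid)[0, 0] / len(y))

expand only multiplies out the squares, which is typically cheaper than a general simplify on an expression of this size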

obtain derivatives symbolically

expr_dt0 = expr_cost.diff(t0)
expr_dt1 = expr_cost.diff(t1)
expr_dt0
\[\displaystyle 2.0 \theta_{0} + 9.29990347097981 \theta_{1} - 18.7021160162671\]
expr_dt1
\[\displaystyle 9.29990347097981 \theta_{0} + 47.5044906671153 \theta_{1} - 83.9047262815276\]

we lambdify the cost and gradient expressions into numerical functions

s_cost = sy.lambdify([[t0,t1]], expr_cost, "numpy")

d0 = sy.lambdify([[t0,t1]], expr_dt0, "numpy")
d1 = sy.lambdify([[t0,t1]], expr_dt1, "numpy")
s_grad = lambda x: np.array([d0(x), d1(x)])
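note that passing [[t0,t1]] (a list containing a list of symbols) to lambdify makes the resulting function take a single length-2 argument. For instance, evaluating at the origin should return the constant terms of the cost and derivative expressions above:

# the lambdified functions take a single [theta_0, theta_1] vector
s_cost([0, 0]), s_grad([0, 0])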

and now we can minimize

r = minimize(s_cost, [0,0], jac=s_grad, method="BFGS")
r
      fun: 2.7447662570799594
 hess_inv: array([[ 5.57425906, -1.09086733],
       [-1.09086733,  0.23434848]])
      jac: array([-1.73778293e-07, -9.26928138e-07])
  message: 'Optimization terminated successfully.'
     nfev: 10
      nit: 9
     njev: 10
   status: 0
  success: True
        x: array([12.6899981, -0.7180591])
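as a sanity check (a sketch, not part of the original flow), the minimizer should agree with the ordinary least-squares solution computed directly from X and y:

# closed-form least squares; the solution should be close to r.x above
theta_ols, *_ = np.linalg.lstsq(X, y, rcond=None)
theta_ols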

observe that the hand-derived cost and gradient functions (n_cost and n_grad) and the ones obtained with sympy evaluate to the same values
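n_cost and n_grad are the hand-derived numpy cost and gradient, defined outside this section. In case they are not in scope, a minimal sketch consistent with the mean squared error cost used above would be (the actual definitions may differ):

# analytic mean squared error cost and its gradient for the linear model X @ theta
n_cost = lambda t: np.mean((X.dot(t) - y)**2)
n_grad = lambda t: 2*X.T.dot(X.dot(t) - y)/len(X)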

t0 = np.random.random()*5+10
t1 = np.random.random()*4-3
t = np.r_[t0,t1]

print ("theta:",t)
print ("cost analytic:", n_cost(t))
print ("cost symbolic:", s_cost(t))

print ("gradient analytic:", n_grad(t))
print ("gradient symbolic:", s_grad(t))
theta: [14.75607897 -2.17279455]
cost analytic: 29.327470736096537
cost symbolic: 29.327470736096018
gradient analytic: [ -9.39673767 -49.89211477]
gradient symbolic: [ -9.39673767 -49.89211477]

and we are still using black box optimization!!!!#