Karpathy: building micrograd

Notes on learning more about neural networks and how PyTorch and TensorFlow actually implement autodiff. I was reading through the PyTorch and TensorFlow source and got confused by some of what I saw, so I'm watching these videos in the hope of understanding it better.



import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def f(x):
    return 3*x**2 - 4*x + 5

print(f(3.0))

xs = np.arange(-5, 5, 0.25)
ys = f(xs)
plt.plot(xs, ys)

# Numerical derivative at x = 2/3, where the analytic derivative 6x - 4 is zero
h = 0.001
x = 2/3
print("Derivative:", (f(x + h) - f(x)) / h)
a = 2.0
b = -3.0
c = 10.0
d = a*b + c
print(d)

# Nudge the input a by h and measure the effect on d; this estimates dd/da = b = -3.0
h = 0.001
# inputs
a = 2.0
b = -3.0
c = 10.0
d1 = a*b + c
a += h
d2 = a*b + c
print('d1', d1)
print('d2', d2)
print('slope', (d2 - d1)/h)

20.0
Derivative: 0.0029999999995311555
4.0
d1 4.0
d2 3.997
slope -3.0000000000001137

[Plot of f(x) = 3x^2 - 4x + 5 for x in [-5, 5)]
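
As a sanity check (my addition, not from the video), the forward-difference estimate should approach the analytic derivative f'(x) = 6x - 4, which is exactly 0 at x = 2/3:

# Hedged sketch: compare the forward-difference estimate with the analytic
# derivative f'(x) = 6x - 4 at a few points.
h = 0.001
for x in [2/3, 2.0, -3.0]:
    numeric = (f(x + h) - f(x)) / h
    analytic = 6*x - 4
    print(f"x={x:+.3f}  numeric={numeric:+.6f}  analytic={analytic:+.6f}")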

class Value:
  
  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"
  
  def __add__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')
    
    def _backward():
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    
    return out

  def __mul__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')
    
    def _backward():
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
      
    return out
  
  def __pow__(self, other):
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
        self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out
  
  def __rmul__(self, other): # other * self
    return self * other

  def __truediv__(self, other): # self / other
    return self * other**-1

  def __neg__(self): # -self
    return self * -1

  def __sub__(self, other): # self - other
    return self + (-other)

  def __radd__(self, other): # other + self
    return self + other

  def tanh(self):
    x = self.data
    t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
    out = Value(t, (self, ), 'tanh')
    
    def _backward():
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    
    return out
  
  def exp(self):
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')
    
    def _backward():
      self.grad += out.data * out.grad # NOTE: in the video I incorrectly used = instead of +=. Fixed here.
    out._backward = _backward
    
    return out
  
  
  def backward(self):
    
    topo = []
    visited = set()
    def build_topo(v):
      if v not in visited:
        visited.add(v)
        for child in v._prev:
          build_topo(child)
        topo.append(v)
    build_topo(self)
    
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()
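
A quick smoke test of the class (my addition, not from the video): run backward() on a tiny expression and check the gradients by hand.

x = Value(3.0); y = Value(2.0)
z = x*y + x
z.backward()
print(z.data, x.grad, y.grad)  # 9.0, dz/dx = y + 1 = 3.0, dz/dy = x = 3.0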
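The cell that defined these Value objects didn't survive the export. Judging from the printed output below and the later draw_dot(L) call, it built the small expression graph from the video; a reconstruction:

a = Value(2.0, label='a')
b = Value(-3.0, label='b')
c = Value(10.0, label='c')
e = a*b; e.label = 'e'    # -6.0
d = e + c; d.label = 'd'  # 4.0
f = Value(-2.0, label='f')
L = d*f; L.label = 'L'    # -8.0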
print(d._prev, d._op)
from graphviz import Digraph

def trace(root):
    # Builds a set of all nodes and edges in the graph
    nodes, edges = set(), set()
    def build(v):
        if v not in nodes:
            nodes.add(v)
            for child in v._prev:
                edges.add((child, v))
                build(child)
    build(root)
    return nodes, edges

def draw_dot(root):
    dot = Digraph(format="svg", graph_attr={"rankdir": "LR"}) # LR = left to right
    nodes, edges = trace(root)
    for n in nodes:
        uid = str(id(n))
        # For any value in the graph, create a rectangular ("record") node for it
        dot.node(name=uid, label="{ %s | data %.4f | grad %.4f }" % (n.label, n.data, n.grad), shape="record")
        if n._op:
            # If this value is the result of some operation, create an op node for it
            dot.node(name=uid + n._op, label=n._op)
            # and connect this node to it
            dot.edge(uid + n._op, uid)
    for n1, n2 in edges:
        # connect n1 to the op node of n2
        dot.edge(str(id(n1)), str(id(n2)) + n2._op)
    return dot
The print(d._prev, d._op) above outputs:

{Value(data=10.0), Value(data=-6.0)} +

draw_dot(L)
[Graphviz rendering of the expression graph for L]

# inputs x1,x2
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')
# weights w1,w2
w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')
# bias of the neuron
b = Value(6.8813735870195432, label='b')
# x1*w1 + x2*w2 + b
x1w1 = x1*w1; x1w1.label = 'x1*w1'
x2w2 = x2*w2; x2w2.label = 'x2*w2'
x1w1x2w2 = x1w1 + x2w2; x1w1x2w2.label = 'x1*w1 + x2*w2'
n = x1w1x2w2 + b; n.label = 'n'
o = n.tanh(); o.label = 'o'
o.backward()
draw_dot(o)
[Graphviz rendering of the neuron's computation graph, with data and grad for every node]
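
As a quick check (my addition, not in the original notebook), the leaf gradients after o.backward() should agree with the PyTorch verification further down:

print('x1', x1.grad)  # expect ~ -1.5
print('w1', w1.grad)  # expect ~  1.0
print('x2', x2.grad)  # expect ~  0.5
print('w2', w2.grad)  # expect ~  0.0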

# inputs x1,x2
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')
# weights w1,w2
w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')
# bias of the neuron
b = Value(6.8813735870195432, label='b')
# x1*w1 + x2*w2 + b
x1w1 = x1*w1; x1w1.label = 'x1*w1'
x2w2 = x2*w2; x2w2.label = 'x2*w2'
x1w1x2w2 = x1w1 + x2w2; x1w1x2w2.label = 'x1*w1 + x2*w2'
n = x1w1x2w2 + b; n.label = 'n'
# ----
e = (2*n).exp()
o = (e - 1) / (e + 1)
# ----
o.label = 'o'
o.backward()
draw_dot(o)
[Graphviz rendering of the decomposed tanh computation graph]
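
Since tanh(n) = (exp(2n) - 1) / (exp(2n) + 1), this decomposed version should produce the same forward value and the same leaf gradients as the direct tanh above; the PyTorch check below confirms the numbers.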

import torch
x1 = torch.Tensor([2.0]).double()                ; x1.requires_grad = True
x2 = torch.Tensor([0.0]).double()                ; x2.requires_grad = True
w1 = torch.Tensor([-3.0]).double()               ; w1.requires_grad = True
w2 = torch.Tensor([1.0]).double()                ; w2.requires_grad = True
b = torch.Tensor([6.8813735870195432]).double()  ; b.requires_grad = True
n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

print(o.data.item())
o.backward()

print('---')
print('x2', x2.grad.item())
print('w2', w2.grad.item())
print('x1', x1.grad.item())
print('w1', w1.grad.item())

0.7071066904050358
---
x2 0.5000001283844369
w2 0.0
x1 -1.5000003851533106
w1 1.0000002567688737
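
These match the gradients micrograd computed for the same neuron above, which is good evidence the Value backprop is correct.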

import random
class Neuron:
  
  def __init__(self, nin):
    self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
    self.b = Value(random.uniform(-1,1))
  
  def __call__(self, x):
    # w * x + b
    act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
    out = act.tanh()
    return out
  
  def parameters(self):
    return self.w + [self.b]

class Layer:
  
  def __init__(self, nin, nout):
    self.neurons = [Neuron(nin) for _ in range(nout)]
  
  def __call__(self, x):
    outs = [n(x) for n in self.neurons]
    return outs[0] if len(outs) == 1 else outs
  
  def parameters(self):
    return [p for neuron in self.neurons for p in neuron.parameters()]

class MLP:
  
  def __init__(self, nin, nouts):
    sz = [nin] + nouts
    self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))]
  
  def __call__(self, x):
    for layer in self.layers:
      x = layer(x)
    return x
  
  def parameters(self):
    return [p for layer in self.layers for p in layer.parameters()]
x = [2.0, 3.0, -1.0]
n = MLP(3, [4, 4, 1])
n(x)

Value(data=-0.9554946043526819)
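
A quick sanity check on the parameter count (my addition): MLP(3, [4, 4, 1]) should have 4*(3+1) + 4*(4+1) + 1*(4+1) = 41 parameters.

print(len(n.parameters()))  # 41 = 16 + 20 + 5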

xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0] # desired targets
for k in range(20):
  
  # forward pass
  ypred = [n(x) for x in xs]
  loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))
  
  # backward pass (zero the grads first so they don't accumulate across iterations)
  for p in n.parameters():
    p.grad = 0.0
  loss.backward()
  
  # update
  for p in n.parameters():
    p.data += -0.1 * p.grad
  
  print(k, loss.data)
  

0 7.67552222138637
1 7.450091029613591
2 6.783693295817814
3 4.352168860924874
4 3.6430582368697917
5 2.987307950937402
6 2.858759712706782
7 2.4382564409118244
8 2.1322519390971193
9 1.617683409345122
10 1.2976231577093384
11 2.338236162196535
12 3.284712983214373
13 0.35098980535647045
14 0.08951349034140584
15 0.07908732693630652
16 0.07082676242677266
17 0.06411750461492344
18 0.05855858026850322
19 0.05387677636298632
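
Note the loss briefly spikes around steps 11-12 (1.30 -> 2.34 -> 3.28) before dropping sharply; with a fixed step size of 0.1 the updates can overshoot.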

ypred

[Value(data=0.8846284039663241),
 Value(data=-0.9391144640696596),
 Value(data=-0.8542497004540471),
 Value(data=0.8750361136633112)]
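
The predictions are close to the desired targets [1.0, -1.0, -1.0, 1.0].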
