Hakaru

An embedded probabilistic programming language in Haskell


Outline

  • Show how to install Hakaru
  • Write a simple program
  • Show the Metropolis-Hastings interpreter
  • Write polynomial regression
  • Write non-parametric polynomial regression
  • Detect outliers
  • Show a game theory example

Install Hakaru by running:

cabal update
cabal install hakaru
In [1]:
:opt no-lint
import qualified Language.Hakaru.ImportanceSampler as IS
import Language.Hakaru.Distribution
test = IS.unconditioned (normal 0 1)
In [2]:
:type normal
normal :: Double -> Double -> Dist Double
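
The two Double arguments are the mean and the standard deviation.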
In [3]:
IS.empiricalMeasure 10 test []
Mixture $ fromList [(-0.6103374904833524,1.0),(-0.5518749140969872,1.0),(-0.49081067760500324,1.0),(6.543744083886646e-2,1.0),(0.18857166199406272,1.0),(0.18908425310342866,1.0),(0.22207997028019827,1.0),(0.6583731186918189,1.0),(0.7056217133770497,1.0),(0.796452986683651,1.0)]
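
Each draw carries weight 1.0: since nothing in the model is conditioned, the importance sampler has no observations to weight against.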
In [4]:
import Control.Lens
import Data.Default
import Graphics.Rendering.Chart
import Graphics.Rendering.Chart.Gtk
import Graphics.Rendering.Chart.Plot.Histogram
import qualified Data.Vector as V

values = V.fromList [1,1,2,3,8,8,8,8,10] :: V.Vector Double

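-- Render a histogram of the given values, split into the given number of bins.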
makeHistogram bins values title = toRenderable layout
        where hist = plot_hist_values  .~ values
                         $ plot_hist_range .~ Just (V.minimum values, V.maximum values)
                         $ plot_hist_bins  .~ bins
                         $ plot_hist_drop_lines .~ True
                         $ defaultPlotHist
              layout :: Layout Double Int
              layout = layout_title .~ title
                         $ layout_title_style . font_name .~ "MathJax_"
                         $ layout_plots .~ [ histToPlot hist ]
                         $ def
In [5]:
import Control.Monad
import Data.Dynamic
import qualified Data.Map.Strict as M

test  = IS.unconditioned (normal (-3) 1)
test2 = IS.unconditioned (uniform 1 5)

do
   t  <- IS.sample test []
   let s  = take 1000 $ map fst t
   t2 <- IS.sample test2 []
   let s2 = take 1000 $ map fst t2
   return (makeHistogram 30 (V.fromList (s ++ s2)) "Histogram")

Using a different sampler

Hakaru also provides a Metropolis-Hastings interpreter. Like the importance sampler, it yields a lazy stream of samples, so we simply take however many we need.

In [6]:
import Language.Hakaru.Metropolis
In [7]:
:t unconditioned (normal 1 1)
unconditioned (normal 1 1) :: Measure Double
In [8]:
mu = 5
sd = 1
test = unconditioned (normal mu sd)
do
   samples <- mcmc test []
   let l = V.fromList (take 1000 samples)
   return $ makeHistogram 30 l ("Normal(" ++ show mu ++ ","
                                          ++ show sd ++ ")")
In [9]:
import Statistics.Sample
mu = 5
sd = 1
test = unconditioned (normal mu sd)
do
   samples <- mcmc test []
   let l = V.fromList (take 1000 samples)
   print $ "expectation of normal("++ show mu ++
           "," ++ show sd ++ "): " ++ show (mean l)
"expectation of normal(5,1): 5.003596170064048"

Polynomial Regression


Let's try fitting some data.

In [11]:
x = [ 1.25      ,  1.19897959,  1.14795918,  1.09693878,  1.04591837,
        0.99489796,  0.94387755,  0.89285714,  0.84183673,  0.79081633,
        0.73979592,  0.68877551,  0.6377551 ,  0.58673469,  0.53571429,
        0.48469388,  0.43367347,  0.38265306,  0.33163265,  0.28061224,
        0.22959184,  0.17857143,  0.12755102,  0.07653061,  0.0255102 ,
       -0.0255102 , -0.07653061, -0.12755102, -0.17857143, -0.22959184,
       -0.28061224, -0.33163265, -0.38265306, -0.43367347, -0.48469388,
       -0.53571429, -0.58673469, -0.6377551 , -0.68877551, -0.73979592,
       -0.79081633, -0.84183673, -0.89285714, -0.94387755, -0.99489796,
       -1.04591837, -1.09693878, -1.14795918, -1.19897959, -1.25      ]
y = [ -4.01097324e-01,   1.37183656e+00,   6.05640862e-01,
         5.13980618e-01,   1.46145122e-01,   3.03874391e-02,
        -1.09760266e-01,  -5.98794850e-01,  -9.94191100e-02,
         6.92535614e-01,  -7.31383379e-01,  -6.72657778e-01,
         1.08296068e-01,   3.33868251e-01,  -1.23912756e+00,
        -4.44110290e-01,   9.99476759e-02,  -4.92289120e-02,
        -1.60368358e-01,   3.03248892e-05,   3.64213675e-01,
         1.88036936e-01,  -3.71764627e-01,   2.66679977e-01,
         9.60937596e-01,   1.28421409e-01,  -3.06628259e-02,
         5.18123525e-01,   1.31226181e-01,   1.04903679e+00,
         5.81470326e-01,   7.46482671e-01,   4.77020161e-01,
         6.32933061e-01,   1.12959324e+00,  -8.56678715e-01,
         5.40763945e-01,   8.39047069e-01,  -7.21162183e-01,
         3.68034857e-01,   4.90999581e-01,   3.05868046e-01,
         4.86698528e-01,   1.72560589e-01,   1.27359037e+00,
         8.75089065e-01,   6.12808736e-01,  -2.00583597e-01,
        -7.29245769e-02,  -6.38596144e-01]
In [12]:
import System.Environment(getArgs)
import Graphics.Rendering.Chart
import Data.Colour
import Data.Colour.Names
import Data.Default.Class
import Graphics.Rendering.Chart.Backend.Cairo
import Control.Lens

setLinesBlue :: PlotLines a b -> PlotLines a b
setLinesBlue = plot_lines_style  . line_color .~ opaque blue

scatter x y title = toRenderable layout
  where
    plot = plot_points_style .~ filledCircles 2 (opaque purple)
              $ plot_points_values .~ zip x y
              $ def

    layout = layout_title .~ title
           $ layout_plots .~ [toPlot plot]
           $ def

line x y title = toRenderable layout
  where
    plot = plot_lines_values .~ [zip x y]
              $ setLinesBlue
              $ def

    layout = layout_title .~ title
           $ layout_plots .~ [toPlot plot]
           $ def
           
scatter x y "Scatter of data"

What we want to do is fit a polynomial to these points. To get a sense of how well we are doing, we will render the fitted coefficients as a curve, which lets us visualize the quality of the fit.

In [13]:
import System.Environment(getArgs)
import Graphics.Rendering.Chart
import Data.Colour
import Data.Colour.Names
import Data.Default.Class
import Graphics.Rendering.Chart.Backend.Cairo
import Control.Lens

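-- enum a b c: the sequence a, a+b, a+2b, ..., stopping before it reaches c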
enum :: (Fractional a, Ord a) => a -> a -> a -> [a]
enum a b c | (c - a) / b < 1 = []
enum a b c = a : (enum (a+b) b c)

poly :: Double -> Double
poly x = (x - 2)*x*(x + 2)

points = [ (x,(poly x)) | x <- enum (-2.5) 0.5 2.5]

chart poly points = toRenderable layout
  where
    x = map fst points
    pmin = minimum x
    pmax = maximum x
    poly1 = plot_lines_values .~ [[ (x,(poly x)) | x <- enum pmin 0.05 pmax]]
              $ setLinesBlue
              $ plot_lines_title .~ "poly"
              $ def

    poly2 = plot_points_style .~ filledCircles 2 (opaque purple)
              $ plot_points_values .~ points
              $ plot_points_title .~ "poly points"
              $ def

    layout = layout_title .~ "Polynomial Plot"
           $ layout_plots .~ [toPlot poly1,
                              toPlot poly2]
           $ def
           
chart poly points

Let's see how a cubic polynomial fits.

In [14]:
import Data.List
import Language.Hakaru.Types

expectations l = map (mean . V.fromList) (transpose l)

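-- poly1d [w0,w1,w2,...] x evaluates w0 + w1*x + w2*x^2 + ... by Horner's rule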
poly1d weights x = poly weights x 1
   where poly [] x acc = 0
         poly (w:ws) x acc = w*acc + (poly ws x acc*x)

cubic :: [Double] -> Measure [Double]
cubic x = do
    w0 <- unconditioned (normal 0 2)
    w1 <- unconditioned (normal 0 2)
    w2 <- unconditioned (normal 0 2)
    w3 <- unconditioned (normal 0 2)
    y <- mapM (conditioned . (\x -> normal (poly1d [w0, w1, w2, w3] x) 1)) x
    return [w0, w1, w2, w3]

burn = 5000

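-- Each observed y is passed to mcmc as Just (toDyn (Lebesgue y)), marking it
-- as an observation of a continuous (density-carrying) variable.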
do
   l <- mcmc (cubic x) (map (Just . toDyn . Lebesgue) y)
   let means = expectations $ take 1000 $ drop burn l
   return $ chart (poly1d means) (zip x y)

Let's simplify this code. Haskell's replicateM runs an action a given number of times and collects the results.

This lets us draw the weights as a single list and pass it around.
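
For instance, in the list monad (a quick illustration, not part of the model code):

replicateM 2 [0,1]  -- [[0,0],[0,1],[1,0],[1,1]]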

In [15]:
expectations l = map (mean . V.fromList) (transpose l)

poly1d weights x = poly weights x 1
   where poly [] x acc = 0
         poly (w:ws) x acc = w*acc + (poly ws x acc*x)

cubic :: [Double] -> Measure [Double]
cubic x = do
    w <- replicateM 4 $ unconditioned (normal 0 2)
    y <- mapM (conditioned . (\x -> normal (poly1d w x) 1)) x
    return w

burn = 5000

do
   l <- mcmc (cubic x) (map (Just . toDyn . Lebesgue) y)
   let means = expectations $ take 1000 $ drop burn l
   return $ chart (poly1d means) (zip x y)

With that simplification, we can also explore whether a different polynomial order should be used. Putting a prior on the number of coefficients lets the posterior tell us which order is the best choice.

In [16]:
expectations l = map (mean . V.fromList) (transpose l)

poly1d weights x = poly weights x 1
   where poly [] x acc = 0
         poly (w:ws) x acc = w*acc + (poly ws x acc*x)

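-- A Poisson prior on the number of coefficients lets the data choose the polynomial order.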
curve :: [Double] -> Measure Int
curve x = do
    m <- unconditioned $ poisson 8
    w <- replicateM m $ unconditioned (normal 0 2)
    y <- mapM (conditioned . (\x -> normal (poly1d w x) 1)) x
    return m

To visualize these choices, we need a plot for discrete values.

In [17]:
import Data.List

frequency :: Ord a => [a] -> [(a,Int)] 
frequency list = map (\l -> (head l, length l)) (group (sort list))
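
For example:

frequency [1,1,2,3,3,3]  -- [(1,2),(2,1),(3,3)]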
In [18]:
import Data.Colour
import Data.Colour.Names
import Data.Colour.SRGB (sRGB)
import Data.Default.Class

makeDiscrete values title = toRenderable layout
 where
  counts = frequency values 
  layout = 
        layout_title .~ title
      $ layout_title_style . font_size .~ 10
      $ layout_x_axis . laxis_generate .~ autoIndexAxis (map (show . fst) counts)
      $ layout_y_axis . laxis_override .~ axisGridHide
      $ layout_left_axis_visibility . axis_show_ticks .~ False
      $ layout_plots .~ [ plotBars bars ]
      $ layout_legend .~ Nothing
      $ def :: Layout PlotIndex Int

  bars = plot_bars_values .~ addIndexes (map (\ x -> [snd x]) counts)
      $ plot_bars_item_styles .~ [(solidFillStyle (opaque $ sRGB 0.5 0.5 1.0), Nothing)]
      $ def
In [19]:
burn = 1000
do
   l <- mcmc (curve x) (map (Just . toDyn . Lebesgue) y)
   return $ makeDiscrete (take 2000 $ drop burn l) "Posterior on Number of Coefficients"

The posterior puts its mass on a few candidates. Seven coefficients, i.e. a degree-six polynomial? That seems weird, but let's try it.

In [20]:
curve :: [Double] -> Measure [Double]
curve x = do
    w <- replicateM 7 $ unconditioned (normal 0 2)
    y <- mapM (conditioned . (\x -> normal (poly1d w x) 1)) x
    return w

burn = 1000

do
   l <- mcmc (curve x) (map (Just . toDyn . Lebesgue) y)
   let means = expectations $ take 1000 $ drop burn l
   return $ chart (poly1d means) (zip x y)

This seems like a somewhat better fit. Keep in mind that we used a Poisson prior, which encodes how many coefficients we expect on average; other choices of prior are unlikely to give similar results.

Outlier Detection


Data is rarely as clean as in the previous example.

In [21]:
x_out = [-2.47046868, -2.36790458, -2.26534048, -2.16277637, -2.06021227, -1.95764817, -1.85508407,
     -1.75251996, -1.64995586, -1.54739176, -1.44482766, -1.34226355, -1.23969945, -1.13713535,
     -1.03457125, -0.93200714, -0.82944304, -0.72687894, -0.62431484, -0.52175073, -0.41918663,
     -0.31662253, -0.21405843, -0.11149432, -0.00893022, 0.09363388,  0.19619799,  0.29876209,
      0.40132619,  0.50389029, 0.6064544 ,  0.7090185 ,  0.8115826 ,  0.9141467 ,  1.01671081,
      1.11927491,  1.22183901,  1.32440311,  1.42696722,  1.52953132, -2.0, -1.55555556,
     -1.11111111, -0.66666667, -0.22222222, 0.22222222,  0.66666667,  1.11111111,  1.55555556,  2.0]
y_out = [-4.08655521, -4.94908167, -5.55573685, -3.7502832 , -4.93007015, -2.34106153, -3.31114033,
     -3.23165512, -3.69215366, -3.12813371, -1.81986816, -1.49942524, -1.42647913, -1.02643592,
     -1.29952275, -1.07021507, -0.054402  , -0.45484295,  0.76516168, -0.17242427, 0.26827251,
      0.89381081,  1.70693168,  1.239561  ,  2.2079517 , 1.20606684,  2.76864282,  2.80423461,
      3.84555608,  3.77723   , 3.68546601,  4.65722966,  5.33267795,  4.23107215,  4.56852846,
      5.23228827,  6.19655233,  6.48768459,  7.32349587,  6.24823713, 3.98397291, -3.25830321,
      0.4881191 , -1.85683895, -9.46533531, 4.74818749, -5.02765689, -3.58144202, -7.98408015,  5.94158254]

scatter x_out y_out "Line with outliers"

Let's try a simple linear model first.

In [22]:
expectations l = map (mean . V.fromList) (transpose l)

poly1d weights x = poly weights x 1
   where poly [] x acc = 0
         poly (w:ws) x acc = w*acc + (poly ws x acc*x)

linear :: [Double] -> Measure [Double]
linear x = do
    w <- replicateM 2 $ unconditioned (normal 0 4)
    y <- mapM (conditioned . (\x -> normal (poly1d w x) 1)) x
    return w

do
   l <- mcmc (linear x_out) (map (Just . toDyn . Lebesgue) y_out)
   let means = expectations $ take 1000 l
   return $ chart (poly1d means) (zip x_out y_out)

The trouble is that the outliers are throwing off the estimate. Let's model them explicitly.

In [23]:
expectations l = map (mean . V.fromList) (transpose l)

poly1d weights x = poly weights x 1
   where poly [] x acc = 0
         poly (w:ws) x acc = w*acc + (poly ws x acc*x)

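-- Inliers follow the regression line with unit Gaussian noise; outliers are explained away by a broad uniform.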
outlier_model :: [Double] -> (Bool, Double) -> Measure Double
outlier_model w (False,x) = conditioned $ normal (poly1d w x) 1
outlier_model w (True,_ ) = conditioned $ uniform (-10) 10

linear :: [Double] -> Measure [Double]
linear x = do
    outliers <- mapM (\ x -> unconditioned (bern 0.1)) x
    w <- replicateM 2 $ unconditioned (normal 0 4)
    y <- mapM (outlier_model w) (zip outliers x)
    return w

burn = 5000

do
   l <- mcmc (linear x_out) (map (Just . toDyn . Lebesgue) y_out)
   let means = expectations $ take 1000 $ drop burn l
   return $ chart (poly1d means) (zip x_out y_out)