cabal update
cabal install hakaru
:opt no-lint
import qualified Language.Hakaru.ImportanceSampler as IS
import Language.Hakaru.Distribution
test = IS.unconditioned (normal 0 1)
:type normal
IS.empiricalMeasure 10 test []
import Control.Lens
import Data.Default
import Graphics.Rendering.Chart
import Graphics.Rendering.Chart.Gtk
import Graphics.Rendering.Chart.Plot.Histogram
import qualified Data.Vector as V
values = V.fromList [1,1,2,3,8,8,8,8,10] :: V.Vector Double
makeHistogram bins values title = toRenderable layout
where hist = plot_hist_values .~ values
$ plot_hist_range .~ Just (V.minimum values, V.maximum values)
$ plot_hist_bins .~ bins
$ plot_hist_drop_lines .~ True
$ defaultPlotHist
layout :: Layout Double Int
layout = layout_title .~ title
$ layout_title_style . font_name .~ "MathJax_"
$ layout_plots .~ [ histToPlot hist ]
$ def
import Control.Monad
import Data.Dynamic
import qualified Data.Map.Strict as M
test = IS.unconditioned (normal (-3) 1)
test2 = IS.unconditioned (uniform 1 5)
do
t <- IS.sample test []
let s = take 1000 $ map fst t
t2 <- IS.sample test2 []
let s2 = take 1000 $ map fst t2
return (makeHistogram 30 (V.fromList (s ++ s2)) "Histogram")
import Language.Hakaru.Metropolis
:t unconditioned (normal 1 1)
mu = 5
sd = 1
test = unconditioned (normal mu sd)
do
samples <- mcmc test []
let l = V.fromList (take 1000 samples)
return $ makeHistogram 30 l ("Normal(" ++ show mu ++ ","
++ show sd ++ ")")
import Statistics.Sample
mu = 5
sd = 1
test = unconditioned (normal mu sd)
do
samples <- mcmc test []
let l = V.fromList (take 1000 samples)
print $ "expectation of normal("++ show mu ++
"," ++ show sd ++ "): " ++ show (mean l)
x = [ 1.25 , 1.19897959, 1.14795918, 1.09693878, 1.04591837,
0.99489796, 0.94387755, 0.89285714, 0.84183673, 0.79081633,
0.73979592, 0.68877551, 0.6377551 , 0.58673469, 0.53571429,
0.48469388, 0.43367347, 0.38265306, 0.33163265, 0.28061224,
0.22959184, 0.17857143, 0.12755102, 0.07653061, 0.0255102 ,
-0.0255102 , -0.07653061, -0.12755102, -0.17857143, -0.22959184,
-0.28061224, -0.33163265, -0.38265306, -0.43367347, -0.48469388,
-0.53571429, -0.58673469, -0.6377551 , -0.68877551, -0.73979592,
-0.79081633, -0.84183673, -0.89285714, -0.94387755, -0.99489796,
-1.04591837, -1.09693878, -1.14795918, -1.19897959, -1.25 ]
y = [ -4.01097324e-01, 1.37183656e+00, 6.05640862e-01,
5.13980618e-01, 1.46145122e-01, 3.03874391e-02,
-1.09760266e-01, -5.98794850e-01, -9.94191100e-02,
6.92535614e-01, -7.31383379e-01, -6.72657778e-01,
1.08296068e-01, 3.33868251e-01, -1.23912756e+00,
-4.44110290e-01, 9.99476759e-02, -4.92289120e-02,
-1.60368358e-01, 3.03248892e-05, 3.64213675e-01,
1.88036936e-01, -3.71764627e-01, 2.66679977e-01,
9.60937596e-01, 1.28421409e-01, -3.06628259e-02,
5.18123525e-01, 1.31226181e-01, 1.04903679e+00,
5.81470326e-01, 7.46482671e-01, 4.77020161e-01,
6.32933061e-01, 1.12959324e+00, -8.56678715e-01,
5.40763945e-01, 8.39047069e-01, -7.21162183e-01,
3.68034857e-01, 4.90999581e-01, 3.05868046e-01,
4.86698528e-01, 1.72560589e-01, 1.27359037e+00,
8.75089065e-01, 6.12808736e-01, -2.00583597e-01,
-7.29245769e-02, -6.38596144e-01]
import System.Environment(getArgs)
import Graphics.Rendering.Chart
import Data.Colour
import Data.Colour.Names
import Data.Default.Class
import Graphics.Rendering.Chart.Backend.Cairo
import Control.Lens
setLinesBlue :: PlotLines a b -> PlotLines a b
setLinesBlue = plot_lines_style . line_color .~ opaque blue
scatter x y title = toRenderable layout
where
plot = plot_points_style .~ filledCircles 2 (opaque purple)
$ plot_points_values .~ zip x y
$ def
layout = layout_title .~ title
$ layout_plots .~ [toPlot plot]
$ def
line x y title = toRenderable layout
where
plot = plot_lines_values .~ [zip x y]
$ setLinesBlue
$ def
layout = layout_title .~ title
$ layout_plots .~ [toPlot plot]
$ def
scatter x y "Scatter of data"
We want to do is fit a polynomial to these points. To get a sense of how well we are doing, we are going to represent our coefficients as a line. This will let us visualize how well it fits.
import System.Environment(getArgs)
import Graphics.Rendering.Chart
import Data.Colour
import Data.Colour.Names
import Data.Default.Class
import Graphics.Rendering.Chart.Backend.Cairo
import Control.Lens
enum :: (Fractional a, Ord a) => a -> a -> a -> [a]
enum a b c | (c - a) / b < 1 = []
enum a b c = a : (enum (a+b) b c)
poly :: Double -> Double
poly x = (x - 2)*x*(x + 2)
points = [ (x,(poly x)) | x <- enum (-2.5) 0.5 2.5]
chart poly points = toRenderable layout
where
x = map fst points
pmin = minimum x
pmax = maximum x
poly1 = plot_lines_values .~ [[ (x,(poly x)) | x <- enum pmin 0.05 pmax]]
$ setLinesBlue
$ plot_lines_title .~ "poly"
$ def
poly2 = plot_points_style .~ filledCircles 2 (opaque purple)
$ plot_points_values .~ points
$ plot_points_title .~ "poly points"
$ def
layout = layout_title .~ "Polynomial Plot"
$ layout_plots .~ [toPlot poly1,
toPlot poly2]
$ def
chart poly points
Let's see how a cubic polynomial fits.
import Data.List
import Language.Hakaru.Types
expectations l = map (mean . V.fromList) (transpose l)
poly1d weights x = poly weights x 1
where poly [] x acc = 0
poly (w:ws) x acc = w*acc + (poly ws x acc*x)
cubic :: [Double] -> Measure [Double]
cubic x = do
w0 <- unconditioned (normal 0 2)
w1 <- unconditioned (normal 0 2)
w2 <- unconditioned (normal 0 2)
w3 <- unconditioned (normal 0 2)
y <- mapM (conditioned . (\x -> normal (poly1d [w0, w1, w2, w3] x) 1)) x
return [w0, w1, w2, w3]
burn = 5000
do
l <- mcmc (cubic x) (map (Just . toDyn . Lebesgue) y)
let means = expectations $ take 1000 $ drop burn l
return $ chart (poly1d means) (zip x y)
Let's simplify this code. Haskell has a function replicateM for doing an action a certain number of times.
This allows us to get a list of weights and pass them around
expectations l = map (mean . V.fromList) (transpose l)
poly1d weights x = poly weights x 1
where poly [] x acc = 0
poly (w:ws) x acc = w*acc + (poly ws x acc*x)
cubic :: [Double] -> Measure [Double]
cubic x = do
w <- replicateM 4 $ unconditioned (normal 0 2)
y <- mapM (conditioned . (\x -> normal (poly1d w x) 1)) x
return w
burn = 5000
do
l <- mcmc (cubic x) (map (Just . toDyn . Lebesgue) y)
let means = expectations $ take 1000 $ drop burn l
return $ chart (poly1d means) (zip x y)
With that simplication, we can now also explore if perhaps a different polynomial order should be used. We can put a prior on different order polynomials and that way explore which order is the best choice
expectations l = map (mean . V.fromList) (transpose l)
poly1d weights x = poly weights x 1
where poly [] x acc = 0
poly (w:ws) x acc = w*acc + (poly ws x acc*x)
curve :: [Double] -> Measure Int
curve x = do
m <- unconditioned $ poisson 8
w <- replicateM m $ unconditioned (normal 0 2)
y <- mapM (conditioned . (\x -> normal (poly1d w x) 1)) x
return m
To visualize these choices we will need a visualization for discrete values.
import Data.List
frequency :: Ord a => [a] -> [(a,Int)]
frequency list = map (\l -> (head l, length l)) (group (sort list))
import Data.Colour
import Data.Colour.Names
import Data.Colour.SRGB (sRGB)
import Data.Default.Class
makeDiscrete values title = toRenderable layout
where
counts = frequency values
layout =
layout_title .~ title
$ layout_title_style . font_size .~ 10
$ layout_x_axis . laxis_generate .~ autoIndexAxis (map (show . fst) counts)
$ layout_y_axis . laxis_override .~ axisGridHide
$ layout_left_axis_visibility . axis_show_ticks .~ False
$ layout_plots .~ [ plotBars bars ]
$ layout_legend .~ Nothing
$ def :: Layout PlotIndex Int
bars = plot_bars_values .~ addIndexes (map (\ x -> [snd x]) counts)
$ plot_bars_item_styles .~ [(solidFillStyle (opaque $ sRGB 0.5 0.5 1.0), Nothing)]
$ def
burn = 1000
do
l <- mcmc (curve x) (map (Just . toDyn . Lebesgue) y)
return $ makeDiscrete (take 2000 $ drop burn l) "Weights on Polynomial Coefficient"
There are a few suggestions. Seven? that seems weird.
curve :: [Double] -> Measure [Double]
curve x = do
w <- replicateM 7 $ unconditioned (normal 0 2)
y <- mapM (conditioned . (\x -> normal (poly1d w x) 1)) x
return w
burn = 1000
do
l <- mcmc (curve x) (map (Just . toDyn . Lebesgue) y)
let means = expectations $ take 1000 $ drop burn l
return $ chart (poly1d means) (zip x y)
This seems like a bit of a better fit. Keep in mind, we used a poisson prior which assumes we expect a certain coefficent on average. Other choices of prior are unlikely to give similar results
x_out = [-2.47046868, -2.36790458, -2.26534048, -2.16277637, -2.06021227, -1.95764817, -1.85508407,
-1.75251996, -1.64995586, -1.54739176, -1.44482766, -1.34226355, -1.23969945, -1.13713535,
-1.03457125, -0.93200714, -0.82944304, -0.72687894, -0.62431484, -0.52175073, -0.41918663,
-0.31662253, -0.21405843, -0.11149432, -0.00893022, 0.09363388, 0.19619799, 0.29876209,
0.40132619, 0.50389029, 0.6064544 , 0.7090185 , 0.8115826 , 0.9141467 , 1.01671081,
1.11927491, 1.22183901, 1.32440311, 1.42696722, 1.52953132, -2.0, -1.55555556,
-1.11111111, -0.66666667, -0.22222222, 0.22222222, 0.66666667, 1.11111111, 1.55555556, 2.0]
y_out = [-4.08655521, -4.94908167, -5.55573685, -3.7502832 , -4.93007015, -2.34106153, -3.31114033,
-3.23165512, -3.69215366, -3.12813371, -1.81986816, -1.49942524, -1.42647913, -1.02643592,
-1.29952275, -1.07021507, -0.054402 , -0.45484295, 0.76516168, -0.17242427, 0.26827251,
0.89381081, 1.70693168, 1.239561 , 2.2079517 , 1.20606684, 2.76864282, 2.80423461,
3.84555608, 3.77723 , 3.68546601, 4.65722966, 5.33267795, 4.23107215, 4.56852846,
5.23228827, 6.19655233, 6.48768459, 7.32349587, 6.24823713, 3.98397291, -3.25830321,
0.4881191 , -1.85683895, -9.46533531, 4.74818749, -5.02765689, -3.58144202, -7.98408015, 5.94158254]
scatter x_out y_out "Line with outliers"
Let's try a simple linear model first
expectations l = map (mean . V.fromList) (transpose l)
poly1d weights x = poly weights x 1
where poly [] x acc = 0
poly (w:ws) x acc = w*acc + (poly ws x acc*x)
linear :: [Double] -> Measure [Double]
linear x = do
w <- replicateM 2 $ unconditioned (normal 0 4)
y <- mapM (conditioned . (\x -> normal (poly1d w x) 1)) x
return w
do
l <- mcmc (linear x_out) (map (Just . toDyn . Lebesgue) y_out)
let means = expectations $ take 1000 l
return $ chart (poly1d means) (zip x_out y_out)
The trouble is the outliers are messing up the estimator. Let's explicitly model them.
expectations l = map (mean . V.fromList) (transpose l)
poly1d weights x = poly weights x 1
where poly [] x acc = 0
poly (w:ws) x acc = w*acc + (poly ws x acc*x)
outlier_model :: [Double] -> (Bool, Double) -> Measure Double
outlier_model w (False,x) = conditioned $ normal (poly1d w x) 1
outlier_model w (True,_ ) = conditioned $ uniform (-10) 10
linear :: [Double] -> Measure [Double]
linear x = do
outliers <- mapM (\ x -> unconditioned (bern 0.1)) x
w <- replicateM 2 $ unconditioned (normal 0 4)
y <- mapM (outlier_model w) (zip outliers x)
return w
burn = 5000
do
l <- mcmc (linear x_out) (map (Just . toDyn . Lebesgue) y_out)
let means = expectations $ take 1000 $ drop burn l
return $ chart (poly1d means) (zip x_out y_out)
Much better! We can also ask our model what points it thinks are outliers.
import System.Environment(getArgs)
import Graphics.Rendering.Chart
import Data.Colour
import Data.Colour.Names
import Data.Default.Class
import Graphics.Rendering.Chart.Backend.Cairo
import Control.Lens
setLinesBlue :: PlotLines a b -> PlotLines a b
setLinesBlue = plot_lines_style . line_color .~ opaque blue
outchart poly points outliers = toRenderable layout
where
x = map fst points
pmin = minimum x
pmax = maximum x
(hollow, filled) = partition snd (zip points outliers)
line = plot_lines_values .~ [[ (x,(poly x)) | x <- [pmin, pmin+0.05 .. pmax]]]
$ setLinesBlue
$ plot_lines_title .~ "poly"
$ def
fpoint = plot_points_style .~ filledCircles 2 (opaque purple)
$ plot_points_values .~ map fst filled
$ plot_points_title .~ "poly points"
$ def
hpoint = plot_points_style .~ hollowCircles 3 1 (opaque purple)
$ plot_points_values .~ map fst hollow
$ plot_points_title .~ "outliers"
$ def
layout = layout_title .~ "Polynomial Plot with Outliers"
$ layout_plots .~ [toPlot line,
toPlot fpoint,
toPlot hpoint]
$ def
expectations l = map (mean . V.fromList) (transpose l)
poly1d weights x = poly weights x 1
where poly [] x acc = 0
poly (w:ws) x acc = w*acc + (poly ws x acc*x)
convert_outliers o = (map . map) (fromIntegral . fromEnum) o
outlier_model :: [Double] -> (Bool, Double) -> Measure Double
outlier_model w (False,x) = conditioned $ normal (poly1d w x) 1
outlier_model w (True, x) = conditioned $ uniform (-10) 10
linear :: [Double] -> Measure ([Bool],[Double])
linear x = do
outliers <- mapM (\ x -> unconditioned (bern 0.1)) x
w <- replicateM 2 $ unconditioned (normal 0 4)
y <- mapM (outlier_model w) (zip outliers x)
return (outliers,w)
burn = 5000
do
l <- mcmc (linear x_out) (map (Just . toDyn . Lebesgue) y_out)
let outliers = expectations $ convert_outliers $ take 5000 (map fst l)
let means = expectations $ take 5000 (map snd l)
return $ outchart (poly1d means) (zip x_out y_out) (map (>= 0.6) outliers)
:ext DeriveDataTypeable
import Data.Typeable
import Data.Dynamic
import Control.Monad
data Rest = French | Italian | Tapas deriving (Eq, Ord, Typeable, Show)
pickRestaurant = do
p <- unconditioned $ uniform 0 1
fussy <- unconditioned $ bern p
whereWeAte <- conditioned $ if fussy then categorical [(Italian, 0.5),
(Tapas, 0.5)]
else categorical [(French, 1),
(Italian,1),
(Tapas,1)]
return p
do
samples <- mcmc pickRestaurant [Just (toDyn (Discrete French))]
return $ take 10 samples
As we see below, since we are eating at a French restaurant it is unlikely we are eating with our fussy friend.
do
samples <- mcmc pickRestaurant [Just (toDyn (Discrete French))]
return $ makeHistogram 30 (V.fromList (take 10000 samples)) "Fussiness"
In this final example, we show how we can model agents playing iterated prisoner's dilemma
type Outcome = Measure (Bool, Bool)
type Trust = Double
type Strategy = Trust -> Bool -> Bool -> Measure Bool
tit :: Trust -> Bool -> Bool -> Measure Bool
tit me True _ = conditioned $ bern 0.9
tit me False _ = conditioned $ bern me
play :: Strategy -> Strategy ->
(Bool, Bool) -> (Trust, Trust) -> Outcome
play strat_a strat_b (last_a,last_b) (a,b) = do
a_action <- strat_a a last_b last_a
b_action <- strat_b b last_a last_b
return (a_action, b_action)
iterated_game :: Measure (Double, Double)
iterated_game = do
let a_initial = False
let b_initial = False
a <- unconditioned $ uniform 0 1
b <- unconditioned $ uniform 0 1
rounds <- replicateM 10 $ return (a, b)
foldM_ (play tit tit) (a_initial,b_initial) rounds
return (a, b)
games = [Just (toDyn (Discrete False)), Just (toDyn (Discrete False)),
Just (toDyn (Discrete False)), Just (toDyn (Discrete True)),
Just (toDyn (Discrete False)), Just (toDyn (Discrete False)),
Just (toDyn (Discrete False)), Just (toDyn (Discrete True)),
Just (toDyn (Discrete False)), Just (toDyn (Discrete True)),
Just (toDyn (Discrete False)), Just (toDyn (Discrete False)),
Just (toDyn (Discrete False)), Just (toDyn (Discrete True)),
Just (toDyn (Discrete False)), Just (toDyn (Discrete True)),
Just (toDyn (Discrete False)), Just (toDyn (Discrete True)),
Just (toDyn (Discrete False)), Just (toDyn (Discrete False))]
do
l <- mcmc iterated_game games
return $ [makeHistogram 30 (V.fromList $ map fst (take 5000 l)) "A's paranoia",
makeHistogram 30 (V.fromList $ map snd (take 5000 l)) "B's paranoia"]
allCooperate :: Trust -> Bool -> Bool -> Measure Bool
allCooperate _ _ _ = conditioned $ bern 0.1
allDefect :: Trust -> Bool -> Bool -> Measure Bool
allDefect _ _ _ = conditioned $ bern 0.9
grimTrigger :: Trust -> Bool -> Bool -> Measure Bool
grimTrigger me True False = conditioned $ bern 0.9
grimTrigger me False False = conditioned $ bern 0.1
grimTrigger me _ True = conditioned $ bern 0.9
data SChoice = TitForTat | GrimTrigger | AllDefect | AllCooperate
deriving (Eq, Ord, Enum, Typeable, Show)
chooseStrategy :: SChoice -> Strategy
chooseStrategy TitForTat = tit
chooseStrategy AllDefect = allDefect
chooseStrategy AllCooperate = allCooperate
chooseStrategy GrimTrigger = grimTrigger
strat :: Measure SChoice
strat = unconditioned $ categorical [(AllCooperate, 0.25),
(AllDefect, 0.25),
(GrimTrigger, 0.25),
(TitForTat, 0.25)]
iterated_game2 :: Measure (SChoice, SChoice)
iterated_game2 = do
let a_initial = False
let b_initial = False
a <- unconditioned $ uniform 0 1
b <- unconditioned $ uniform 0 1
na <- strat
let a_strat = chooseStrategy na
nb <- strat
let b_strat = chooseStrategy nb
rounds <- replicateM 10 $ return (a, b)
foldM_ (play a_strat b_strat) (a_initial,b_initial) rounds
return (na, nb)
do
l <- mcmc iterated_game2 games
return [makeDiscrete (map fst (take 1000 l)) "A strategy",
makeDiscrete (map snd (take 1000 l)) "B strategy"]