<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;DE8ARHo7cCp7ImA9WhVWGUg.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668</id><updated>2012-05-02T04:07:25.408-07:00</updated><category term="yesod haskell cabal" /><category term="haskell arbitrage parsec dynamic-programming floyd-warshall" /><category term="javascript" /><category term="ai" /><category term="near duplicate detection" /><category term="clojure" /><category term="dynamic" /><category term="redis" /><category term="newton" /><category term="solitaire" /><category term="generating text" /><category term="map" /><category term="paip" /><category term="websockets" /><category term="not-really-that-serious" /><category term="algorithms" /><category term="antiobjects" /><category term="yql" /><category term="gameoflife" /><category term="jvisualvm" /><category term="consistent-hashing" /><category term="java methodnamer.com" /><category term="simhash" /><category term="agents" /><category term="devdays" /><category term="stackoverflow" /><category term="csharp" /><category term="freebase" /><category term="robocode" /><category term="opengl" /><category term="criterion" /><category term="wordplay" /><category term="world cup" /><category term="haskell" /><category term="flocking" /><category term="macro" /><category term="performance" /><category term="programming clojure" /><category term="canvas" /><category term="ubuntu objectivec" /><category term="transient" /><category term="paradigms of artificial intelligence programming" /><category term="orbit simulator" /><category term="google wave" /><category term="scala" /><category term="emacs" /><category term="Accu" /><category term="neural networks" /><category term="java" /><category term="arrays" /><category term="reduce" /><category term="static" /><category term="fluid dynamics" /><category term="arc" /><category term="icfp" /><category term="fractals" /><category term="icfp2009" /><category term="hlint" /><category term="microsimulation" /><category term="stm" /><category term="music" /><category term="monte-carlo" /><category term="dung-beetle development" /><category term="6502" /><category term="quickcheck" /><category term="ascii art" /><category term="concurrency" /><category term="regex" /><category term="image magick" /><category term="tests" /><category term="appengine" /><category term="reconcile" /><category term="functional programming" /><category term="coding dojo" /><category term="history" /><category term="yesod" /><category term="klondike" /><category term="brians brain" /><category term="ray-tracing" /><category term="ubuntu" /><category term="traffic" /><category term="hoogle" /><category term="search-engine" /><category term="cards" /><category term="metadata" /><category term="emergent behaviour" /><category term="real world haskell" /><category term="json" /><title>Fatvat</title><subtitle type="html">Exploring functional programming</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://www.fatvat.co.uk/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://www.fatvat.co.uk/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>168</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/fatvat" /><feedburner:info uri="fatvat" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry gd:etag="W/&quot;DE8ARHo6eyp7ImA9WhVWGUg.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-2490779498122807948</id><published>2012-04-29T12:42:00.000-07:00</published><updated>2012-05-02T04:07:25.413-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-05-02T04:07:25.413-07:00</app:edited><title>Thoughts from the final day of ACCU</title><content type="html">The final day keynote was a series of lightning keynotes (apparently due to a missing speaker during the &lt;a href="http://en.wikipedia.org/wiki/Eyjafjallaj%C3%B6kull"&gt;Icelandic Volcano problem&lt;/a&gt; a few years before and it now being an established tradition).&lt;br /&gt;
&lt;br /&gt;
The opening key note was by a Lisper, &lt;a href="http://www.didierverna.com/sciblog/index.php?"&gt;Didier Verna&lt;/a&gt; and was about language obesity.&amp;nbsp; The central argument was a familiar one from a Lisp expert, but I still found it pretty compelling!&amp;nbsp; Natural languages have evolved from a few roots, and have evolved by adding new words, but the grammatical structure has remained mostly static (when was the last time a new grammar structure was added to your language?).&amp;nbsp; In contrast, the rationale for most new language design tends to involve taking the best bits from previous languages, and adding a sprinkle of some new grammatical elements to simplify things.&amp;nbsp; For example, both C# and Java have evolved to have for-each loops with new syntax.&amp;nbsp; C++ is perhaps the best example of language bloat - C++ 11 is probably the most complicated language I have ever seen with very unclear semantics.&lt;br /&gt;
&lt;br /&gt;
Lisp doesn't suffer from this problem (it suffers from &lt;quote&gt;syntactic anorexia&lt;/quote&gt;!), and the reason it doesn't is &lt;a href="http://en.wikipedia.org/wiki/Homoiconicity"&gt;homoiconicity&lt;/a&gt;.&amp;nbsp; This property is that code is data (and vice-versa).&amp;nbsp; For example, in Lisp &lt;code&gt;(+ 1 1)&lt;/code&gt; is both an expression that applies 1 and 1 to the + function, it's also a list of three &lt;strike&gt;expressions&lt;/strike&gt; atoms.&amp;nbsp; This property gives Lisp the ability to add new syntax and structures without the need for a new language.&amp;nbsp; &lt;a href="http://en.wikipedia.org/wiki/Common_Lisp_Object_System"&gt;CLOS&lt;/a&gt; is perhaps the best example of this, adding an object system to the base language without the need to change the language specification whatsoever.&amp;nbsp; The &lt;a href="http://www.ai.sri.com/pkarp/loop.html"&gt;loop macro&lt;/a&gt; is another &lt;strike&gt;good&lt;/strike&gt; (I'm not a fan; I don't want to learn a big &lt;acronym title="domain specific language"&gt;DSL&lt;/acronym&gt; for dealing with collections) example of this.&amp;nbsp; Someone will probably argue that &lt;a href="http://boost-spirit.com/home/"&gt;Boost Spirit&lt;/a&gt; is a great example of how you can use &lt;a href="http://en.wikipedia.org/wiki/Template_metaprogramming"&gt;template metaprogramming&lt;/a&gt; to do the same thing in C++ but that's a &lt;a href="http://en.wikipedia.org/wiki/Turing_tarpit"&gt;Turing tarpit&lt;/a&gt; style argument!&lt;br /&gt;
&lt;br /&gt;
Lisp will be around for ever in one form or another simply because of this property.&amp;nbsp; I'm not sure whether the same can be said of curly-brace languages.&amp;nbsp; What horrific new structures will be needed in a decade when we're programming on mega-core machines?&amp;nbsp; I found the argument compelling, and perhaps in light of the previous keynote on the &lt;a href="http://www.fatvat.co.uk/2012/04/accu-2012-day-three.html"&gt;Requiem to C&lt;/a&gt; there's a chance that a new homoiconic language will emerge to deal with our multi-core future.&amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
Charles Bailey talked about the "Important Art of Thinking".&amp;nbsp; Despite the fact that software should always be about thinking, we sometimes find ourselves developing auto-pilot and thus end up in a mess.&amp;nbsp; It's always easier to start tapping on the keyboard rather than thinking about the problem (working in &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt; inverts this for me!&lt;br /&gt;
&lt;br /&gt;
He mentioned the &lt;a href="http://en.wikipedia.org/wiki/Dreyfus_model_of_skill_acquisition"&gt;Dreyfus model of skill acquisition&lt;/a&gt; and tried to relate this to programming by peeling back the layers of abstraction on &lt;code&gt;std::endl&lt;/code&gt;.&lt;br /&gt;
&lt;ul&gt;&lt;br /&gt;&amp;nbsp;
&lt;li&gt;Novice - &lt;code&gt;std::endl&lt;/code&gt; ends a line with a newline&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;Advanced Beginner - &lt;code&gt;std::endl&lt;/code&gt; is a &lt;a href="http://www.cplusplus.com/reference/iostream/manipulators/"&gt;manipulator&lt;/a&gt;&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;Competent - &lt;code&gt;std::endl&lt;/code&gt; can be viewed as a function operating on the stream&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;Proficient - &lt;code&gt;std::endl&lt;/code&gt; is defined as a function template&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;Expert - &lt;code&gt;std::endl&lt;/code&gt; is redundant when connected to a terminal because it will (by POSIX standards) guarantee at least line-buffering.&amp;nbsp; Just use '\n' already!&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
&lt;br /&gt;
I do like the new C++ lambda sequence.&amp;nbsp; Being able to type &lt;code&gt;[](){}()&lt;/code&gt; and have it mean nothing is an achievement by anyone's standards!&amp;nbsp; In summary, think before coding...&lt;br /&gt;
&lt;br /&gt;
Next up was &lt;a href="http://emilybache.blogspot.co.uk/"&gt;Emily Bache&lt;/a&gt; talking about "Geek Feminism", more specifically programmer geek feminism.&amp;nbsp; This was defined as the ability for women to influence the programming community (on merit - no-one was asking for a free pass!).&amp;nbsp; As was evident at the conference, there aren't a lot of women in technology.&amp;nbsp; In the grand scheme of things, there aren't a lot of software engineers at all, so missing out on 50% of the population seems like a big problem!&amp;nbsp; &lt;a href="http://en.wikipedia.org/wiki/Impostor_syndrome"&gt;Imposter syndrome&lt;/a&gt; was something I am familiar with (especially in my first programming gig having come out of a research background).&amp;nbsp; It's the feeling that you think you are crap despite external evidence that you are good.&amp;nbsp; There was anecdotal evidence that women are more prone to suffer from this than men.&amp;nbsp; There was also a reference to the &lt;a href="http://en.wikipedia.org/wiki/The_Wisdom_of_Crowds"&gt;Wisdom of Crowds&lt;/a&gt; which (rather obviously) states that gathering opinions from a bigger range of backgrounds leads to better results.&amp;nbsp; Software's gender inbalance stops us taking full advantage of the wisdom of crowds.&lt;br /&gt;
&lt;br /&gt;
Finally, there was a great presentation from &lt;a href="http://www.noop.nl/"&gt;Jurgen Appelo&lt;/a&gt; on finding your motivation.&amp;nbsp; Jurgen presented CHAMPFROGS, an acronym for the various things that can motivate developers.&lt;br /&gt;
&lt;ul&gt;&lt;br /&gt;&amp;nbsp;
&lt;li&gt;&lt;b&gt;C&lt;/b&gt;uriousity&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;&lt;b&gt;H&lt;/b&gt;onour&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;&lt;b&gt;A&lt;/b&gt;cceptance&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;&lt;b&gt;M&lt;/b&gt;astery&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;&lt;b&gt;P&lt;/b&gt;ower&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;&lt;b&gt;F&lt;/b&gt;reedom&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;&lt;b&gt;R&lt;/b&gt;relatedness&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;&lt;b&gt;O&lt;/b&gt;order&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;&lt;b&gt;G&lt;/b&gt;oal&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;&lt;b&gt;S&lt;/b&gt;tatus&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
&lt;br /&gt;
The &lt;a href="http://www.noop.nl/2011/09/moving-motivators-free-exercise.html"&gt;Moving Motivators&lt;/a&gt; game sounds like something useful to try to see if your needs and desires are aligned with the work you are actually doing.&lt;br /&gt;
&lt;br /&gt;
Next I went to Usable API's in Practice by &lt;a href="http://www.giovanniasproni.com/"&gt;Giovanni Asproni&lt;/a&gt;.&amp;nbsp; This tried to fill the gap between the principles that we have (Single Responsibility, Principle of Least Astonishment, Don't Repeat Yourself) and the code that we write.&amp;nbsp; API's are the biggest asset (or liability) that a company has.&amp;nbsp; A bad API (even an internal one) limits your ability to change and can act as a productivity drain.&amp;nbsp; The following principles were presented.&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;br /&gt;&amp;nbsp;
&lt;li&gt;Code from the users perspective - One example of how to do this is not just to test your API, but also to test a program written using your API.&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;Naming - keep it simple, don't be cute and use one word per concept&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;Control to the caller - don't restrict options.&amp;nbsp; Previously I've worked somewhere were all the core container functions used their own locks.&amp;nbsp; This was a terrible decision because the caller didn't have control!&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;Explicit Context - don't use globals to hide context from the user, pass it in at construction time&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;Error Reporting - one of the harder ones.&amp;nbsp; Use layers (since an error at one layer is not necessarily an error at the next).&lt;br /&gt;&amp;nbsp; &lt;/li&gt;
&lt;li&gt;Logging - this gave some good discussion on why &lt;code&gt;Logger logger = LoggerFactory.GetLogger()&lt;/code&gt; is a bad thing!&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
&lt;br /&gt;
I chose to leave it there at ACCU.&amp;nbsp; I really enjoyed most of the talks.&amp;nbsp; The key themes this year were multi-core is coming, TDD is good and functional is fun.&amp;nbsp; I suspect these have been messages from the expert community for quite some time, but hopefully the wider software engineering community will start to realize this!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-2490779498122807948?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/Kp_M-BGVaZ1DmR2ehx_R71vapAk/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Kp_M-BGVaZ1DmR2ehx_R71vapAk/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/Kp_M-BGVaZ1DmR2ehx_R71vapAk/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Kp_M-BGVaZ1DmR2ehx_R71vapAk/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/LEL4MVbGdKY" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/2490779498122807948?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/2490779498122807948?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/LEL4MVbGdKY/thoughts-from-final-day-of-accu.html" title="Thoughts from the final day of ACCU" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2012/04/thoughts-from-final-day-of-accu.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D04FQ3o-eSp7ImA9WhVWFUQ.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-7344868127229870594</id><published>2012-04-27T23:51:00.001-07:00</published><updated>2012-04-27T23:51:52.451-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-04-27T23:51:52.451-07:00</app:edited><title>ACCU 2012 - day three</title><content type="html">The opening keynote today was "Requiem for C" by (howling mad?) &lt;a href="http://www.objectmentor.com/omTeam/martin_r.html"&gt;Uncle Bob&lt;/a&gt;.  First the easy dead language beginning with C. &lt;a href="http://en.wikipedia.org/wiki/COBOL"&gt;COBOL&lt;/a&gt; is a dead language.  It may still run 40% of the code in the world, but no new projects are being written in it and no new developers are being trained in it.  This is an easy one to agree on!&lt;br /&gt;
&lt;br /&gt;
More contentious is that this is the fate that awaits &lt;a href="http://en.wikipedia.org/wiki/C_(programming_language)"&gt;C&lt;/a&gt;.  Bob was at pains to point out that he isn't gunning for C and gave us a recap of his programming career from punchcards on the &lt;a href="http://en.wikipedia.org/wiki/PDP-8"&gt;PDP-8&lt;/a&gt; through to his first usage of C in the late 1970's.  The presentation style was fun, though there was a little &lt;a href="https://gist.github.com/2508746"&gt;controversy&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
The first nail in C's coffin was &lt;a href="http://en.wikipedia.org/wiki/C%2B%2B"&gt;C++&lt;/a&gt;.  It gave us strong typing, modularity and the wonders of templates, but importantly it still gave developers the chance to see the metal.  If you worked hard enough, you could still reason about what was going on in the underlying machine.  You can still see the metal, though it's a little fuzzy.&lt;br /&gt;
&lt;br /&gt;
Next &lt;a href="http://en.wikipedia.org/wiki/Java_(programming_language)"&gt;Java&lt;/a&gt; showed us that we don't need to see the raw metal, we can have an idealised machine running on a machine.  You don't need to see the raw metal at all, have a look at the JVM instead!  Looking back, it does seem that the world had gone bonkers at this point.  Instead of running on a machine, we'll run on a software simulation of a machine, running on a machine.  Eh?&lt;br /&gt;
&lt;br /&gt;
Now the final nail (and a familiar theme for the conference) the multi core revolution.  Now we need radically new views of the hardware to take full advantage of it.  &lt;br /&gt;
&lt;br /&gt;
I agreed with most of the keynote.  C is never going to fully go away, but it's going to be needed by a smaller and smaller percentage of people.  The presentation style was fun, but a little intense when you are nursing a hangover!&lt;br /&gt;
&lt;br /&gt;
Next I went to Kevlin Henney's talk on "Functional Programming you already know".  There's been a lot of hype generally about functional programming, but we've been practicing these concepts for years anyway (I think you could replace functional programming with any of the patterns and practises that have been hyped and still be on safe ground e.g. &lt;a href="http://en.wikipedia.org/wiki/Dependency_injection"&gt;dependency injection&lt;/a&gt;).&lt;br /&gt;
&lt;br /&gt;
There were some examples of first class functions from the C language e.g. &lt;code&gt;qsort&lt;/code&gt;, function composition (&lt;a href="http://en.wikipedia.org/wiki/Pipeline_(Unix)"&gt;UNIX pipes&lt;/a&gt;) and declarative (SQL).  There were also some great sound bites too:&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;code generation - do the wrong thing faster!&lt;/blockquote&gt;&lt;blockquote&gt;lock is the anti-fast pattern&lt;/blockquote&gt;&lt;br /&gt;
There was lots of historical content too.  It's always suprisinging to read notes from the 1970's and hear that it still sounds fresh and new today.  For example &lt;a href="http://www.purl.org/stefan_ram/pub/doc_kay_oop_en"&gt;Alan Kay's&lt;/a&gt; explanation of what object orientation really means.&lt;br /&gt;
&lt;br /&gt;
After this I went to a C++ talk about generic libraries by Andrew Sutton.  Firstly I found that he has the perfect job, researching how to make the perfect data structure and algorithms library.  Unfortunately he has to use C++!  The &lt;a href="http://code.google.com/p/origin/"&gt;Origin libraries&lt;/a&gt; are the playground for this experimentation.&lt;br /&gt;
&lt;br /&gt;
I learnt a lot of new things about C++ &lt;a href="http://en.wikipedia.org/wiki/Concepts_(C%2B%2B)"&gt;concepts&lt;/a&gt;.  A concept is the both the syntactic meaning of a type (e.g. the operands) and the semantic (the behaviour of those operations).  I was very interested in the axiom part of concepts which provides a series of relationships that any I implementation of the concept must obey.  For example, you can specify that the equality concept should be transitive, reflexive and symmetric.   I will be interested to see how verifiability evolves in C++ as it certainly doesn't seem like a natural extension for a language as wart-ridden as C++.&lt;br /&gt;
&lt;br /&gt;
Then as a C# / Java / Haskell programmer, it was clear that my next session should be the C++ 11 pub quiz.  This was a well run session about just how complicated C++ is.  Even with compiler writers, language specification contributors and so on, no one managed to understand all the code.  Compiler bugs were found and changes in behaviour over different versions were also observed.  C++ might still be close to the metal, but I don't think there is a person on earth with the necessary mental acuity to see through the complexity!&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-7344868127229870594?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/LzyQg59xs5AOOeIk57vckl9aYUo/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/LzyQg59xs5AOOeIk57vckl9aYUo/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/LzyQg59xs5AOOeIk57vckl9aYUo/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/LzyQg59xs5AOOeIk57vckl9aYUo/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/BxlYgjUHLIs" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7344868127229870594?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7344868127229870594?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/BxlYgjUHLIs/accu-2012-day-three.html" title="ACCU 2012 - day three" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2012/04/accu-2012-day-three.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0cHRX0_cCp7ImA9WhVWFUs.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-6965950993269802729</id><published>2012-04-27T15:12:00.000-07:00</published><updated>2012-04-27T15:17:14.348-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-04-27T15:17:14.348-07:00</app:edited><title>ACCU day two thoughts</title><content type="html">This is my second attempt to write these notes.  Turns out that writing in an online editor whilst connected to hotel wi-fi is a very bad idea!&lt;br /&gt;
&lt;br /&gt;
Today's keynote was given by Phil Nash, and was entitled "The Congreuent Programmer".  The message seemed to be that we live our lives according to a set of beliefs, values and principles.  If these are aligned then we are fine, otherwise we find ourselves in a state of &lt;a href="http://en.wikipedia.org/wiki/Cognitive_dissonance"&gt;cognitive dissonance&lt;/a&gt;.  One example I have for this is that I believe in TDD (at least to some degree!), but I very rarely find myself practicing it!  &lt;br /&gt;
&lt;br /&gt;
Next I went to a talk on "Go, D, C++ and the multicore revolution" by Russel Winder. The aims of this talk were to convince me that shared memory multi-theading is not appropriate for application development (I don't need much convincing on this point!) and also that the new threading related features of C++ 11 may have saved it from the dustbin.&lt;br /&gt;
&lt;br /&gt;
The multicore revolution is already here.  I'm writing this on my dual core iPad and chances are you (hi mum!) are reading it on a multicore machine of some description.  Since the multicore revolution, software has started to lag behind hardware.  As software engineers, we're the people holding things up!&lt;br /&gt;
&lt;br /&gt;
The distinction was made between &lt;a href="http://en.wikipedia.org/wiki/Concurrency_(computer_science)"&gt;concurrency&lt;/a&gt; (a design tool) and parallelism (a tool for improving performance).  Three models of controlling concurrency were then presented.  None of the are new ideas, but they are well understood tools that are only now beginning to be recognised as important.&lt;br /&gt;
&lt;br /&gt;
The &lt;a href="http://en.wikipedia.org/wiki/Actor_model"&gt;actor model&lt;/a&gt; controls concurrency by having multiple independent processes (important to note that this process need not be a separate OS process, it's just a lump of code with an independent address space) communicating via asynchronous messages.  Erlang is an example of a language that has direct support for the actor model.&lt;br /&gt;
&lt;br /&gt;
The data flow model is a spreadsheet model where the computation reacts to changes in inputs.  this has a role in big data as a counterpart to the map/reduce app.  By running a data flow net as a continuous query that updates based only on the diff, it's possible to get more immediate results.  &lt;a href="http://en.wikipedia.org/wiki/Lucid_(programming_language)"&gt;Lucid&lt;/a&gt; is an old, but interesting, example of a data flow language.&lt;br /&gt;
&lt;br /&gt;
Finally, &lt;a href="http://en.wikipedia.org/wiki/Communicating_sequential_processes"&gt;Communicating Sequential Processes&lt;/a&gt; (CSP) was presented.  This is a formal model of concurrency where processes communicate via synchronous messages.  Go reinvented CSP and called it go-routines.&lt;br /&gt;
&lt;br /&gt;
Next Russel presented a simple example that calculated Pi.  The example was kept consistent as the languages moved to higher level abstractions, from C and pthreads to Go and D with C++ and MPI in between.  The obvious point was higher level abstractions make things easier to understand.  All in all, I enjoyed this talk!&lt;br /&gt;
&lt;br /&gt;
Henrik Berglund ran a quick 45 minute session on "Real Teams".  I have to admit I initially chose this one purely because the door was open so it was a break from the unbelievably hot rooms!  Luckily for me this turned out to be an excellent choice.  The introduction talked about the &lt;a href="http://libcom.org/history/1973-skylab-4-mutiny"&gt;curious case of the astronauts that went on strike&lt;/a&gt;.  Despite being highly trained, military disciplined and very intelligent a bunch of astronauts went on strike in space simply because the working conditions were so hard.  Building great teams is hard!&lt;br /&gt;
&lt;br /&gt;
Getting the conditions right for a team is very important and Henrik outlined some of the things that he had seen with successful teams.&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;People need influence over their working conditions.  This can be achieved by using self managing teams that have clear authority.  &lt;a href="http://www.noop.nl/2011/03/delegation-poker-game-description.html"&gt;Delegation poker&lt;/a&gt; might be one tool to try out for this.&lt;br /&gt;
&lt;li&gt;Compelling direction.  A vision, a product, a sprint goal.  One of the more interesting points was that goals should be challenging and it was suggested that a 50% failure rate was optimum.  I definitely want to try this when I get back to work!&lt;br /&gt;
&lt;li&gt;Clear boundaries.  Understand who is on the team.&lt;br /&gt;
&lt;li&gt;Nobody succeeds unless everyone succeeds.  This avoids the done vs done done dilemma.  Problems should be given to teams not people.&lt;br /&gt;
&lt;li&gt;Trustworthy feedback based on results.  No proxies between users and the teams.  The role of the product manager is to be the vision guy, not to shield customers from engineers.&lt;br /&gt;
&lt;li&gt;Need to depend on each others skills.  It's OK to specialize and build up independent skills (got to be careful of the bus number!)&lt;br /&gt;
&lt;li&gt;Be able to give negative feedback if needed.  This needs trust on the team and this takes time to build up.&lt;br /&gt;
Team building requires effort.  Undertsanding the needs and motivations of your colleagues is an important part of working together&lt;br /&gt;
&lt;/ul&gt;
I enjoyed this talk a surprising amount and there's definitely some ideas I can take back to work.&lt;br /&gt;

After this talk I caught the tail-end of "When only C will do".  C is supported everywhere and has a standard &lt;a href="http://en.wikipedia.org/wiki/Application_binary_interface"&gt;application binary interface&lt;/a&gt; (ABI).  This ubiquity makes it an easy choice.  C++ doesn't have a standard API and embedded devices may choose to drop some features (e.g. runtime type information or exceptions) which makes life harder.  The conclusion seemed to be that most of the time you can use C++, but you may need to expose a C interface to be compatible with the rest of the world.&lt;br /&gt;

The afternoon finished with a talk on "Refactoring to Functional".  I don't think I was the target audience for this presentation.  The talk introduced some foundational functional concepts but through the medium of Google's Java's &lt;a href="http://code.google.com/p/guava-libraries/"&gt;Guava library&lt;/a&gt; (curiously its missing a fold/reduce operator).  This has some of the worst syntax I have ever seen.  The noise to useful ratio is stupidly high and it just made me so grateful for C#.  Yuck!&lt;br /&gt;
&lt;br /&gt;
They chose this style because it made the core logic simpler (and presumably eventually their brains learned to unsee the syntactic abominations).  They did try Scala, but found that you had to think at both the Scala and Java layers.  I think using the functional style in a language without first class functions is a mistake.  It's like trying to write C in an object-oriented style.  You can do it, but you can constantly fighting it and that's not good.  Maybe Java 8 will resolve these problems in a few decades.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-6965950993269802729?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/4BvaAmrj5KjVnStpwoH3_7lrctA/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/4BvaAmrj5KjVnStpwoH3_7lrctA/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/4BvaAmrj5KjVnStpwoH3_7lrctA/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/4BvaAmrj5KjVnStpwoH3_7lrctA/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/f5d9BWmqipY" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6965950993269802729?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6965950993269802729?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/f5d9BWmqipY/accu-day-two-thoughts.html" title="ACCU day two thoughts" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2012/04/accu-day-two-thoughts.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0cHQn49fyp7ImA9WhVWE0U.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-3018052222616856902</id><published>2012-04-25T14:23:00.000-07:00</published><updated>2012-04-25T14:23:53.067-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-04-25T14:23:53.067-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Accu" /><title>ACCU 2012 - Day One</title><content type="html">I'm lucky enough to be at &lt;a href="http://accu.org/index.php/conferences/accu_conference_2012"&gt;ACCU 2012&lt;/a&gt; this week, and I thought I'd try and write up my notes after each day so that I have more of a chance to digest the information being thrown at me!&lt;br /&gt;
&lt;br /&gt;
The opening keynote today was entitled "Project Patterns: From Adrenalin Junkies to Template Zombies" by Tim Lister of PeopleWare fame.  Tim was keen to point out that this wasn't about patterns in the strong Patterns sense, but more about some of the habits from teams he'd worked with.&lt;br /&gt;
&lt;br /&gt;
The first pattern mentioned was "The Safety Valve".  Successful teams often have a release mechanism, be it making popcorn, riding a pink tricycle (!) or playing foosball.  This pattern rang true.  In good projects I've been involved in I can always identify a safety valve.  The converse is also true, when working on shitty projects its mostly just been heads down, stress up, writing code until its done.&lt;br /&gt;
&lt;br /&gt;
The second pattern was mañana.  This describes the problem with long deadlines.  A lack of urgency means apathy.  If something is outside of my mañana window, then I don't really care about it.  As an engineer, my mañana window is about a sprint in length.  If its not needed for this sprint, then it's off my radar.  In contrast management types need to have a much longer mañana period.&lt;br /&gt;
&lt;br /&gt;
Next up was "Lessons unlearnt" that made the controversial point that the lessons learnt in a retrospective rarely trigger change in the organisation, they only cause change in those that encountered the problems in the first place.  This was backed up by the observation that long running software companies are no better at making software than those with little history.  &lt;br /&gt;
&lt;br /&gt;
"Project Sluts" or managers that just can't say no was another pattern that struck a chord.  I suspect everyone has experienced this one!&lt;br /&gt;
&lt;br /&gt;
Finally was the "Dead Fish Pattern".  It's a project where everyone knows it is doomed from day one, but nobody says anything.  I've direct experience of this one (it being the major reason why I left a previous job).  We all knew the project was doomed, but the politics of the situation meant this could never be voiced, and the team just hunkered down into a marine corp death march mentality (see &lt;a href="http://www.amazon.com/Death-March-Edition-Edward-Yourdon/dp/013143635X"&gt;Death March&lt;/a&gt; by Edward Yourdon, a great book!).  There was a great quote about heroics from developers to keep it running by the use of "code defibrillators".&lt;br /&gt;
&lt;br /&gt;
I really enjoyed the talk, and I think I was successfully not-so-subliminally convinced to buy the &lt;a href="http://www.amazon.co.uk/Adrenaline-Junkies-Template-Zombies-Understanding/dp/0932633676"&gt;book&lt;/a&gt;!&lt;br /&gt;
&lt;br /&gt;
Next up was the Code Simplicity talk I (I was actively trying to avoid the Goldberg contraption that it C++ 11). It was a good talk using examples from the Qt framework to demonstrate complicated code.&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;Make it simple.  Make it memorable.  Make it inviting to look at.  Make it fun to read&lt;/blockquote&gt;&lt;br /&gt;
The biggest takeaway for me was looking at code from the perspective of consumers.  There's no point designing a clever, simple API for yourself, if you hoist all the complexity on the users of the API (for example, requiring implementation of a huge interface).  I also learnt a new useless acronym TATUC (throw away that useless code).  Someone for the audience also gave a memorable composition over inheritance quote, "if you marry you can get divorced, but parents are forever".&lt;br /&gt;
&lt;br /&gt;
The speaker also tried to encourage the boy scout rule.  Since bad code is easy to spot,you should take every opportunity you can to tidy it up.  This is much too idealistic for me, tidying code is always a risk (most of the time it won't have tests) and its all too easy to make a messier working code base into a prettier, but subtly wrong code base.&lt;br /&gt;
&lt;br /&gt;
Bit of lunch and the onto Parallel Architectures.  This turned out to be an interesting review of parallel architectures from the hardware side, and then the software side.  In the beginning there was the Z80, and life was simple.  Then the was pipelining&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://upload.wikimedia.org/wikipedia/commons/thumb/6/67/5_Stage_Pipeline.svg/200px-5_Stage_Pipeline.svg.png" imageanchor="1" style=""&gt;&lt;img border="0" height="127" width="200" src="http://upload.wikimedia.org/wikipedia/commons/thumb/6/67/5_Stage_Pipeline.svg/200px-5_Stage_Pipeline.svg.png" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
Pipelining is a &lt;a href="http://en.wikipedia.org/wiki/Leaky_abstraction"&gt;leaky abstraction&lt;/a&gt;.  Instructions can be reordered by the compiler/hardware and this can break your code in subtle, non-obvious ways.  The best example of this is probably &lt;a href="http://www.cs.umd.edu/~pugh/java/memoryModel/DoubleCheckedLocking.html"&gt;double checked locking&lt;/a&gt;.  Cache coherency is another problematic abstraction.  If you write something to cache, and this needs to be available to another core then the memory management unit (MMU) must flush the cache to make that data visible.  This causes a huge performance problem (see &lt;a href="http://simplygenius.net/Article/FalseSharing"&gt;here&lt;/a&gt; for a great example).&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;Multithreading is just one damn thing, after, before and during another (Alexandrescu)&lt;/blockquote&gt;&lt;br /&gt;
On the software side of things, there was recognition that both mutex and atomic based approaches aren't suitable long term.  Both are non-composable, and (apparently) there's evidence that mutex based approaches simply won't scale to large number of cores.  Transactional memory was mentioned, though it's slow.  Importantly though, transactional memory is composable and is simple to reason about.  The &lt;a href="http://en.wikipedia.org/wiki/Transactional_Synchronization_Extensions"&gt;Intel Haswell&lt;/a&gt; processor will provide hardware support, so it'll be interesting to see whether this provides an alternative to the actor models that seem to be in vogue at the moment. I'm looking forward to seeing how things change with concurrency over the next few years (I hope for a revolution, but I bet we just find more intricate ways of just about making things work).&lt;br /&gt;
&lt;br /&gt;
Finally, I went to the "Objections on TDD and their refutations". This covered the many excuses that developers come up with for not doing TDD, ranging from the cop-out (my manager wouldn't let me) through to the delusional (I don't make mistakes).  My feelings on this are mixed.  Code without tests is just a lump of synctactically valid code, but not much more.  Tests give substance.  I'm not so sure whether unit tests are the best way to do this.  There was a quote on Twitter today which stated that unit tests provide existential qualification whereas types provide universal qualification.  I dig that!&lt;br /&gt;
&lt;br /&gt;
TDD also seems to be used inappropriately for hard problems.  In this case, it feels like simulated annealing; it seeks out a local solution to the problem, rather than finding a global optimum.  My favourite example of this is Sudoku.  Compare &lt;a href="http://xprogramming.com/articles/oksudoku/"&gt;Ron Jefferies epic adventures&lt;/a&gt; with &lt;a href="http://norvig.com/sudoku.html"&gt;Peter Norvig's application of brain power&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
Looking forward to day two.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-3018052222616856902?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/ItVXc8ZBDqBRZEFk1rNLOccTdQY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ItVXc8ZBDqBRZEFk1rNLOccTdQY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/ItVXc8ZBDqBRZEFk1rNLOccTdQY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ItVXc8ZBDqBRZEFk1rNLOccTdQY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/r6nhPXcDRgg" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/3018052222616856902?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/3018052222616856902?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/r6nhPXcDRgg/accu-2012-day-one.html" title="ACCU 2012 - Day One" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2012/04/accu-2012-day-one.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Dk8GRnY8fyp7ImA9WhdUEU4.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-5794603738003434013</id><published>2011-09-27T07:47:00.000-07:00</published><updated>2011-09-27T07:47:07.877-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-27T07:47:07.877-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="java" /><category scheme="http://www.blogger.com/atom/ns#" term="csharp" /><title>Avoid C# Enums!</title><content type="html">C# enums are closer to glorified integers than Java's.  In Java, you're able to avoid the horrible pattern where the function has to throw an exception if a new enum is added, such as that shown below.  This pattern is yucky because the compiler is unable to determine this at compile-time.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;   public enum Foo { Bar, Baz };

   // Has to throw the exception
   public void doSomething(Foo f) {
     switch(f) {
        case Bar:
           System.out.println("!! Bar");
           break;
        case Baz:
           System.out.println("Baz !!");
           break;
        default:
           throw new IllegalArgumentException("Unsupported enum type!");
      }
   }
&lt;/pre&gt;&lt;br /&gt;
In Java, you can give the enum behaviour.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;   public enum Foo { 
     Bar("!! Bar"), 
     Baz("Baz !!);

     private string stringRepresentation;

     private Foo(string s) { stringRepresentation = s; }

     public string getString() { return stringRepresentation; }
   }

   public void doSomething(Foo f) {
      return f.getString();
   }
&lt;/pre&gt;&lt;br /&gt;
This is cool because the only way you can add a new enum value and get it to compile is to give it a valid string representation.&lt;br /&gt;
&lt;br /&gt;
In C#, enum's can't have behaviour so I think they should be avoided like the plague.  Changes are if you are switching on an enum value then the behaviour you are executing belongs on the class anyway!&lt;br /&gt;
&lt;br /&gt;
You can simulate enums with a class, a private constructor and some static instances of the class representating the enum values.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;  public sealed class Foo 
  {
    public static readonly Bar = new Foo("!! Bar");
    public static reaodnly Baz = new Foo("Baz !!");

    private string m_StringRepresentation;

    private Foo(string stringRepresentation) {
        m_StringRepresentation = stringRepresentation;
    }
  
    public string StringRepresentation { get { return m_StringRepresentation; } } 
  }
&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-5794603738003434013?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/G-tri3q3wHFapmpgQxbCtJOZxwE/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/G-tri3q3wHFapmpgQxbCtJOZxwE/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/G-tri3q3wHFapmpgQxbCtJOZxwE/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/G-tri3q3wHFapmpgQxbCtJOZxwE/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/He61Z9XLJGQ" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/5794603738003434013?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/5794603738003434013?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/He61Z9XLJGQ/avoid-c-enums.html" title="Avoid C# Enums!" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2011/09/avoid-c-enums.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUcHRHc4fSp7ImA9WhZUEkw.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-4758038435787299011</id><published>2011-06-04T12:50:00.000-07:00</published><updated>2011-06-04T12:50:35.935-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-04T12:50:35.935-07:00</app:edited><title>Deploying a Yesod application on Linode</title><content type="html">A &lt;acronym title="Virtual Private Server"&gt;VPS&lt;/acronym&gt; is a virtual machine that can be used for software hosting, it's typically a lot cheaper than a dedicated server and it's a good way to spend a few dollars and get a machine on the internet!&lt;br /&gt;
&lt;br /&gt;
I use &lt;a href="http://www.linode.com/?r=165c6997ab81ff930a81275d5f99ebf4c6a7aaa1"&gt;Linode&lt;/a&gt; for my VPS and it's cheap, simple and I've never had any problems with it.  The goal of this is for me to write down how I deployed an application written using &lt;a href="http://www.yesodweb.com/"&gt;Yesod&lt;/a&gt; to a &lt;a href="http://www.linode.com/?r=165c6997ab81ff930a81275d5f99ebf4c6a7aaa1"&gt;Linode&lt;/a&gt; instance.  I found it wasn't as straight-forward as I'd hoped so I've tried to document all the steps I went through.&lt;br /&gt;
&lt;br /&gt;
First off, create yourself a 32bit Ubuntu 10.04 image (with less than 4GB of RAM, 64 bit just wastes space).  Once you've got this, ssh onto the machine and download the latest and greatest &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt; and &lt;a href="http://hackage.haskell.org/platform/"&gt;Haskell platform&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
To install &lt;a href="http://www.haskell.org/ghc/"&gt;GHC&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;  wget http://www.haskell.org/ghc/dist/7.0.3/ghc-7.0.3-i386-unknown-linux.tar.bz2
  tar xvf ghc-7.0.3-i386-unknown-linux.tar.bz2
  cd ghc-7.0.3
  sudo apt-get install gcc libgmp3-dev
  ./configure
  make install
&lt;/pre&gt;&lt;br /&gt;
After this has completed, fire up &lt;code&gt;ghci&lt;/code&gt; and verify you can evaluate Haskell commands (&lt;code&gt;print "Hello World"&lt;/code&gt; ought to confirm that everything is working, missing &lt;code&gt;libgmp3-dev&lt;/code&gt; causes ghc to install but not run).  Next the Haskell platform.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;  wget http://lambda.galois.com/hp-tmp/2011.2.0.1/haskell-platform-2011.2.0.1.tar.gz
  tar xvf haskell-platform-2011.2.0.1.tar.gz
  sudo apt-get install zlib1g-dev libglut3-dev
  make
  make install
&lt;/pre&gt;&lt;br /&gt;
I had a somewhat bizarre issue where the first round of make failed to build OpenGL.  Running &lt;code&gt;make&lt;/code&gt; again made the troubles go away.  No idea why, but it doesn't seem to have caused any problems.  At this stage you should have a completely working Haskell environment, with lots of lovely packages installed from Hackage.&lt;br /&gt;
&lt;br /&gt;
Next step is to actually build and run the application.  I've got my code up on a &lt;a href="http://mercurial.selenic.com/"&gt;Mercurial&lt;/a&gt; instance, so I simply clone the repository onto the build server, and then I can just use &lt;a href="http://www.haskell.org/cabal/"&gt;cabal&lt;/a&gt; to build locally and end up with an executable.  This &lt;a href="http://www.reddit.com/r/haskell/comments/hqucl/how_to_host_haskell_web_services/c1xlpeb"&gt;reddit comment&lt;/a&gt; suggests that building and linking with GHC takes huge amounts of memory.  I'm on the cheapest plan at &lt;a href="http://www.linode.com/?r=165c6997ab81ff930a81275d5f99ebf4c6a7aaa1"&gt;Linode&lt;/a&gt; which only have 512MB of RAM and I had no problems, but then again I'm only deploying &lt;a href="http://search.boringtosser.co.uk/"&gt;toy applications&lt;/a&gt; at the moment.  As said on Reddit, if you do run into problems build locally with the same architecture and upload to the server.&lt;br /&gt;
&lt;br /&gt;
The &lt;a href="http://www.yesodweb.com/book/deploying"&gt;recommended way&lt;/a&gt; of deploiying a Yesod application is to use &lt;a href="http://en.wikipedia.org/wiki/Nginx"&gt;nginx&lt;/a&gt; as a &lt;a href="http://en.wikipedia.org/wiki/Reverse_proxy"&gt;reverse proxy&lt;/a&gt;, together with &lt;a href="https://github.com/snoyberg/warp"&gt;Warp&lt;/a&gt;.  Getting nginx up and running on Linode is documented &lt;a href="http://library.linode.com/web-servers/nginx/installation/ubuntu-10.04-lucid""&gt;here&lt;/a&gt;.  Nginx is the 10.04 repositories is only up to version 0.76, so I built the latest release from &lt;a href="http://www.nginx.org/en/download.html"&gt;source&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
The below is simple &lt;code&gt;nginx.conf&lt;/code&gt; file adapted from the &lt;a href="http://www.yesodweb.com/book/deploying"&gt;Yesod web site&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;  daemon off; # don't run nginx in the background, good for monitoring app
  events {
      worker_connections 4096;
  }
  http {

      # If multiple domains are hosted on the same box, this can be used to
      # block everything that isn't directed at a specific host
      server {
          listen 80 default_server;
          server_name _;
          return 444;
      }

      server {
           # Important, serve files with the appropriate mime-type
           # Without this some browsers don't interpret JS, CSS correctly
           include /opt/nginx/conf/mime.types;

           listen 80; 
           server_name search.boringtosser.co.uk;
           location / {
               proxy_pass http://127.0.0.1:3000;
           }

           # Used for serving static content
           location /static {
               root /home/jeff/code/keywordSearch;
               expires max;
           }
       }               
}
&lt;/pre&gt;&lt;br /&gt;
nginx comes with default start scripts in &lt;/code&gt;/etc/init.d&lt;/code&gt;.  &lt;code&gt;/etc/init.d/nginx start&lt;/code&gt; starts the server running with the default configuration.  Swapping start for stop obviously stops it!  To get your &lt;a href="http://www.yesodweb.com"&gt;Yesod&lt;/a&gt; application built use &lt;a href="http://www.haskell.org/cabal/"&gt;cabal&lt;/a&gt;, making sure to build the production version with &lt;code&gt;cabal -fproduction configure&lt;/code&gt;.  As described in the &lt;a href="http://www.yesodweb.com/show/map/1/93"&gt;Yesod book&lt;/a&gt; you can use an &lt;a href="http://upstart.ubuntu.com/"&gt;Upstart&lt;/a&gt; script to keep things up and running.  And that should be all there is to it!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-4758038435787299011?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/pCBJ4CncnafH8tlTgRsLUC5CjsI/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/pCBJ4CncnafH8tlTgRsLUC5CjsI/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/pCBJ4CncnafH8tlTgRsLUC5CjsI/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/pCBJ4CncnafH8tlTgRsLUC5CjsI/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/Q4zpSFz_Imk" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/4758038435787299011?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/4758038435787299011?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/Q4zpSFz_Imk/deploying-yesod-application-on-linode.html" title="Deploying a Yesod application on Linode" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2011/06/deploying-yesod-application-on-linode.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08GRXo4cCp7ImA9WhZUEEk.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-7782733742983572618</id><published>2011-06-02T14:23:00.000-07:00</published><updated>2011-06-02T14:23:44.438-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-02T14:23:44.438-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="redis" /><category scheme="http://www.blogger.com/atom/ns#" term="search-engine" /><title>Writing a simple keyword search engine using Haskell and Redis</title><content type="html">The job of a search engine is simple, given some search terms find some relevant documents containing those terms.  I thought it'd be fun to write a very simple one and see what I could learn along the way.  &lt;a href="http://search.boringtosser.co.uk" title="Errr, yeah, I'm wondering why I picked this domain name as well"&gt;search.boringtosser.co.uk&lt;/a&gt; contains the end result.&lt;br /&gt;
&lt;br /&gt;
The most fundamental data structure in a search engine is the &lt;a href="http://en.wikipedia.org/wiki/Inverted_index"&gt;inverted index&lt;/a&gt; that maintains a mapping from content (such as words or numbers) to the documents that contain those terms.  For example:&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;  Doc 1 = quick brown fox
  Doc 2 = brown owl
  Doc 3 = fox tango

  quick =&gt; { 1 }
  brown =&gt; { 1, 2 }
  fox   =&gt; { 1, 3 }
  owl   =&gt; { 2 }
  tango =&gt; { 3 }
&lt;/pre&gt;&lt;br /&gt;
An inverted index makes it a simple problem to go from a search term to the documents containing that term.   There are (at least!) a couple of problems with just building a reverse index blindly from the data.  How do you make sure that variations of the same word go to the same document?  For example, if I search for "bananas" I'd like to get documents containing the term "banana" returned as well.  Secondly, how do you deal with words like "the" that are going to occur in almost every single document?  &lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://en.wikipedia.org/wiki/Stemming"&gt;Stemming&lt;/a&gt; is a process to reduce words to their base form.  For example, "eating", "eaten" and "eat" all result in the same stemmed word "eat".  Most search engines deal with common words by filtering them out.  Search engines refer to these words as &lt;a href="http://en.wikipedia.org/wiki/Stop_words"&gt;stop words&lt;/a&gt; and it's as simple as filtering out a known list of common words.&lt;br /&gt;
&lt;br /&gt;
Users communicate with a search engine using a &lt;a href="http://en.wikipedia.org/wiki/Query_language"&gt;query language&lt;/a&gt;.  For  this toy example, I'll use a simple query language that supports keyword matching and the Boolean operators, AND and OR.&lt;br /&gt;
&lt;br /&gt;
In order to show a search engine we'll also need a data set.  I've used the &lt;a href="http://fortunes.cat-v.org/"&gt;Jar of Fortune&lt;/a&gt; data as this gives lot sof short, simple documents.  &lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-LiIKVMWc-VQ/TeXjFPgdG0I/AAAAAAAAAGY/48e0oaHNLe4/s1600/fortune.jpg" imageanchor="1" style=""&gt;&lt;img border="0" height="300" width="400" src="http://1.bp.blogspot.com/-LiIKVMWc-VQ/TeXjFPgdG0I/AAAAAAAAAGY/48e0oaHNLe4/s400/fortune.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
(image generated using &lt;a href="http://jim.ignitiondomain.com/fc/"&gt;Jim's fortune cookie image generator&lt;/a&gt; and text from &lt;a href="http://xkcd.com/425/"&gt;XKCD Fortune Cookies&lt;/a&gt;)&lt;br /&gt;
&lt;br /&gt;
&lt;h3&gt;Implementation&lt;/h3&gt;&lt;br /&gt;
Where should the inverted index be stored?  You could simply store this data in a hash table, but then you have to worry about persisting the data, scaling to large amounts of data and so on.  A &lt;a href="http://en.wikipedia.org/wiki/NoSQL#Key-value_store"&gt;Key/Value store&lt;/a&gt; provides a very efficient way of storing this map based data.  My current favourite store is &lt;a href="http://www.redis.io/"&gt;Redis&lt;/a&gt; as it's well documented, stable, ultra-fast and has a very simple API to use from a variety of languages, including &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
To make things simpler, I'll also store the documents themselves in Redis using the standard &lt;a href="http://redis.io/commands#string"&gt;string functions&lt;/a&gt;.  This is OK for this example, as I'm just storing small strings.  Indexing larger files would probably want to use some disk-based structure.  I'll use Redis's &lt;a href="http://redis.io/topics/data-types#sorted-sets"&gt;sorted set&lt;/a&gt; feature to store the word to document mapping.  This allows you to store a set of strings, where each entry in the set has an associated score representing how many times that word has occurred.  The higher the score, the more relevant the search result.&lt;br /&gt;
&lt;br /&gt;
The first task is to turn a lump of text into a series of terms to index into Redis.  Stop word removal is a bit of a no-brainer, just grab a &lt;a href="http://jmlr.csail.mit.edu/papers/volume5/lewis04a/a11-smart-stop-list/english.stop"&gt;giant list of stop words&lt;/a&gt; and remove them.  &lt;code&gt;isStopWord :: T.Text -&gt; Bool&lt;/code&gt;.  The complete code is dull, unexciting and &lt;a href="https://github.com/fffej/Keyword-Search/blob/master/StopWords.hs"&gt;here&lt;/a&gt;!  Stemming is more interesting.  Developed in 1980, the &lt;a href="http://tartarus.org/~martin/PorterStemmer/def.txt"&gt;Porter stemming algorithm&lt;/a&gt; consists of 5 or so simple rules to stem a word.  Better still, there is already an &lt;a href="http://tartarus.org/~martin/PorterStemmer/haskell.txt"&gt;existing Haskell implementation&lt;/a&gt;.  I used that implementation and adapted it ever so slightly to use &lt;a href="http://hackage.haskell.org/packages/archive/text/latest/doc/html/Data-Text.html"&gt;Data.Text&lt;/a&gt;.  The full version of the code for stemming &lt;a href="https://github.com/fffej/Keyword-Search/blob/master/Porter.hs"&gt;here&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
The &lt;a href="http://fortunes.cat-v.org/"&gt;Jar of Fortunes&lt;/a&gt; data is in two different formats (one '%' separated, the other blank line separated).   A quick script parses those from one giant text string into individual elements, and adds a reference to the document for each term.  For example, if the quote is "quick fox" and the quote is #123 in file foo, then three entries would be created &lt;code&gt;(quick -&gt; (foo#123,1), fox -&gt; (foo#123,1))&lt;/code&gt;.  The code for indexing cookies is below:&lt;br /&gt;
&lt;br /&gt;
&lt;script src="https://gist.github.com/1004028.js"&gt; &lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
Next up is a way of performing the search, but before we can proceed with that we need to define what a search actually is.  Most search engines support a simple query language, providing simple features like boolean expressions and exact phrase matching.  For my little toy engine, I'll just stick with supporting exact match for single words and the boolean operations 'AND and 'OR'.  Implicit OR's are placed between adjacent terms.  For example, searching for &lt;code&gt;resplendent fish&lt;/code&gt; is treated as &lt;code&gt;resplendent OR fish&lt;/code&gt;.  &lt;br /&gt;
&lt;br /&gt;
Once the query is passed we need to evaluate the AST and get the results.  The leaf case is simple, given a "Contains" query the only task is to create the search term from the text and look up the values in the inverted index.  The Boolean operators are more interesting.  As is obvious, "and" and "or" correspond to set intersection and set union.  One way to implement this would be to look up the values from the left hand side and the right hand side and calculate the intersection, but this is a very choice because it involves grabbing all documents from Redis to the server.  A better choice is to make use of a Redis function to do the heavy lifting.  Redis provides &lt;a href="http://redis.io/commands/zinterstore"&gt;ZINTERSTORE&lt;/a&gt; and &lt;a href="http://redis.io/commands/zunionstore"&gt;ZUNIONSTORE&lt;/a&gt; for just this purpose.  Given two (or more) existing keys, they calculate the intersection and union of the keys and store the result in a new key.  Storing intermediate results in new keys runs the risk that the memory usage will grow unbounded, but it does allow query fragment caching (as long as the intermediate key represents the query).  Although this doesn't make much difference for such a small data set, it's likely that on a large data set caching frequently used query strings will give some benefit.  Redis provides &lt;acronym title="Time to life"&gt;ttl&lt;/acronym&gt; commands such as &lt;a href="http://redis.io/commands/ttl"&gt;&lt;code&gt;ttl&lt;/code&gt;&lt;/a&gt;, &lt;a href="http://redis.io/commands/expire"&gt;&lt;code&gt;expire&lt;/code&gt;&lt;/a&gt; and &lt;a href="http://redis.io/commands/persist"&gt;&lt;code&gt;persist&lt;/code&gt;&lt;/a&gt; to help address the memory consumption issues.  I've made sure that intermediate keys have a short ttl and that should (hopefully) mean that the memory shouldn't grow infinitely.&lt;br /&gt;
&lt;br /&gt;
I used &lt;a href="http://hackage.haskell.org/package/parsec"&gt;Parsec&lt;/a&gt; to turn a query string into an &lt;a href="http://en.wikipedia.org/wiki/Abstract_syntax_tree"&gt;abstract syntax tree&lt;/a&gt; (mostly based on the &lt;a href="http://www.haskell.org/haskellwiki/Parsing_expressions_and_statements"&gt;example code&lt;/a&gt; on the Haskell wiki).  The &lt;code&gt;query&lt;/code&gt; function processes a search request and returns a key containing the results that can be retrieved using &lt;code&gt;getKeyValues&lt;/code&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="https://gist.github.com/1005209.js"&gt; &lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
After all this I have a query engine that works in &lt;a href="http://www.haskell.org/ghc/docs/7.0-latest/html/users_guide/ghci.html"&gt;ghci&lt;/a&gt;.   That's a bit boring, so time to get it deployed on a web server somewhere.  I used &lt;a href="http://www.yesodweb.com/"&gt;Yesod&lt;/a&gt; to create a website.  For a simple one page website, the only code I had to write was to initialize the &lt;a href="http://www.redis.io/"&gt;Redis&lt;/a&gt; connection and the logic to execute the search request.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="https://gist.github.com/1005229.js?file=Root.hs"&gt;&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
The &lt;code&gt;getRootR&lt;/code&gt; function just loads the templates to display the home page.  &lt;code&gt;getSearchR&lt;/code&gt; performs the search.  The biggest problem I ran into was messages such as &lt;code&gt;&amp;lt;socket: 7&amp;gt;: hFlush: resource vanished (Broken pipe)&lt;/code&gt;.  This was because I had tried to keep a connection permanently open to Redis, instead of using a pool.  After some helpful people in #haskell pointed me in the right direction, I switched over to using &lt;a href="http://hackage.haskell.org/packages/archive/resource-pool/0.1.1.0/doc/html/Data-Pool.html"&gt;Data.Pool&lt;/a&gt; and all was good.&lt;br /&gt;
&lt;br /&gt;
The deployed application is available at &lt;a href="http://search.boringtosser.co.uk" title="Errr, yeah, I'm wondering why I picked this domain name as well"&gt;http://search.boringtosser.co.uk&lt;/a&gt; and the complete set of code is available on &lt;a href="https://github.com/fffej/Keyword-Search"&gt;github&lt;/a&gt; (and hopefully it comes with a working &lt;a href="http://www.haskell.org/cabal/"&gt;cabal&lt;/a&gt; file this time!).  Any suggestions on how to improve the code or just anything I'm doing stupidly is much appreciated!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-7782733742983572618?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/YT4_MgDi829tBwhhWT69YggyuOY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/YT4_MgDi829tBwhhWT69YggyuOY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/YT4_MgDi829tBwhhWT69YggyuOY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/YT4_MgDi829tBwhhWT69YggyuOY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/0l2X1yjDkFg" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7782733742983572618?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7782733742983572618?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/0l2X1yjDkFg/writing-simple-keyword-search-engine.html" title="Writing a simple keyword search engine using Haskell and Redis" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-LiIKVMWc-VQ/TeXjFPgdG0I/AAAAAAAAAGY/48e0oaHNLe4/s72-c/fortune.jpg" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2011/06/writing-simple-keyword-search-engine.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ck8AQXc4fyp7ImA9WhZWEEo.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-3367314827869676723</id><published>2011-05-10T16:28:00.000-07:00</published><updated>2011-05-10T16:40:40.937-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-05-10T16:40:40.937-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="java methodnamer.com" /><title>Method Names in the Java Development Kit and Spring Framework</title><content type="html">&lt;blockquote&gt;"When I use a word," Humpty Dumpty said, in a rather scornful tone, "It means just what I choose it to mean — neither more nor less."&lt;/blockquote&gt;&lt;br /&gt;
&lt;blockquote&gt;(Lewis Carroll, &lt;a target="_blank"  href="http://www.amazon.com/Through-the-Looking-Glass-ebook/dp/B002RKRJGE?ie=UTF8&amp;tag=fatvat-20&amp;link_code=btl&amp;camp=213689&amp;creative=392969"&gt;Through the Looking Glass&lt;/a&gt;&lt;img src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;l=btl&amp;camp=213689&amp;creative=392969&amp;o=1&amp;a=B002RKRJGE" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important; padding: 0px !important" /&gt;)&lt;/blockquote&gt;&lt;br /&gt;
&lt;p&gt;Names are probably the most important abstraction device when writing code.  A good name can hide a multitude of sins (who cares how vomit inducing a method implementation is, if the name is right?), but a bad one invokes feelings of rage (I've found a &lt;code&gt;fileExists&lt;/code&gt; method that returns false if the file exists...).  &lt;p&gt;Parts of Java have a very rigid naming standard (for example &lt;a href="http://en.wikipedia.org/wiki/JavaBean#JavaBean_conventions"&gt;JavaBean conventions&lt;/a&gt;). This is a good thing for some reasons (see the number of packages that use reflection to take advantage of this), but a bad thing for others when you end up making up terrible names just to satisfy some naming convention.&lt;/p&gt;&lt;br /&gt;
&lt;p&gt;In order to explore the sort of names that Java uses I wrote a quick script that trawled the &lt;a href="http://www.oracle.com/technetwork/java/javase/documentation/index-jsp-135444.html"&gt;JavaDoc&lt;/a&gt; from the standard library (JDK6.0), reflectively looked up all the methods and split them up into words (assuming &lt;a href="http://en.wikipedia.org/wiki/CamelCase"&gt;camel casing&lt;/a&gt; and ignoring anything with &lt;code&gt;$&lt;/code&gt; or &lt;code&gt;_&lt;/code&gt; in the name).  And then a bit of manual munging...&lt;br /&gt;
&lt;br /&gt;
The most popular starting word for a Java method is (unsuprisingly) get.&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-mkPldPxAFWk/TcnBGrgC0AI/AAAAAAAAAF4/f7D8z0SrQrI/s1600/jdkMostPopular.png" imageanchor="1" style=""&gt;&lt;img border="0" height="362" width="400" src="http://4.bp.blogspot.com/-mkPldPxAFWk/TcnBGrgC0AI/AAAAAAAAAF4/f7D8z0SrQrI/s400/jdkMostPopular.png" title="Most Popular First Words in JDK Method Names"/&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
The longest method is (deep breath), &lt;a href="http://download.oracle.com/javase/6/docs/api/java/sql/DatabaseMetaData.html#supportsDataDefinitionAndDataManipulationTransactions()"&gt;supportsDataDefinitionAndDataManpulationTransactions&lt;/a&gt;.  The most number of words that an individual method has is eight, and there's quite a few of these as shown below.&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;a href="http://download.oracle.com/javase/6/docs/api/java/util/concurrent/ScheduledThreadPoolExecutor.html#getContinueExistingPeriodicTasksAfterShutdownPolicy()"&gt;get Continue Existing Periodic Tasks After Shutdown Policy&lt;/a&gt;&lt;br /&gt;
&lt;li&gt;&lt;a href="http://download.oracle.com/javase/6/docs/api/javax/swing/UIManager.html#getCrossPlatformLookAndFeelClassName()"&gt;get Cross Platform Look And Feel Class Name&lt;/a&gt;&lt;br /&gt;
&lt;li&gt;&lt;a href="http://download.oracle.com/javase/6/docs/api/javax/swing/JEditorPane.html#getEditorKitClassNameForContentType(java.lang.String)"&gt;get Editor Kit Class Name For Content Type&lt;/a&gt;&lt;br /&gt;
&lt;li&gt;&lt;a href="http://www.google.com/codesearch/p?hl=en#ih5hvYJNSIA/src/share/classes/java/util/jar/JarFile.java&amp;q=isKnownToNotHaveClassPathAttribute&amp;l=509"&gt;is Known To Not Have Class Path Attribute&lt;/a&gt; (not a public method!)&lt;br /&gt;
&lt;li&gt;&lt;a href="http://download.oracle.com/javase/6/docs/api/java/util/concurrent/ScheduledThreadPoolExecutor.html#setContinueExistingPeriodicTasksAfterShutdownPolicy(boolean)"&gt;set Continue Existing Periodic Tasks After Shutdown Policy&lt;/a&gt;&lt;br /&gt;
&lt;li&gt;&lt;a href="http://www.google.com/codesearch/p?hl=en#ih5hvYJNSIA/src/share/classes/java/awt/EventQueue.java&amp;q=setCurrentEventAndMostRecentTimeImpl&amp;l=1041"&gt;set Current Event And Most Recent Time Impl&lt;/a&gt; (not a public method!) &lt;br /&gt;
&lt;li&gt;&lt;a href="http://download.oracle.com/javase/6/docs/api/java/util/concurrent/ScheduledThreadPoolExecutor.html#setExecuteExistingDelayedTasksAfterShutdownPolicy(boolean)"&gt;set Execute Existing Delayed Tasks After Shutdown Policy&lt;/a&gt;&lt;br /&gt;
&lt;li&gt;&lt;a href="http://www.google.com/codesearch/p?hl=en#-WpwJU0UKqQ/src/share/classes/javax/swing/DefaultRowSorter.java&amp;q=setModelToViewFromViewToModel&amp;l=726"&gt;set Model To View From View To Model&lt;/a&gt;&lt;br /&gt;
&lt;/ul&gt;The distribution of lengths of method names shows a nice &lt;a href="http://en.wikipedia.org/wiki/Normal_distribution"&gt;Gaussian distribution&lt;/a&gt;.  &lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-n8vz4phH6ec/TcnBRzblkwI/AAAAAAAAAGA/-QcO8L9lxxo/s1600/jdkCharactersPerMethodName.png" imageanchor="1" style=""&gt;&lt;img border="0" height="271" width="400" src="http://4.bp.blogspot.com/-n8vz4phH6ec/TcnBRzblkwI/AAAAAAAAAGA/-QcO8L9lxxo/s400/jdkCharactersPerMethodName.png" title="Distribution of Length of Method Names"/&gt;&lt;/a&gt;&lt;/div&gt;&lt;a href="http://en.wikipedia.org/wiki/Spring_Framework"&gt;Spring&lt;/a&gt; started life as a simpler alternative to &lt;a href="http://en.wikipedia.org/wiki/Enterprise_JavaBean"&gt;Enterprise JavaBean&lt;/a&gt; framework and is now one of the most popular frameworks for Java.  How do the naming conventions of Spring compare to the Java world?  Does it have any &lt;a href="http://ws.apache.org/xmlrpc/apidocs/org/apache/xmlrpc/server/RequestProcessorFactoryFactory.RequestSpecificProcessorFactoryFactory.html"&gt;factory factories&lt;/a&gt;?  First off, a simple frequency analysis of the first word of methods.  As expected, lots of &lt;code&gt;get&lt;/code&gt;, &lt;code&gt;set&lt;/code&gt; and &lt;code&gt;is&lt;/code&gt;.  &lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-_rIKr4h_ZSw/TcnBlSFKTMI/AAAAAAAAAGI/5w4MpZJ1SN4/s1600/springMostPopular.png" imageanchor="1" style=""&gt;&lt;img border="0" height="263" width="400" src="http://3.bp.blogspot.com/-_rIKr4h_ZSw/TcnBlSFKTMI/AAAAAAAAAGI/5w4MpZJ1SN4/s400/springMostPopular.png" title="Most popular first words in method names for the Spring Framework"/&gt;&lt;/a&gt;&lt;/div&gt;&lt;a href="http://www.google.com/codesearch/p?hl=en#0CkyFaECr-A/trunk/botnodetoolkit/src/thirdparty/spring/org/springframework/aop/aspectj/AspectJAdviceParameterNameDiscoverer.java&amp;q=AspectJAdviceParameterNameDiscoverer&amp;l=533"&gt;maybe bind this or target or args from pointcut expression&lt;/a&gt;?  That's the method with the largest number of words in the Spring framework (or a strange Haiku?).  The largest number of words in a public method is the monster &lt;a href="http://static.springsource.org/spring/docs/2.0.x/api/org/springframework/web/portlet/handler/AbstractHandlerMapping.html#setApplyWebRequestInterceptorsToRenderPhaseOnly(boolean)"&gt;set apply web request interceptors to render phrase only&lt;/a&gt;.  The distribution of lengths of method names again shows a &lt;a href="http://en.wikipedia.org/wiki/Normal_distribution"&gt;normal distribition&lt;/a&gt;  &lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-VZ8p0UkRrxg/TcnB0PTPZqI/AAAAAAAAAGQ/2BJ1bz2sEwk/s1600/springCharactersPerMethodName.png" imageanchor="1" style=""&gt;&lt;img border="0" height="279" width="400" src="http://3.bp.blogspot.com/-VZ8p0UkRrxg/TcnB0PTPZqI/AAAAAAAAAGQ/2BJ1bz2sEwk/s400/springCharactersPerMethodName.png" /&gt;&lt;/a&gt;&lt;/div&gt;With that as inspiration I decided to finally get around to creating &lt;a href="http://www.methodnamer.com"&gt;methodnamer.com&lt;/a&gt;, the new one-stop shop for adding the method names for classes that you've named using &lt;a href="http://www.classnamer.com/"&gt;classnamer.com&lt;/a&gt;.  You can generate methods based on either Spring or the JDK.  Are there any method names in the JDK that are &lt;a href="http://en.wikipedia.org/wiki/Haiku"&gt;Haiku&lt;/a&gt;?  A Haiku should contain 17 syllables in the pattern 5-7-5.  I used the Perl module, &lt;a href="http://search.cpan.org/dist/Lingua-EN-Syllable/Syllable.pm"&gt;Lingua::EN::Syllable&lt;/a&gt; and a bit of hackery and didn't find anything that was an exact match.  The best I could come up with was:  &lt;pre style="text-align: center"&gt;create selection 
   model property change
   listener
&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-3367314827869676723?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/oNcjWUD7YeOeeZkQgYZJQFQ5M4c/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/oNcjWUD7YeOeeZkQgYZJQFQ5M4c/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/oNcjWUD7YeOeeZkQgYZJQFQ5M4c/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/oNcjWUD7YeOeeZkQgYZJQFQ5M4c/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/PNAL0xWsmoc" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/3367314827869676723?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/3367314827869676723?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/PNAL0xWsmoc/method-names-in-java-development-kit.html" title="Method Names in the Java Development Kit and Spring Framework" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-mkPldPxAFWk/TcnBGrgC0AI/AAAAAAAAAF4/f7D8z0SrQrI/s72-c/jdkMostPopular.png" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2011/05/method-names-in-java-development-kit.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Dk8GQ3o6eCp7ImA9WhZTEkw.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-6688651762335812666</id><published>2011-03-15T11:47:00.000-07:00</published><updated>2011-03-15T11:47:02.410-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-15T11:47:02.410-07:00</app:edited><title>TODO Write more Posts</title><content type="html">Wow, it's been three months since I did anything to my blog.  That can't be good so time for a post to remind myself to do something...&lt;br /&gt;
&lt;br /&gt;
Over the next few months, I think I'll try and post more things about algorithms in general.  I've been spending my time trying to be a bit better at the sort of algorithm problems that &lt;a href="http://www.topcoder.com/"&gt;TopCoder&lt;/a&gt; and &lt;a href="http://www.spoj.pl"&gt;Spoj&lt;/a&gt; present.  This post is designed to act as a reminder for me to actually do so!&lt;br /&gt;
&lt;br /&gt;
So first up, I'll write something about &lt;a href="http://en.wikipedia.org/wiki/Kd_tree"&gt;Kd-Tree's &lt;/a&gt;, and hopefully now I've written that I will that'll be motivation enough for something to appear over the next few weeks.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-6688651762335812666?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/wkkrPXzfe7FhvpCPtG2l7KBBM54/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/wkkrPXzfe7FhvpCPtG2l7KBBM54/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/wkkrPXzfe7FhvpCPtG2l7KBBM54/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/wkkrPXzfe7FhvpCPtG2l7KBBM54/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/RkLDWXkFBnE" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6688651762335812666?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6688651762335812666?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/RkLDWXkFBnE/todo-write-more-posts.html" title="TODO Write more Posts" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2011/03/todo-write-more-posts.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0IFRHo8fCp7ImA9Wx9REUk.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-925863858468267184</id><published>2010-12-11T06:54:00.000-08:00</published><updated>2010-12-12T02:25:15.474-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-12-12T02:25:15.474-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="algorithms" /><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="wordplay" /><title>Word Ladders</title><content type="html">&lt;a href="http://en.wikipedia.org/wiki/Word_Ladder"&gt;Word Ladders&lt;/a&gt; were invented by &lt;a href="http://en.wikipedia.org/wiki/Lewis_Carroll"&gt;Lewis Carroll&lt;/a&gt; as a form of word play. The idea is that you start with a word, change one letter at a time, and end up with a new word.  All words in the chain must be valid words in the dictionary (otherwise it'd be a bit rubbish).  For example, you can turn beer into wine with the following&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;&amp;nbsp;&amp;nbsp;BEER
  BEAR
  BEAD
  BEAK
  BEAT
  BELT
  WELT
  WILT
  WILE
  WINE
&lt;/pre&gt;&lt;br /&gt;
Let's write a simple program that calculates word ladders for a simplified version of the game where each change can only change a single letter at a time (rather than allow deletes and inserts too).  Finding a word ladder can be thought about as a graph problem.  Each node is a word, and each edge represents a connection to another valid word. &amp;nbsp;The graph below is a partial representation of the graph starting with BEER. &amp;nbsp;The real graph is much more complicated!&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_XfmDdL0570Q/TQNNoNmVfcI/AAAAAAAAAFs/zW3RJAYJ82U/s1600/partialGraph.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="180" src="http://2.bp.blogspot.com/_XfmDdL0570Q/TQNNoNmVfcI/AAAAAAAAAFs/zW3RJAYJ82U/s320/partialGraph.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
First things first, how do we build the graph?  For starters we need to know if two words are neighbours or not.  The following simple functions determine the difference between two strings.  Remember that I'm only working on the simplest possible distance metric at the moment, that is allowing a single character to change.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;neighbour :: String -&gt; String -&gt; Bool
  neighbour x y = difference x y == 1
                                     
  difference :: String -&gt; String -&gt; Int                     
  difference [] [] = 0
  difference (x:xs) (y:ys) | x == y = difference xs ys
                           | otherwise = 1 + difference xs ys
  difference _ _ = error "Two strings must be the same length"
&lt;/pre&gt;&lt;br /&gt;
Next thing we need to grab is a huge list of words.  We'll store these words in a Set because we'll want to test for membership frequently (an O(lg(N)) operation.  An alternative would be to use a perfect hash (this is an option because the dictionary is fixed).  This would give O(1) lookup times, and it looks like there is (as almost always) a &lt;a href="http://hackage.haskell.org/package/PerfectHash"&gt;library on Hackage&lt;/a&gt; that does just that.  The simple distance metric chosen means we can limit the number of words based on the size of the input word.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;import qualified Data.Set as S
  type WordSet = S.Set String

  wordListPath :: String
  wordListPath = "/usr/share/dict/british-english"

  createDictionary :: Int -&gt; IO WordSet    
  createDictionary n = do
    file &lt;- readFile wordListPath 
    return $ S.fromList $ filter (\x -&gt; length x == n &amp;&amp; all isAlpha x) (map (map toLower) $ words file)
&lt;/pre&gt;&lt;br /&gt;
Once we've got a dictionary, all we need to do is build the graph.  Since &lt;a href="http://www.haskell.org"&gt;Haskell&lt;/a&gt; is lazy, we don't need to worry about the space complexity of the graph - we'll just build it lazily and only the bit that is explored will be resident in memory.&lt;br /&gt;
&lt;br /&gt;
Each node contains the word it represents and the links to the child elements.  The graph is built by starting at a root, and filling all the valid neighbours.  Each time we place a word in the graph we remove it from the dictionary, otherwise we'll get cycles in the graph.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;data Node = Node String [Node] deriving Show

  buildGraph :: WordSet -&gt; String -&gt; Node 
  buildGraph wordset top = Node top (map (buildGraph smaller) neighbours)
    where
      neighbours = S.toList (S.filter (neighbour top) smaller)
      smaller = S.delete top wordset 
&lt;/pre&gt;&lt;br /&gt;
The graph is *huge*, so we need to find some way to limit the search space.  The most obvious way is to give a restriction on the depth of the search.  A word ladder that is 10000 words rungs high is probably not much fun to complete.  We can also cut the search short if the word is too many changes away given the maximum depth (for example, if 4 characters need to change in a 5 letter word and the maximum left to search is 3 then we can prune this search branch).&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;search :: Node -&gt; Int -&gt; String -&gt; [String]
  search graph maxDepth goal = search' graph maxDepth goal []

  search' :: Node -&gt; Int -&gt; String -&gt; [String] -&gt; [String]
  search' (Node end children) maxDepth goal path 
    | end == goal    = end : path 
    | null children  = [] 
    | length path &gt;= maxDepth = [] -- too deep
    | difference end goal &gt;= maxDepth - length path = [] -- too much difference
    | otherwise = first
      where
        childRoutes = filter (not . null) $ 
                      map (\child -&gt; search' child maxDepth goal (end : path)) children
        first | null childRoutes = []
              | otherwise        = head childRoutes          
        quickest | null childRoutes = []
                 | otherwise = minimumBy (comparing length) childRoutes                         
&lt;/pre&gt;&lt;br /&gt;
The way we search the children is important.  In this case we've used &lt;code&gt;first&lt;/code&gt; gone for the first available route that satisfies the depth guarantee, but isn't guaranteed to be the shortest route.  &lt;code&gt;quickest&lt;/code&gt; on the other hand calculates all child routes and finds the minimum length part.&lt;br /&gt;
&lt;br /&gt;
Finally, we can put this all together and write a simple search search function.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;makeLadder :: Int-&gt; String -&gt; String -&gt; IO [String]
  makeLadder maxDepth start end 
    | length start /= length end = error "Only two strings of equal length are currently supported."
    | otherwise = do    
        dict &lt;- createDictionary (length start)
        if (S.member start dict &amp;&amp; S.member end dict)
          then return $ search (buildGraph dict start) maxDepth end
          else return []
&lt;/pre&gt;
The complete code for this version available on my git hub repo &lt;a href="https://github.com/fffej/fatvat-algorithms/blob/4197ce2642c19f0c667537637950ac791a4d1cb5/word-ladders/haskell/WordLadders.hs"&gt;here&lt;/a&gt;.

This version has several problems.
&lt;ol&gt;&lt;li&gt;It's too slow - searching for the minimal path can take considerable time&lt;/li&gt;
&lt;li&gt;It's not very flexible&lt;/li&gt;
&lt;/ol&gt;
(this part completely edited from the original post, thanks to insightful comment about something *very* daft I was doing) 

Let's fix that.  In order to be flexible, we need to support various distance metrics.  For example, it'd be nice to allow insertions and deletions as well as character transposition.  We just need a generic function that calculates the distance between any given strings.  In order to improve performance, instead of searching the entire dictionary to see whether any are neighbours, we can generate the neighbours and find out which ones are in the dictionary.

That means we need both a distance metric and a way of calculating the edits.

&lt;pre&gt;  data DistanceMetric = DistanceMetric (Word -&gt; Word -&gt; Int) (Word -&gt; WordSet)

  difference :: Word -&gt; Word -&gt; Int                     
  difference x y 
    | length x /= length y = 999999
    | otherwise = sum $ zipWith (\c1 c2 -&gt; if c1 == c2 then 0 else 1) x y
                
  transposeChar :: Word -&gt; [Word] 
  transposeChar [] = []
  transposeChar (x:xs) = map (:xs) (validChars \\ [x])
                
  deleteChar :: Word -&gt; [Word]                       
  deleteChar [] = []
  deleteChar (x:xs) = [xs]

  insertChar :: Word -&gt; [Word]
  insertChar [] = []
  insertChar (x:xs) = map (\y -&gt; y:x:xs) validChars

  differenceEdit :: Word -&gt; WordSet
  differenceEdit x = edit' x [transposeChar]
    
  editDistanceEdits :: Word -&gt; WordSet
  editDistanceEdits x = edit' x [insertChar,transposeChar,deleteChar]

  edit' :: Word -&gt; [Word -&gt; [Word]] -&gt; WordSet
  edit' w fns = S.fromList $ concat $ 
                zipWith (\x y -&gt; map (\z -&gt; x ++ z) (concatMap (\x -&gt; x y) fns)) 
                (inits w) (tails w)

  simple :: DistanceMetric
  simple = DistanceMetric difference differenceEdit

  edits :: DistanceMetric
  edits = DistanceMetric editDistance editDistanceEdits
&lt;/pre&gt;
This gives two distance functions and two ways of generating edits.  The Levenshtein distance  of 1 is generated by transposing, deleting and inserting characters from the original word.

This gives us the flexibility, because another distance metric could be put in place (anagrams perhaps?).  Next to performance.

&lt;pre&gt;  buildGraph :: DistanceMetric -&gt; WordSet -&gt; Word -&gt; Node 
  buildGraph d@(DistanceMetric dist edits) wordset top = Node top (map (buildGraph d smaller) neighbours)
    where
      possibleNeighbours = edits top
      neighbours = S.toList (smaller `S.intersection` possibleNeighbours)
      smaller = S.delete top wordset 
    
  search :: DistanceMetric -&gt; Node -&gt; Int -&gt; Word -&gt; [Word]
  search (DistanceMetric dist _) graph maxDepth goal = search' graph []
    where 
      search' (Node end children) path 
        | end == goal    = end : path 
        | length path &gt;= maxDepth = [] -- too deep
        | dist end goal &gt;= maxDepth - length path = [] -- too much difference
        | otherwise = first
          where
            -- Find the best node to search by comparing it against the goal
            costForNextChild :: [(Int,Node)]
            costForNextChild = zip (map (\(Node x _) -&gt; dist x goal) children) children
            bestFirst = map snd $ sortBy (comparing fst) costForNextChild
        
            -- Best first search
            childRoutes = filter (not . null) $ map (\child -&gt; search' child (end : path)) bestFirst
      
            first | null childRoutes = []
                  | otherwise        = head childRoutes                                                                                      
&lt;/pre&gt;
Two things have changed from the original code.  The first is that the graph is built by comparing the edits against the dictionary, rather than the word against the whole dictionary.  This is the main saving and makes it *hugely* faster (thanks to jkkramer for the pointer and &lt;a href="http://jkkramer.wordpress.com/2010/08/27/fun-with-clojure%C2%A0turning-cats-into-dogs-in-hanoi/"&gt;this post&lt;/a&gt;.)  The only other change is that we decide which node to search next based on how close it is to the goal (a &lt;a href="http://en.wikipedia.org/wiki/Best-first_search"&gt;best-first search&lt;/a&gt;).

With these changes it can now solve all of the problems I've tried at &lt;a href="http://www.wordchains.com"&gt;wordchains.com&lt;/a&gt;.  Neat.

The complete code is available &lt;a href="https://github.com/fffej/fatvat-algorithms/blob/e39957785ccd03dd3ef9807ec5481dcd8f0bb23f/word-ladders/haskell/WordLadders.hs"&gt;here&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-925863858468267184?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/2UOrpxIbOQxUjlM0vhTvXCCdmRk/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/2UOrpxIbOQxUjlM0vhTvXCCdmRk/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/2UOrpxIbOQxUjlM0vhTvXCCdmRk/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/2UOrpxIbOQxUjlM0vhTvXCCdmRk/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/2eRsFxNptm8" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/925863858468267184?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/925863858468267184?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/2eRsFxNptm8/word-ladders.html" title="Word Ladders" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_XfmDdL0570Q/TQNNoNmVfcI/AAAAAAAAAFs/zW3RJAYJ82U/s72-c/partialGraph.png" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/12/word-ladders.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C08CQH08fSp7ImA9Wx5aFUg.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-1092298877190032944</id><published>2010-11-12T00:51:00.000-08:00</published><updated>2010-11-12T00:51:01.375-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-11-12T00:51:01.375-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="freebase" /><category scheme="http://www.blogger.com/atom/ns#" term="reconcile" /><title>Google Refine</title><content type="html">&lt;a href="http://code.google.com/p/google-refine/"&gt;Google Refine&lt;/a&gt; is a toy for playing with data, originally developed by the team behind &lt;a href="http://www.freebase.com/"&gt;Freebase&lt;/a&gt;.  It's a tool for taking messy data sources and getting some structure in with them. &amp;nbsp;This is exactly the same sort of thing that I need for my super-secret-world-changing-awesome-side-project.  Ok, it's not that super yet, not released and will probably not change the world, but it is fun and it does require adding some structure to the messy data sets that can easily be found on the web.&lt;br /&gt;
&lt;br /&gt;
Refine can import data from a variety of sources from either the web or a data file on disk.  To have a quick play with this, I grabbed the list of UK Prime Ministers from &lt;a href="http://www.historic-uk.com/HistoryUK/England-History/PrimeMinisters.htm"&gt;here&lt;/a&gt; and spent a couple of minutes in Emacs to reformat it as a CSV file. &amp;nbsp;Once you've imported that data into Refine, you can automatically reconcile this data with structured information available online (such as Freebase).  This gives you the best matches for each name.  The screen below shows the results after reconciling with Freebase.&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_XfmDdL0570Q/TNz-s_97fQI/AAAAAAAAAFo/0I3ILeUJpao/s1600/refine.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="248" src="http://2.bp.blogspot.com/_XfmDdL0570Q/TNz-s_97fQI/AAAAAAAAAFo/0I3ILeUJpao/s320/refine.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
I'm not sure if it's entirely clear from the image but about 40% of the prime ministers have automatically been correlated with the appropriate entry from Freebase! &amp;nbsp;A few more clicks and the entire data set can be reconciled against an&amp;nbsp;on-line&amp;nbsp;source and then exported in a variety of formats including HTML, CSV and JSON. &amp;nbsp;This is exactly the kind of data matching I'm after, as once you've got a Freebase ID you can look up all the extra information &lt;a href="http://www.fatvat.co.uk/2010/08/freebasing-with-haskell.html"&gt;very easily&lt;/a&gt;. &amp;nbsp;So easy, it almost feels like cheating!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-1092298877190032944?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/N4KS_nHbZe_yqZySv5L7Cnopt5A/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/N4KS_nHbZe_yqZySv5L7Cnopt5A/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/N4KS_nHbZe_yqZySv5L7Cnopt5A/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/N4KS_nHbZe_yqZySv5L7Cnopt5A/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/9PEc1WiLJu8" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/1092298877190032944?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/1092298877190032944?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/9PEc1WiLJu8/google-refine.html" title="Google Refine" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_XfmDdL0570Q/TNz-s_97fQI/AAAAAAAAAFo/0I3ILeUJpao/s72-c/refine.png" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/11/google-refine.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0MDRXw_fyp7ImA9Wx5UGEs.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-6563671296220443790</id><published>2010-10-23T14:31:00.000-07:00</published><updated>2010-10-23T14:31:14.247-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-10-23T14:31:14.247-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="yesod haskell cabal" /><title>Duplicate symbol my_inet_ntoa when upgrading Yesod</title><content type="html">Upgraded &lt;a href="http://docs.yesodweb.com/"&gt;Yesod&lt;/a&gt; today, but found I could no longer run the application.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;  GHCi runtime linker: fatal error: I found a duplicate definition for symbol
     my_inet_ntoa
  whilst processing object file
     /usr/local/lib/network-2.2.1.7/ghc-6.12.3/HSnetwork-2.2.1.7.o
  This could be caused by:
     * Loading two different object files which export the same symbol
     * Specifying the same object file twice on the GHCi command line
     * An incorrect `package.conf' entry, causing some object to be
       loaded twice.
  GHCi cannot safely continue in this situation.  Exiting now.  Sorry.
&lt;/pre&gt;&lt;br /&gt;
&lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt; packages are described &lt;a href="http://www.haskell.org/ghc/docs/6.12.2/html/users_guide/packages.html"&gt;here&lt;/a&gt;.  Running &lt;code&gt;ghc-pkg-list&lt;/code&gt; showed me that &lt;code&gt;network-2.2.1.7&lt;/code&gt; was mentioned in the usr/local/lib ghc package.conf.d and also that a later version &lt;code&gt;network-2.2.1.9&lt;/code&gt; was also present.&lt;br /&gt;
&lt;br /&gt;
To solve this problem, it seems like I had to simply do &lt;code&gt;cabal upgrade http&lt;/code&gt;.  Perhaps a dependency is missing?  Not entirely sure, but thought I'd note the problem here so someone else encountering it will find it!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-6563671296220443790?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/JBQknpx40IM54SmyydPfelV7y-A/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/JBQknpx40IM54SmyydPfelV7y-A/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/JBQknpx40IM54SmyydPfelV7y-A/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/JBQknpx40IM54SmyydPfelV7y-A/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/i70W5pwHLvc" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6563671296220443790?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6563671296220443790?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/i70W5pwHLvc/duplicate-symbol-myinetntoa-when.html" title="Duplicate symbol my_inet_ntoa when upgrading Yesod" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2010/10/duplicate-symbol-myinetntoa-when.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0IERHs5cSp7ImA9Wx5XFko.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-7989475761162157771</id><published>2010-09-16T15:18:00.000-07:00</published><updated>2010-09-16T15:18:25.529-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-16T15:18:25.529-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="simhash" /><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="near duplicate detection" /><title>Let's get hashing.</title><content type="html">The usual job of a &lt;a href="http://en.wikipedia.org/wiki/Hash_function"&gt;hash function&lt;/a&gt;&amp;nbsp;is to convert a large bit of data (such as a string) into a simple integer representation. &amp;nbsp;A &lt;a href="http://en.wikipedia.org/wiki/Perfect_hash_function"&gt;perfect hash function&lt;/a&gt;&amp;nbsp;guarantees that each bit of data you provide to the hash function gives you a unique integer. &amp;nbsp;In the real world, hash functions aren't perfect, but usually the goal is to minimize the different sets of data that has to the same value.&lt;br /&gt;
&lt;br /&gt;
One application for hashes such as &lt;a href="http://en.wikipedia.org/wiki/MD5"&gt;MD5&lt;/a&gt; or &lt;a href="http://en.wikipedia.org/wiki/SHA-1"&gt;SHA1&lt;/a&gt;&amp;nbsp;is verification that a downloaded file is the same as the one of the server. &amp;nbsp;A hash signature is given on the server, and this can be verified locally after downloading. &amp;nbsp;A small change in the downloaded item results in a huge change in the computed hash function value. &amp;nbsp;For example using an MD5 hash:&lt;br /&gt;
&lt;blockquote&gt;"A Quick Brown Fox" hashes to 138a8e4a3e0fa3d62211e11a08917072&lt;/blockquote&gt;&lt;blockquote&gt;"A Quick Brown Fix" hashes to &amp;nbsp;e6907da1d6917f6ce3befcc628d332f7&lt;/blockquote&gt;This is also great for detecting duplicates - files with the same binary content can be found simply by comparing their checksums. &amp;nbsp;But what if you want to detect near duplicates? &amp;nbsp;Documents that are more or less similar, but not quite?&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://en.wikipedia.org/wiki/Locality_sensitive_hashing"&gt;Locality Sensitive Hashing&lt;/a&gt;&amp;nbsp;is a simple algorithm where similar features hash to similar values. &amp;nbsp;&lt;a href="http://www.cs.princeton.edu/~moses/papers/similar.ps"&gt;Sim-hash&lt;/a&gt;&amp;nbsp;[PostScript file] is once such&amp;nbsp;algorithm&amp;nbsp;by &lt;a href="http://www.cs.princeton.edu/~moses/"&gt;Moses Charikar&lt;/a&gt;. &amp;nbsp;I hope that the algorithm can be roughly described in the following steps for a feature vector V&lt;br /&gt;
&lt;br /&gt;
&lt;ol&gt;&lt;li&gt;Hash each element of the feature vector using a standard hash algorithm&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Create a weight vector the same length in bits as the hash value for each hash. &amp;nbsp;Values are set to 1 if the bit is set, and -1 otherwise. &amp;nbsp;Sum these to produce a single weight vector for the whole feature vector&lt;/li&gt;
&lt;li&gt;The similarity hash is created by turning this weight vector into a big binary number with &amp;gt;0 meaning a bit is set.&lt;/li&gt;
&lt;/ol&gt;&lt;br /&gt;
Converting this over to &lt;a href="http://www.Haskell.org/"&gt;Haskell&lt;/a&gt; is pretty simple (especially thanks to &lt;a href="http://blog.jebu.net/2009/08/simhashing-in-erlang-beauty-with-binary-comprehension/"&gt;this Erlang implementation&lt;/a&gt; and &lt;a href="http://d3s.mff.cuni.cz/~holub/sw/shash/"&gt;this explanation&lt;/a&gt;).&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/581575.js?file=simhash.hs"&gt;&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
I chose to use the &lt;a href="http://en.wikipedia.org/wiki/Jenkins_hash_function"&gt;Jenkins hash function&lt;/a&gt; purely because &lt;a target="_blank"  href="http://www.amazon.com/Real-World-Haskell-Bryan-OSullivan/dp/0596514980?ie=UTF8&amp;tag=fatvat-20&amp;link_code=btl&amp;camp=213689&amp;creative=392969"&gt;Real World Haskell&lt;/a&gt;&lt;img src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;l=btl&amp;camp=213689&amp;creative=392969&amp;o=1&amp;a=0596514980" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important; padding: 0px !important" /&gt; has the code for it (see the chapter on &lt;a href="http://book.realworldhaskell.org/read/advanced-library-design-building-a-bloom-filter.html"&gt;Advanced Library Design&lt;/a&gt;).&lt;br /&gt;
&lt;br /&gt;
So we can test this out simply now.  Similar feature vectors hash to similar values.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;  &gt; computeHash (FV "A Quick Brown Fox")
  10515216444980845459

  &gt; computeHash (FV "A Quick Brown Fix")
  10519438844509313944

  &gt; computeHash (FV "Hopefully Different")
  9706485515645876927
&lt;/pre&gt;&lt;br /&gt;
Because of the way the hash is calculated, the ordering of the feature vector is irrelevant, so hash [1,2,3] is the same as [3,1,2] and so on.  To measure the distance between two hashes, the &lt;a href="http://en.wikipedia.org/wiki/Hamming_distance"&gt;Hamming Distance&lt;/a&gt; is used.  This is simply just a count of the number of bits that differ.  A naive implementation of this is shown below (using the &lt;a href="http://www.haskell.org/ghc/docs/6.12.2/html/libraries/base-4.2.0.1/Data-Bits.html"&gt;Data.Bits&lt;/a&gt;) package.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/581636.js?file=hammingv1.hs"&gt;&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
This code is easy to follow and looks right, but it's not particularly fast.  A much faster way to count the number of bits is shown in the &lt;a target="_blank"  href="http://www.amazon.com/Programming-Language-2nd-Brian-Kernighan/dp/0131103628?ie=UTF8&amp;tag=fatvat-20&amp;link_code=btl&amp;camp=213689&amp;creative=392969"&gt;K&amp;R C Programming Language&lt;/a&gt;&lt;img src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;l=btl&amp;camp=213689&amp;creative=392969&amp;o=1&amp;a=0131103628" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important; padding: 0px !important" /&gt;.  The implementation in the C language is discussed in more detail &lt;a href="http://graphics.stanford.edu/~seander/bithacks.html"&gt;here&lt;/a&gt; and this translates to the following with the algorithm running in time proportional to the number of bits set.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/581641.js?file=betterHammingDistance.hs"&gt;&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
We can verify that the implementation performs exactly the same functionality by challenging &lt;a href="http://hackage.haskell.org/package/QuickCheck-2.3"&gt;QuickCheck&lt;/a&gt; to prove us wrong.&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;&gt; quickCheck (\x y -&gt; hammingDistance x y == hammingDistance2 x y)
  +++ OK, passed 100 tests.
&lt;/pre&gt;&lt;br /&gt;
Note that you'll need an appropriate definition of arbitrary for Word64, such as the one &lt;a href="http://gist.github.com/raw/581648/7eefe3c2a04164bb1c0154db1d1b3ce29d797797/arbitraryWord64.hs"&gt;here&lt;/a&gt;.  Next up, is it actually any faster?  &lt;a href="http://hackage.haskell.org/package/criterion-0.5.0.4"&gt;Criterion&lt;/a&gt; tells me that it's quite a bit faster (about 50%).&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;benchmarking hamming/Naive
  collecting 100 samples, 229 iterations each, in estimated 956.2516 ms
  bootstrapping with 100000 resamples
  mean: 42.66142 us, lb 42.52916 us, ub 42.95010 us, ci 0.950

  benchmarking hamming/Ritchie
  collecting 100 samples, 388 iterations each, in estimated 955.6707 ms
  bootstrapping with 100000 resamples
  mean: 24.59006 us, lb 24.53693 us, ub 24.65282 us, ci 0.950
&lt;/pre&gt;&lt;br /&gt;
Ok, basics sorted, so what can we do with sim-hash?  One application is near duplicate detection, and to test this out I grabbed a dataset from &lt;a href="http://userweb.cs.utexas.edu/users/ml/riddle/index.html"&gt;RIDDLE&lt;/a&gt; with the &lt;a target="_blank"  href="http://www.amazon.com/s/?ie=UTF8&amp;tag=fatvat-20&amp;link_code=btl&amp;camp=213689&amp;creative=392969&amp;search-alias=aps&amp;field-keywords=zagat"&gt;Zagat&lt;/a&gt;&lt;img src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;l=btl&amp;camp=213689&amp;creative=392969&amp;o=1&amp;a=" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important; padding: 0px !important" /&gt; and &lt;a target="_blank"  href="http://www.amazon.com/s/?ie=UTF8&amp;tag=fatvat-20&amp;link_code=btl&amp;camp=213689&amp;creative=392969&amp;search-alias=aps&amp;field-keywords=fodors"&gt;Fodor&lt;/a&gt;&lt;img src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;l=btl&amp;camp=213689&amp;creative=392969&amp;o=1&amp;a=" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important; padding: 0px !important" /&gt; food guides and the associated restaurant address.  The addresses are similar, but not quite.  For example&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;  "Bel-Air Hotel 701 Stone Canyon Rd. Bel Air 310-472-1211 Californian"
 
    vs.

  "Hotel Bel-Air 701 Stone Canyon Rd. Bel Air 310/472-1211 Californian\STX"
&lt;/pre&gt;&lt;br /&gt;
Pretty similar but not quite the same.  A quick (and hugely inefficient - I compare everything against everything) function to compare the two is shown below, which returns the list of all near dupes according to the hamming distance specified.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/583204.js?file=compareLists.hs"&gt;&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
A more efficient implementation of navigating the hamming space is given in the paper &lt;a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.78.7794&amp;rep=rep1&amp;type=pdf"&gt;Detecting Near-Duplicates for Web Crawling [PDF]&lt;/a&gt; by by Gurmeet Manku, Arvind Jain, and Anish Sarma.  For this little daft program, I can tolerate waiting a few seconds!&lt;br /&gt;
&lt;br /&gt;
According to the readme include with the &lt;a href="http://userweb.cs.utexas.edu/users/ml/riddle/data/restaurant.tar.gz"&gt;data set [tar.gz]&lt;/a&gt; there are 112 matches between the two sets of data.  Running with various hamming distances I get the following results:&lt;br /&gt;
&lt;pre&gt;  -- Compare the lists, deduplicate and run for various hamming distances
 &gt; map (\x -&gt; length $ Data.List.nub (map fst $ compareLists zagats fodors x)) [0..5]
 [3,9,27,59,124,197]
&lt;/pre&gt;&lt;br /&gt;
So with a Hamming distance of 0, three results are returned (i.e. there are only 3 exact duplicates between the two data sources), whereas with a Hamming distance of 5 then way too many duplicates are returned.  Eye-balling the results, a Hamming distance of 2 seems to give the best set of actual closest matches (but still with a fair few false matches).&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;  -- Good
  "Bel-Air Hotel 701 Stone Canyon Rd. Bel Air 310-472-1211 Californian",
  "Hotel Bel-Air 701 Stone Canyon Rd. Bel Air 310/472-1211 Californian\STX"

  -- Good
  "Cafe Bizou 14016 Ventura Blvd. Sherman Oaks 818-788-3536 French Bistro",
  "Cafe Bizou 14016 Ventura Blvd. Sherman Oaks 818/788-3536 French\STX"

  -- False match
  "Cassell's 3266 W. Sixth St. LA 213-480-8668 Hamburgers",
  "Jack Sprat's Grill 10668 W. Pico Blvd. Los Angeles 310/837-6662 Health Food\STX"

   -- Good
  "Granita 23725 W. Malibu Rd. Malibu 310-456-0488 Californian",
  "Granita 23725 W. Malibu Rd. Malibu 310/456-0488 Californian\STX"
&lt;/pre&gt;&lt;br /&gt;
This is almost certainly because the choice of the ASCII values of each item in the string is a poor choice of feature vector!  Given that we know the data set is a list of addresses, we could take advantage and cheat a little bit and base the feature vector solely on the numbers.  Using just the numbers gives 95 perfect matches with a Hamming distance of zero.  A hamming distance of 1 covers all the matches (and some false matches too - since it's order invariant and there aren't a whole host of numbers).  Sim-hash is better suited for large documents with decent feature vectors!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-7989475761162157771?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/zKZt8ic4FXVM5JpDNmGGlQv3BuA/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/zKZt8ic4FXVM5JpDNmGGlQv3BuA/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/zKZt8ic4FXVM5JpDNmGGlQv3BuA/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/zKZt8ic4FXVM5JpDNmGGlQv3BuA/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/77_UW1Qcfd8" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7989475761162157771?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7989475761162157771?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/77_UW1Qcfd8/lets-get-hashing.html" title="Let's get hashing." /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2010/09/lets-get-hashing.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUQAQ388cCp7ImA9Wx5QGUU.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-706371031528043965</id><published>2010-09-08T15:02:00.000-07:00</published><updated>2010-09-08T15:02:22.178-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-08T15:02:22.178-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="6502" /><title>The Beginnings of a 6502 Emulator in Haskell</title><content type="html">It's been ages since I've written some assembly code, but I'm also enjoying learning &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt;.  Therefore, my next random coding exercise is to code a simple CPU emulator.&lt;br /&gt;
&lt;br /&gt;
The &lt;a href="http://en.wikipedia.org/wiki/MOS_Technology_6502"&gt;6502 Processor&lt;/a&gt; is an 8-bit processor introduced in 1975 and it's still used in embedded systems.  It was a hugely popular chip used by such classic machines as the &lt;a href="http://en.wikipedia.org/wiki/Atari_2600"&gt;Atari 2600&lt;/a&gt;,&amp;nbsp;the original &lt;a href="http://en.wikipedia.org/wiki/Nintendo_Entertainment_System"&gt;Nintendo Entertainment System&lt;/a&gt; and the &lt;a href="http://en.wikipedia.org/wiki/BBC_Micro"&gt;BBC Micro&lt;/a&gt;.&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_XfmDdL0570Q/TIfwRi8TrhI/AAAAAAAAAFY/hbzTEoMY2Kw/s1600/L_NTE-NTE6502.jpg" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/_XfmDdL0570Q/TIfwRi8TrhI/AAAAAAAAAFY/hbzTEoMY2Kw/s320/L_NTE-NTE6502.jpg" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;The 6502 Micro Processor!&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;
&lt;br /&gt;
A &lt;acronym title="Central Processing Unit"&gt;CPU&lt;/acronym&gt; is defined by its instruction set architecture (ISA).  According to &lt;a href="http://www.amazon.com/Computer-Architecture-Quantitative-Approach-4th/dp/0123704901?ie=UTF8&amp;amp;tag=fatvat-20&amp;amp;link_code=btl&amp;amp;camp=213689&amp;amp;creative=392969" target="_blank"&gt;Computer Architecture - A Quantitative Approach&lt;/a&gt;&lt;img alt="" border="0" height="1" src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;amp;l=btl&amp;amp;camp=213689&amp;amp;creative=392969&amp;amp;o=1&amp;amp;a=0123704901" style="border: none !important; margin: 0px !important; padding: 0px !important;" width="1" /&gt; (seems to be the authoritative book on Computer Architecture) an ISA is defined by:&lt;br /&gt;
&lt;br /&gt;
&lt;ol&gt;&lt;li&gt;Class &amp;nbsp;- Most processors today are general-purpose register architectures where operands are either registers or memory locations. &amp;nbsp;General purpose &lt;a href="http://en.wikipedia.org/wiki/GPGPU"&gt;GPUs&lt;/a&gt;&amp;nbsp;are a different class of ISA&lt;/li&gt;
&lt;li&gt;Memory Addressing - Almost every processor uses byte addressing to access memory. &amp;nbsp;Alignment can be an issue for performance and some processors such as the &lt;a href="http://en.wikipedia.org/wiki/Itanium"&gt;Itanium&lt;/a&gt; have more &lt;a href="http://msdn.microsoft.com/en-us/library/aa290049(VS.71).aspx"&gt;specific alignment requirements&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Addressing Modes - An ISA can specify various ways of addressing memory. &amp;nbsp;Examples include register, constant and displacement. &amp;nbsp;The 6502 processor supports a dozen or so different addressing modes. &amp;nbsp;A modern day Intel 64-bit processor supports even more including another level of indirection known as segment addressing.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Types and Sizes of operands - As an 8bit processor the 6502 just supports 8 bit operands. &amp;nbsp;A more sophisticated architecture like the 80x86 supports various sizes of integers and floating point&lt;/li&gt;
&lt;li&gt;Operations - There used to be a divide amongst &lt;acronym title="Reduced Instruction Set Computing"&gt;RISC&lt;/acronym&gt; / &lt;acronym title="Complex Instruction Set Computing"&gt;CISC&lt;/acronym&gt;.  Now it seems most modern processors are a combination of the two, though I guess GPUs are the emergence of simple instructions again?&lt;/li&gt;
&lt;li&gt;Control Flow Instructions - The various choices for branching. &amp;nbsp;The 6502 supports a simple selection of branching instructions that move the program counter based on some arithmetic test&lt;/li&gt;
&lt;li&gt;Encoding refers to how the assembler instructions are encoded into byte code. &amp;nbsp;There are two choices here, fixed length encoding which is easier to read but may take up more space and variable length encoding which is slower to deal with.&lt;/li&gt;
&lt;/ol&gt;&lt;div&gt;Thankfully the 6502 is one of the simplest processors available. &amp;nbsp;After reading this great&amp;nbsp;&lt;a href="http://www.obelisk.demon.co.uk/6502/registers.html"&gt;description of the registers&lt;/a&gt;, I modelled the CPU with the following simple data structure.&lt;/div&gt;&lt;br /&gt;
&lt;script src="http://gist.github.com/569173.js?file=simpleCPU.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
The RAM (a maximum addressing space of 64Kb) is a mutable &lt;a href="http://hackage.haskell.org/packages/archive/vector/0.6.0.2/doc/html/Data-Vector.html"&gt;Data.Vector&lt;/a&gt;.  The CPU consists of a number of registers. &amp;nbsp;The &lt;a href="http://en.wikipedia.org/wiki/Program_counter"&gt;program counter&lt;/a&gt;&amp;nbsp;indicates where the next instruction to execute is going to come from. &amp;nbsp;Jump instructions move the program counter to a new location.  The xr and yr registers are commonly used to hold offsets or counters for memory addressing.  The accumulator (ac) register is used by most arithmetic and logical operations.  The status register contains various flags represented by &lt;code&gt;Flag&lt;/code&gt;.  These flags can be set as instructions are executed and are hopefully self-explanatory.  Finally the stack pointer contains a pointer to the next free location on the stack.  The stack is held within a 256 byte stack between 0x0100 and 0x01FF.&lt;br /&gt;
&lt;br /&gt;
In order to access the memory we need to understand how the various addressing modes work, again  the &lt;a href="http://www.obelisk.demon.co.uk/6502/addressing.html"&gt;6502 site&lt;/a&gt; provides a very clear description.&lt;br /&gt;
&lt;script src="http://gist.github.com/570781.js?file=addressModes.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;code&gt;Accumulator&lt;/code&gt; indicates that the instruction works directly on the accumulator.  &lt;code&gt;Immediate&lt;/code&gt; is an 8 bit constant value within the constructor.  In assembler this is usually indicated with #VAL.  A &lt;code&gt;ZeroPage&lt;/code&gt; address is a byte offset relative to 0, so this only allows indexing into the first 256 bytes of memory.  &lt;code&gt;ZeroPageX&lt;/code&gt; and &lt;code&gt;ZeroPageY&lt;/code&gt; addressing modes use a zero page address together with the offset specified in the xr and yr registers.  &lt;code&gt;Relative&lt;/code&gt; addressing is only used by the branch instructions and gives a signed 8 bit number to indicate where the program counter should jump to.  &lt;code&gt;Absolute&lt;/code&gt;, &lt;code&gt;AbsoluteX&lt;/code&gt; and &lt;code&gt;AbsoluteY&lt;/code&gt; are like ZeroPage addressing but allow access to the full memory range because it supports a full 16 bit address range.  Finally there are 3 indirect addressing modes.  &lt;code&gt;Indirect&lt;/code&gt; gives a 16 bit address which identifies the location of the &lt;acronym title="Least Significant Byte"&gt;LSB&lt;/acronym&gt; of another byte that contains the real target of the instruction.  &lt;code&gt;IndexedIndirect&lt;/code&gt; and &lt;code&gt;IndirectIndexed&lt;/code&gt; is used similar to indirect but uses the xr and yr registers to index with offset.  &lt;br /&gt;
&lt;br /&gt;
Once we've got our address mode we need a handful of functions to read from the various memory addresses.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/570815.js?file=readingAddresses.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
These functions do exactly &lt;a href="http://en.wikipedia.org/wiki/Does_exactly_what_it_says_on_the_tin"&gt;as they say on the tin&lt;/a&gt;.   With a few small exceptions...  For example, readWord16 should never be called with an &lt;code&gt;AddressMode&lt;/code&gt; of accumulator (you can read 16 bits from an 8 bit value).  For now I've just left these functions with "error" definitions until I can think of a better way of expressing it.&lt;br /&gt;
&lt;br /&gt;
After getting these bits and pieces in place it's time to look at the CPU instructions, apparently developed by &lt;a href="http://en.wikipedia.org/wiki/Cyberdyne_Systems"&gt;Cyberdyne Systems&lt;/a&gt; and used in "&lt;a href="http://www.amazon.com/Terminator-Blu-ray-Arnold-Schwarzenegger/dp/B000F9RB9Y?ie=UTF8&amp;amp;tag=fatvat-20&amp;amp;link_code=btl&amp;amp;camp=213689&amp;amp;creative=392969" target="_blank"&gt;The Terminator&lt;/a&gt;&lt;img alt="" border="0" height="1" src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;amp;l=btl&amp;amp;camp=213689&amp;amp;creative=392969&amp;amp;o=1&amp;amp;a=B000F9RB9Y" style="border: none !important; margin: 0px !important; padding: 0px !important;" width="1" /&gt;".    &lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_XfmDdL0570Q/TIf92IDe5NI/AAAAAAAAAFg/55MBCUu6uVU/s1600/6502terminator.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/_XfmDdL0570Q/TIf92IDe5NI/AAAAAAAAAFg/55MBCUu6uVU/s320/6502terminator.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;
&lt;/div&gt;Instructions consist of a three letter op code, together with an optional argument. &amp;nbsp;For example, &lt;code&gt;LDA&lt;/code&gt; loads the supplied value into the accumulator and sets the zero or negative fields appropriately.  For some simple instructions no argument is needed (for example, CLC clears the carry flag but requires no argument).  &lt;br /&gt;
&lt;br /&gt;
Currently, I've represented instructions as just the three letter mnemonic and an optional address mode.    In the future I'll try and make it more a specification by including the flags the operation is allowed to set, together with restricting to only the allowed addressing mode (for example, some instructions don't support all addressing modes, LDA wouldn't make much sense with a relative address).&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/570871.js?file=sampleInstructions.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
Armed with this, all that needs to be done is implement the various operations.  A simple function &lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;execute :: CPU -&amp;gt; Instruction -&amp;gt; IO ()&amp;nbsp;&lt;/span&gt;simply matches the instruction and executes it.&lt;br /&gt;
&lt;br /&gt;
Finally after all that we can execute a really simple lump of code to see if it works.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/570882.js?file=6502examplecode.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
In the example above we load a value into the accumulator.  Shift it left (i.e. multiply by 2), store this value in some location in memory.  Shift it left again (multiply by 4) and left once more (multiple by 8).  We then add the original multipled by 8 with the value in the accumulator.  For this really simple lump of code, it seems to work :)&lt;br /&gt;
&lt;br /&gt;
My next plans are to:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Make things more type-safe (e.g. no wrong addressing modes, no modifying the wrong flags)&lt;/li&gt;
&lt;li&gt;Write a simple parser using Parsec so that I can write little bits of assembler in a nicer syntax&lt;/li&gt;
&lt;li&gt;Finish implementing the remainder of the operations (need to get the design right before I go much further)&lt;/li&gt;
&lt;li&gt;Encode the instructions so that I actually use the program counter and potentially load in pre-existing code.&lt;/li&gt;
&lt;li&gt;Adding some IO to actually make it useful...&lt;/li&gt;
&lt;/ul&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-706371031528043965?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/IJJc7Zbn7F8kUn5VtGqA9o1xw8A/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/IJJc7Zbn7F8kUn5VtGqA9o1xw8A/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/IJJc7Zbn7F8kUn5VtGqA9o1xw8A/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/IJJc7Zbn7F8kUn5VtGqA9o1xw8A/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/KMJvGepGupU" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/706371031528043965?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/706371031528043965?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/KMJvGepGupU/beginnings-of-6502-emulator-in-haskell.html" title="The Beginnings of a 6502 Emulator in Haskell" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/_XfmDdL0570Q/TIfwRi8TrhI/AAAAAAAAAFY/hbzTEoMY2Kw/s72-c/L_NTE-NTE6502.jpg" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/09/beginnings-of-6502-emulator-in-haskell.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkQFQns7eSp7ImA9Wx5QE08.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-7750313820439736738</id><published>2010-08-29T16:48:00.000-07:00</published><updated>2010-08-31T22:51:53.501-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-31T22:51:53.501-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="performance" /><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="stm" /><category scheme="http://www.blogger.com/atom/ns#" term="concurrency" /><title>Speeding up the Ants program</title><content type="html">In my previous post, I looked at implementing &lt;a href="http://www.fatvat.co.uk/2010/08/ants-and-haskell.html"&gt;Ants in Haskell&lt;/a&gt;.  As comments on the blog indicated, things started off OK, but performance rapidly got worse.  This is the first time I've had this kind of problem before, so I thought I'd document how I started to solve it.&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt; has a huge range of performance testing tools to help find out what goes wrong.  &lt;a href="http://www.amazon.com/Real-World-Haskell-Bryan-OSullivan/dp/0596514980?ie=UTF8&amp;amp;tag=fatvat-20&amp;amp;link_code=btl&amp;amp;camp=213689&amp;amp;creative=392969" target="_blank"&gt;Real World Haskell&lt;/a&gt;&lt;img alt="" border="0" height="1" src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;amp;l=btl&amp;amp;camp=213689&amp;amp;creative=392969&amp;amp;o=1&amp;amp;a=0596514980" style="border: none !important; margin: 0px !important; padding: 0px !important;" width="1" /&gt; has a greater chapter on "Profiling and Optimization" that helps identify the tools available.  &lt;br /&gt;
&lt;br /&gt;
The first tool in the chain is simply to run the application and collect some basic statistics about the runtime.  You can do this with the command &lt;code&gt;./AntsVis +RTS -sstderr&lt;/code&gt;.  Anything after the "+RTS" is a &lt;a href="http://haskell.org/ghc/docs/6.12.1/html/users_guide/runtime-control.html"&gt;Haskell runtime parameter&lt;/a&gt;.  This produced the following output:&lt;br /&gt;
&lt;br /&gt;
&lt;code&gt;&lt;br /&gt;
1,835,837,744 bytes allocated in the heap&lt;br /&gt;
328,944,448 bytes copied during GC&lt;br /&gt;
2,908,728 bytes maximum residency (25 sample(s))&lt;br /&gt;
142,056 bytes maximum slop&lt;br /&gt;
9 MB total memory in use (1 MB lost due to fragmentation)&lt;br /&gt;
&lt;br /&gt;
Generation 0:  3483 collections,     0 parallel,  1.54s,  1.54s elapsed&lt;br /&gt;
Generation 1:    25 collections,     0 parallel,  0.09s,  0.07s elapsed&lt;br /&gt;
&lt;br /&gt;
INIT  time    0.00s  (  0.00s elapsed)&lt;br /&gt;
MUT   time    3.04s  (  3.13s elapsed)&lt;br /&gt;
GC    time    1.63s  (  1.61s elapsed)&lt;br /&gt;
RP    time    0.00s  (  0.00s elapsed)&lt;br /&gt;
PROF  time    0.00s  (  0.00s elapsed)&lt;br /&gt;
EXIT  time    0.00s  (  0.00s elapsed)&lt;br /&gt;
Total time    4.67s  (  4.74s elapsed)&lt;br /&gt;
&lt;br /&gt;
%GC time      34.9%  (34.0% elapsed)&lt;br /&gt;
&lt;br /&gt;
Alloc rate    603,893,994 bytes per MUT second&lt;br /&gt;
&lt;br /&gt;
Productivity  65.1% of total user, 64.2% of total elapsed&lt;br /&gt;
&lt;/code&gt;&lt;br /&gt;
&lt;br /&gt;
Ouch... The program spent 35% of the time doing garbage collection and only 65% of the time doing useful stuff.  It also allocated 603MB of memory / second!  That's clearly not acceptable and I'm definitely allocating more memory than I should! &lt;br /&gt;
&lt;br /&gt;
I lack the intuition to understand where the problems are from looking at the code, so time to break out the profiling program.  GHC gives quite a few compiler options, but for profiling these seem to be the important ones:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;code&gt;-prof&lt;/code&gt; - Enables profiling&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;code&gt;-caf-all&lt;/code&gt; - Constant Applicative form for all top-level items (constant costs, one for each module.)&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;code&gt;-auto-all&lt;/code&gt; - Cost-centre analysis for every top-level function&lt;br /&gt;
&lt;/li&gt;
&lt;/ul&gt;After you compile with those functions, you can run the program again with &lt;code&gt;./AntsVis +RTS -hc -p&lt;/code&gt; to get heap information together with profiling.  That gives me the following picture.  Flat things are good, things going up at a steep gradient and bad.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_XfmDdL0570Q/THrNsRfeuvI/AAAAAAAAAEw/t-ph60H66FE/s1600/AntsVisProfileAtTheBeginning.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img alt="First Run of the Profiling Tool" border="0" src="http://4.bp.blogspot.com/_XfmDdL0570Q/THrNsRfeuvI/AAAAAAAAAEw/t-ph60H66FE/s320/AntsVisProfileAtTheBeginning.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;First Run of the Profiling Tool&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;
As you might be able to see from the image (but click to see the full one) the finger is pointed squarely at the chain of functions involving &lt;code&gt;updateTVar&lt;/code&gt; and &lt;code&gt;evaporate&lt;/code&gt;, the code for which is shown below:&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/556683.js?file=evaporate.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
So how come this tiny inoffensive lump of code is allocating a fuck-ton of memory?  The blame lies solely in the record update function (&lt;code&gt;c {pheromone = pheromone c * evapRate}&lt;/code&gt;).  Laziness means that this isn't evaluated fully, which means it keeps a reference to the previous value of the cell.  I'm not interested in the value, but because there is a reference to it, the runtime can't though the value away.  Time for some &lt;a href="http://www.haskell.org/haskellwiki/Performance/Strictness"&gt;strictness annotations&lt;/a&gt;.  Note that I've also done a few refactorings (the World data type was just a wrapper around Vector, so I got rid of the wrapper, and typing pheromone in 100 times was driving me nuts).&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/556722.js?file=strict_evaporate.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
The &lt;a href="http://haskell.org/ghc/docs/6.12.2/html/libraries/base-4.2.0.1/Prelude.html#v:seq"&gt;&lt;code&gt;seq&lt;/code&gt;&lt;/a&gt; primitive is of type &lt;code&gt;a -&amp;gt; b -&amp;gt; b&lt;/code&gt; and evaluates the first argument to &lt;a href="http://en.wikipedia.org/wiki/Beta_normal_form"&gt;head-normal form&lt;/a&gt; and returns the second.  Hopefully, this should force the evaluation of the cells and free the memory up immediately.  Running with the profiler again gives a much better picture:&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_XfmDdL0570Q/THrVtdRcVJI/AAAAAAAAAE4/yhu21ChBaQY/s1600/bit_of_strictness.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/_XfmDdL0570Q/THrVtdRcVJI/AAAAAAAAAE4/yhu21ChBaQY/s320/bit_of_strictness.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;A little bit of strictness added&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;
Much better!  Maximum memory usage is significantly down and when I run it my CPU usage is significantly down.  &lt;code&gt;seq&lt;/code&gt; isn't a panacea though.  Earlier attempts at adding bang patterns everywhere led me astray - if you make everything strict, then you can get unintended consequences (I was having problems with fromJust being called on Nothing - not entirely sure why).  I think I'll stick to putting &lt;strike&gt;&lt;code&gt;seq&lt;/code&gt;&lt;/strike&gt; strict annotations based on the information from the profiling tools, at least I until I have a better intuition for how it will affect the final result.&lt;br /&gt;
&lt;br /&gt;
Updates thanks to very helpful comments from augustss, don and gtllama!&lt;br /&gt;
&lt;br /&gt;
&lt;code&gt;seq&lt;/code&gt; is not the right way to achieve the strictness, see comments from Don Stewart about funbox-strict-fields and ! patterns for strictness.&lt;br /&gt;
&lt;br /&gt;
I've removed the &lt;a href="http://hackage.haskell.org/package/criterion"&gt;Criterion&lt;/a&gt; graphs as I don't think the way I generated them was reliable. &lt;strike&gt;&amp;nbsp;I believe my use of "seq" was completely bogus and laziness bit me.  The faster times almost certainly came from not evaluating the expression correctly once &lt;code&gt;seq&lt;/code&gt; was in place. &amp;nbsp;Rather&amp;nbsp;embarrassing!&lt;/strike&gt;.  From the comments, "Computing a value of type STM () does not execute it. STM code is only executed if it "meets up with" an 'atomically' that is executed."&lt;br /&gt;
&lt;br /&gt;
I also go to the bottom of the slowness updating.  Most definitely not an STM bug.  The problem occured because I was trying to do a single STM transaction to do evaporation - this meant that evaporation could only succeed if nothing else wrote over the pheromones used in the evaporation. &amp;nbsp;Since many ants are marching and dropping pheromones this seems to mean that this transaction would need to be retried many times. &amp;nbsp;Switching this over to use a sequence of atomic operations (one per cell) removed this problem.  Performance is now consistent and when I run with profiling I get a flat heap output as shown below:&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_XfmDdL0570Q/TH11ge5KtfI/AAAAAAAAAFI/EUxFDzR4QbQ/s1600/flat.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/_XfmDdL0570Q/TH11ge5KtfI/AAAAAAAAAFI/EUxFDzR4QbQ/s320/flat.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Flat as a Pancake&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;This was definitely the most complicated Haskell program I've attempted so far, but also the one I've learnt the most from! &amp;nbsp;Probably still some more mistakes in the code to find, but I've pushed the latest code to &lt;a href="http://github.com/fffej/haskellprojects/tree/master/ants/"&gt;the repository&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-7750313820439736738?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/KPjcgoCVvn5WnAI8Fy_qaEjfBqI/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KPjcgoCVvn5WnAI8Fy_qaEjfBqI/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/KPjcgoCVvn5WnAI8Fy_qaEjfBqI/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KPjcgoCVvn5WnAI8Fy_qaEjfBqI/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/bBsnUp1-oqY" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7750313820439736738?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7750313820439736738?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/bBsnUp1-oqY/speeding-up-ants-program.html" title="Speeding up the Ants program" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_XfmDdL0570Q/THrNsRfeuvI/AAAAAAAAAEw/t-ph60H66FE/s72-c/AntsVisProfileAtTheBeginning.png" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/08/speeding-up-ants-program.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D08HRX4zfSp7ImA9Wx5QEU8.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-6532671699357395376</id><published>2010-08-27T16:12:00.000-07:00</published><updated>2010-08-29T16:50:34.085-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-29T16:50:34.085-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="stm" /><category scheme="http://www.blogger.com/atom/ns#" term="concurrency" /><title>Ants and Haskell</title><content type="html">&lt;a href="http://en.wikipedia.org/wiki/Software_transactional_memory"&gt;Software Transactional Memory&lt;/a&gt; (&lt;acronym title="Software Transactional Memory"&gt;STM&lt;/acronym&gt;) is a concurrency control mechanism designed to simplify programming for shared memory computers.  &lt;a href="http://www.amazon.com/Beautiful-Code-Leading-Programmers-Practice/dp/0596510047?ie=UTF8&amp;amp;tag=fatvat-20&amp;amp;link_code=btl&amp;amp;camp=213689&amp;amp;creative=392969" target="_blank"&gt;Beautiful Code&lt;/a&gt;&lt;img alt="" border="0" height="1" src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;amp;l=btl&amp;amp;camp=213689&amp;amp;creative=392969&amp;amp;o=1&amp;amp;a=0596510047" style="border: none !important; margin: 0px !important; padding: 0px !important;" width="1" /&gt; contains a great introduction to the concepts of STM in the Haskell language &lt;br /&gt;
&lt;br /&gt;
In most languages, &lt;a href="http://en.wikipedia.org/wiki/Lock_(computer_science)"&gt;locks&lt;/a&gt; and &lt;a href="http://en.wikipedia.org/wiki/Monitor_(synchronization)"&gt;condition variables&lt;/a&gt; are the main mechanisms for controlling access, but these are notoriously hard to get right.  &lt;a href="http://www.amazon.com/Java-Concurrency-Practice-Brian-Goetz/dp/0321349601?ie=UTF8&amp;amp;tag=fatvat-20&amp;amp;link_code=btl&amp;amp;camp=213689&amp;amp;creative=392969" target="_blank"&gt;Java Concurrency in Practice&lt;/a&gt;&lt;img alt="" border="0" height="1" src="http://www.assoc-amazon.com/e/ir?t=fatvat-20&amp;amp;l=btl&amp;amp;camp=213689&amp;amp;creative=392969&amp;amp;o=1&amp;amp;a=0321349601" style="border: none !important; margin: 0px !important; padding: 0px !important;" width="1" /&gt; is a good read to understand just how many ways there are to shoot yourself in the foot (too few locks, too many locks, wrong locks, race conditions, wrong order, error conditions, deadlock, livelock, live stock, brain explosion).  STM simplifies shared memory programming by providing database like semantics for changing memory.  Reads/Writes to shared memory happen within a transaction - each memory access appears to happens in isolation from the others and appears atomically to observers.   If a transaction conflict with another, then one of the transactions is retried.  An implementation typically records the memory accesses somehow and then can decide whether there was a conflict.  Languages that restrict mutability (like &lt;a href="http://www.clojure.org/"&gt;Clojure&lt;/a&gt; and &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt;) have a significantly simpler implementation than imperative languages such as C/C++.&lt;br /&gt;
&lt;br /&gt;
Composability is another advantage for STM.  For example, take &lt;a href="http://download.oracle.com/javase/6/docs/api/java/util/Hashtable.html"&gt;java.util.Hashtable&lt;/a&gt; - what if you want to do an insert/delete as a single atomic operation and only make the contents visible to other threads once finished?  As the original design didn't do this, you're on your own.  In contrast STM composes well.&lt;br /&gt;
&lt;br /&gt;
Both &lt;a href="http://www.clojure.org/"&gt;Clojure&lt;/a&gt; and &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt; feature support for STM.  The canonical example in Clojure is &lt;a href="http://clojure.googlegroups.com/web/ants.clj"&gt;Ants.clj&lt;/a&gt; that demonstrates STM via a simple simulation of foraging ants (see also &lt;a href="http://www.fatvat.co.uk/2009/07/flocking-about-with-clojure.html"&gt;Flocking about with Clojure&lt;/a&gt;).  As a learning exercise I thought it'd be neat to try to convert this over to Haskell!&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_XfmDdL0570Q/THgyegTPEsI/AAAAAAAAAEQ/XzkMjpawWAA/s1600/ants.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/_XfmDdL0570Q/THgyegTPEsI/AAAAAAAAAEQ/XzkMjpawWAA/s320/ants.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Ants in Haskell&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;
To model the ants world, I use the following data structures.  Transactional variables (TVars) are used to hold a reference to a mutable variable.  For the Ants simulation, I use a &lt;a href="http://hackage.haskell.org/packages/archive/vector/0.6.0.2/doc/html/Data-Vector.html"&gt;Vector&lt;/a&gt; of TCell's to represent the ants world.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/552401.js?file=gistfile1.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
TVars can only be modified within the &lt;a href="http://hackage.haskell.org/packages/archive/stm/2.1.1.2/doc/html/Control-Monad-STM.html"&gt;STM context&lt;/a&gt;.  The key thing is that the &lt;b&gt;only&lt;/b&gt; way to mutate transactional variables is from within the STM monad.  To fiddle with the variables within TVar you can use &lt;code&gt;newTVar&lt;/code&gt;, readTVar and &lt;code&gt;writeTVar&lt;/code&gt;.  Oddly there didn't seem to be a primitive operation to update a TVar based on its current value.  &lt;code&gt;updateTVar&lt;/code&gt; updates the TVar by applying a function to the value inside.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/554054.js?file=gistfile1.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://hackage.haskell.org/packages/archive/stm/2.1.1.2/doc/html/Control-Monad-STM.html#v%3Acheck"&gt;&lt;code&gt;check&lt;/code&gt;&lt;/a&gt; verifies that a condition is true and if it isn't true then the transaction is retried.  The key point is that the transaction is only retried when there's a reason to do so (e.g. memory read/write) so you aren't just heating the CPU whilst the condition is being validated.  As an example, when we move an ant forward, we want to check that there is not an ant in the way.  If there is an ant in the way, we'll wait till the coast is clear before moving.&lt;br /&gt;
&lt;br /&gt;
&lt;script src="http://gist.github.com/554190.js?file=gistfile1.hs"&gt;
&lt;/script&gt;&lt;br /&gt;
&lt;br /&gt;
At some point you have to run your STM actions.  &lt;code&gt;atomically&lt;/code&gt; runs the STM transactions from the IO monad (e.g. the top-level) and returns the result.  A very important point is that you want your actions to be in the STM monad as much as possible.  If all of your functions are run within the IO monad then you lose the composability aspect.  The pattern to use is make everything use the STM monad and glue together randomly and you won't have a threading problem (you still have other problems though, it's not magic).&lt;br /&gt;
&lt;br /&gt;
The Clojure code used &lt;a href="http://clojure.org/agents"&gt;agents&lt;/a&gt; to represent each ant.  I'm not sure what the most idiomatic translation to Haskell is, but I spawned a thread for each ant using &lt;a href="http://haskell.org/ghc/docs/6.12.2/html/libraries/base-4.2.0.1/Control-Concurrent.html"&gt;Control.Concurrent&lt;/a&gt; and &lt;code&gt;forkIO&lt;/code&gt;.  Haskell threads are incredibly light-weight so spawning even thousands of them is not a problem.  Each ant thread simply evaluates the behaviour, moves, sleeps and repeats. &lt;br /&gt;
&lt;br /&gt;
The rest of the code is more or less a direction translation from the Clojure.  It's pretty verbose so I won't bother posting it here, but the full code is on &lt;a href="http://github.com/fffej/haskellprojects/tree/master/ants/"&gt;my github page&lt;/a&gt;.  You should be able to compile it with &lt;code&gt;ghc -lglut --make -main-is AntsVis AntsVis.hs&lt;/code&gt;.  Any hints on how to make it suck less appreciated!&lt;br /&gt;
&lt;br /&gt;
Performance seems very good with the default compiler options, I'm able to run with 100+ ant agents all running concurrently. &amp;nbsp;The programming is very simple and once I'd found out and added the appropriate check logic into &lt;code&gt;move&lt;/code&gt; everything worked properly.&lt;br /&gt;
&lt;br /&gt;
Hurrah for simple concurrent programming!&lt;br /&gt;
&lt;br /&gt;
(update 30/8/2010 - after finding out the performance sucked after a while with a few more ants than I'd tested with I looked at &lt;a href="http://www.fatvat.co.uk/2010/08/speeding-up-ants-program.html"&gt;speeding up the Ants program&lt;/a&gt;).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-6532671699357395376?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/SYTa-alngX0537RSFL9py2dpUOI/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/SYTa-alngX0537RSFL9py2dpUOI/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/SYTa-alngX0537RSFL9py2dpUOI/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/SYTa-alngX0537RSFL9py2dpUOI/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/9mXcgwnsS1s" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6532671699357395376?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6532671699357395376?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/9mXcgwnsS1s/ants-and-haskell.html" title="Ants and Haskell" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_XfmDdL0570Q/THgyegTPEsI/AAAAAAAAAEQ/XzkMjpawWAA/s72-c/ants.png" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/08/ants-and-haskell.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkEMRnw6fSp7ImA9Wx5REEo.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-6579461536753601669</id><published>2010-08-17T12:15:00.000-07:00</published><updated>2010-08-17T12:51:27.215-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-17T12:51:27.215-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="yesod" /><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="freebase" /><title>Freebasing with Haskell</title><content type="html">&lt;a href="http://www.freebase.com/"&gt;Freebase&lt;/a&gt; is a collection of structured data, queryable via a powerful &lt;acronym title="Application Programming Interface"&gt;API&lt;/acronym&gt; and recently acquired by &lt;a href="http://googleblog.blogspot.com/2010/07/deeper-understanding-with-metaweb.html"&gt;Google&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;Data is categorized according to different types.  A Topic represents a thing (such as a physical entity, a substance or a building).  Each Topic is associated with a number of Types, for example the &lt;a href="http://www.freebase.com/view/en/salvador_dali"&gt;Salvador Dali topic&lt;/a&gt; might be associated with &lt;a href="http://www.freebase.com/view/en/painting"&gt;Painting&lt;/a&gt; and &lt;a href="http://www.freebase.com/view/en/surrealism"&gt;Surrealism&lt;/a&gt;.  A group of related Types are organized into a Domain.  For example, the &lt;a href="http://schemas.freebaseapps.com/domain?id=/book"&gt;books domain&lt;/a&gt; contains types for book, literature subject and publisher.  Domains, Types and Properties are further organized into namespaces which can be thought of top-level containers to reference items.  For example, the &lt;code&gt;/en&lt;/code&gt; namespace contains human-readable IDs and URLs for popular topics.  Similarly, the &lt;code&gt;/wikipedia/en&lt;/code&gt; namespace gives a key that can be used to formulate a URL representing the corresponding article on &lt;a href="http://www.wikipedia.org/"&gt;Wikipedia&lt;/a&gt;.  This extra structure allows precise concepts to be expressed, with no ambiguity.  &lt;br /&gt;&lt;br /&gt;FreeBase has a number of &lt;a href="http://www.freebase.com/docs/web_services"&gt;web services&lt;/a&gt; you can use to interrogate the database.  The web services accept and return &lt;a href="http://www.json.org/"&gt;&lt;acronym title="JavaScript Object Notation"&gt;JSON&lt;/acronym&gt;&lt;/a&gt;.  The most basic services just give you information about the &lt;a href="http://www.freebase.com/docs/web_services/status"&gt;status&lt;/a&gt; and &lt;a href="http://www.freebase.com/docs/web_services/version"&gt;version&lt;/a&gt; in JSON format.  The &lt;a href="http://hackage.haskell.org/package/json"&gt;Text.JSON&lt;/a&gt; package provides a method for reading / writing JSON in Haskell.  This, coupled with Network.HTTP, makes making basic requests very simple.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/531547.js?file=simpleFreebaseQueries.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;More advanced requests use the &lt;a href="http://mql.freebaseapps.com/"&gt;&lt;acronym title="Metaweb Query Language"&gt;MQL&lt;/acronym&gt;&lt;/a&gt; to express queries to Freebase.  The underlying database is best thought of as a directed graph of nodes and relationships.  Each node has a unique identifier and a record of who created the node.    A node has a number of outgoing edges which are either relationships between other nodes of a primitive value.  For example the &lt;code&gt;/en/iggy_pop&lt;/code&gt; node might be linked to the &lt;code&gt;/en/the_passenger/&lt;/code&gt; via the &lt;code&gt;&gt;/music/album/artist&lt;/code&gt; property.  Relationships between nodes can have property values associated with them, for example &lt;code&gt;/en/the_passenger&lt;/code&gt; might be linked to &lt;code&gt;/music/track/length&lt;/code&gt; with &lt;code&gt;4:44&lt;/code&gt;.&lt;br /&gt;&lt;br /&gt;The &lt;a href="http://www.freebase.com/app/queryeditor"&gt;Query Editor&lt;/a&gt; provides a way of running these queries in a web browser and this, together with the &lt;a href="http://schemas.freebaseapps.com/"&gt;Schema Explorer&lt;/a&gt; allow you to get explore the data available in Freebase.  Queries are expressed as JSON and consist of a JSON object from which the blanks are filled in.  As an example, if I submit the following query with a blank array, then the returned value has the null filled in with the correct details (apparently Indiana Jones was released 23rd May 1984).&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;  {&lt;br /&gt;     query: {&lt;br /&gt;         type: "/film/film"&lt;br /&gt;         name: 'Indiana Jones and the Temple of Doom',&lt;br /&gt;         initial_release_date: null&lt;br /&gt;     }&lt;br /&gt;  }&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;The Freebase &lt;a href="http://www.freebase.com/docs/mql/ch04.html#mqlreadservice"&gt;documentation&lt;/a&gt; gives an example of a basic web app built using &lt;a href="http://en.wikipedia.org/wiki/PHP"&gt;PHP&lt;/a&gt; and I thought it'd be a good learning exercise to convert this over to a functional programming language.  &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt; has quite a few web frameworks, including &lt;a href="http://www.snapframework.com/"&gt;Snap&lt;/a&gt;, &lt;a href="http://happstack.com/index.html"&gt;HappStack&lt;/a&gt; and &lt;a href="http://docs.yesodweb.com/"&gt;Yesod&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;Yesod had the strangest name, so it seemed like the logical choice.  Yesod uses some extensions to Haskell, &lt;a href="http://www.haskell.org/haskellwiki/GHC/Type_families"&gt;Type Families&lt;/a&gt;, &lt;a href="http://www.haskell.org/haskellwiki/Quasiquotation"&gt;quasi-quoting&lt;/a&gt; and &lt;a href="http://www.haskell.org/haskellwiki/Template_Haskell"&gt;Template Haskell&lt;/a&gt;.  Quasi-quoting and &lt;acronym="Template Haskell"&gt;TH&lt;/acronym&gt; are particular exciting as they allow a &lt;a href="http://haml-lang.com/"&gt;HAML&lt;/a&gt; like syntax to be used as statically compiled Haskell.&lt;br /&gt;&lt;br /&gt;Each Yesod application has a site type that is passed to all functions and contains arguments applicable to the whole site, such as database connections and global settings.  URLs are handled by a routing table which, again, is statically compiled and means you can not create an invalid internal link.  Neat! &lt;br /&gt;&lt;br /&gt;For the basic album lister app, we define three really simple routes.  A home page, a URL to call to get the albums, and a link to the static files.  Note that the static files is again checked at compile time.  If the relevant JS/CSS files don't exist, then the application fails to compile! &lt;br /&gt;&lt;br /&gt;The &lt;code&gt;getHomeR&lt;/code&gt; route handler (which ends in capital R by convention) simpler just renders a Hamlet template.  &lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/531604.js?file=app.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;A little bit of JavaScript in &lt;code&gt;script.js&lt;/code&gt; makes an Ajax call to retrieve the list of bands.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/531613.js?file=gistfile1.js"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;And the basic example from &lt;a href="http://www.freebase.com/docs/mql/ch04.html#phpmqlexample"&gt;the MQLRead&lt;/a&gt; documentation is converted over.  I've only spent a few hours with Yesod but I'm definitely impressed so far, good documentation and easy to get something simple done!  The complete code is on &lt;a href="http://github.com/fffej/haskellprojects/tree/master/freebase/"&gt;my github page&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-6579461536753601669?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/fkMHc-2B8ZxvO9NEzvEXELgct04/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/fkMHc-2B8ZxvO9NEzvEXELgct04/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/fkMHc-2B8ZxvO9NEzvEXELgct04/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/fkMHc-2B8ZxvO9NEzvEXELgct04/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/TMxnslRiMCg" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6579461536753601669?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/6579461536753601669?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/TMxnslRiMCg/freebasing-with-haskell.html" title="Freebasing with Haskell" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2010/08/freebasing-with-haskell.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUENQ3c7eSp7ImA9Wx5SF04.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-649716122895050340</id><published>2010-08-13T13:24:00.000-07:00</published><updated>2010-08-13T15:14:52.901-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-13T15:14:52.901-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="not-really-that-serious" /><category scheme="http://www.blogger.com/atom/ns#" term="java" /><category scheme="http://www.blogger.com/atom/ns#" term="history" /><title>A Brief History of Java</title><content type="html">In 1995, &lt;a href="http://en.wikipedia.org/wiki/Sun_Microsystems"&gt;Sun&lt;/a&gt; released the The &lt;a href="http://en.wikipedia.org/wiki/Java_(programming_language)"&gt;Java&lt;/a&gt; programming language as a part of a broader strategy known as the Java platform.  The "write once, run anywhere" (WORA) motto initially promised to make Java the &lt;i&gt;everywhere language&lt;/i&gt;, running on everything from wrist watches to cell phones and laptops to supercomputers.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_XfmDdL0570Q/TGWqWniUQ8I/AAAAAAAAADw/6O3e3E0ApGM/s1600/jblendwatch.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 306px; height: 240px;" src="http://2.bp.blogspot.com/_XfmDdL0570Q/TGWqWniUQ8I/AAAAAAAAADw/6O3e3E0ApGM/s400/jblendwatch.jpg" border="0" alt="JBlend Watch" id="BLOGGER_PHOTO_ID_5504993425077060546" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The initial reception of Java was mixed.  For every &lt;a href="http://www.jwz.org/doc/java.html"&gt;Java sucks&lt;/a&gt;, an evangelist promised that it would change the world, and &lt;a href="http://www.theregister.co.uk/2001/03/30/java_toaster_prints_weather_forecast/"&gt;Java powered toasters&lt;/a&gt; were just around the corner.&lt;br /&gt;&lt;br /&gt;As Java evolved, it accrued more baggage as it fought to keep &lt;acronym title="Write once, run anywhere"&gt;WORA&lt;/acronym&gt;.  Deprecated methods sprang up everywhere, but Sun had to keep these features in place to provide backwards compatibility.  The &lt;code&gt;java.util.DateTime&lt;/code&gt; package became synonymous with &lt;a href="http://www.jroller.com/cpurdy/entry/the_seven_habits_of_highly" type="The Seven Habits of Highly Dysfunctional Design"&gt;dysfunctional design&lt;/a&gt; and bad naming conventions were fixed for life (is it size or is it length?).&lt;br /&gt;&lt;br /&gt;&lt;a href="http://en.wikipedia.org/wiki/.NET_Framework"&gt;Microsoft's .NET&lt;/a&gt; began to encroach on Java's domain.    Microsoft's team introduced &lt;a href="http://en.wikipedia.org/wiki/Delegate_(.NET)"&gt;&lt;i&gt;delegates&lt;/i&gt;&lt;/a&gt;, a type-safe function pointer that makes event handling considerably simpler.  Java needed a competitor, and fast, so in version 1.1, Java grew &lt;i&gt;inner classes&lt;/i&gt;, a way of achieving a similar effect, but in a more limited, laboured fashion.  A &lt;a href="http://java.sun.com/docs/white/delegates.html"&gt;Java white-paper&lt;/a&gt; concluded &lt;quote&gt;"Bound method references are simply unnecessary.... They detract from the simplicity and unity of the Java language"&lt;/quote&gt;.  Meanwhile, efforts continue to this day for Java to adopt &lt;a href="http://openjdk.java.net/projects/lambda/"&gt;bound method references&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;When Java reached version 1.4, Sun decided that a new approach was required to compete with Microsoft's .NET strategy.  Sun thought long and hard and branded version 1.4 as "5" in an attempt to move ahead of .NET 2.0&lt;br /&gt;&lt;br /&gt;This was accompanied by a decision to implement &lt;a href="http://en.wikipedia.org/wiki/Generics_in_Java"&gt;&lt;i&gt;generics&lt;/i&gt;&lt;/a&gt;, a way of achieving additional type safety.  Unfortunately, type-safety came at the cost of typing.  As engineers adopted generics they were often heard cursing as they bashed out yet another &lt;code&gt;List&amp;lt;Foo&amp;gt; foos = new ArrayList&amp;lt;Foo&amp;gt;&lt;/code&gt; statement.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/_XfmDdL0570Q/TGWuHxcM_QI/AAAAAAAAAEI/fMI87QD_2oE/s1600/fast-typing.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 300px; height: 200px;" src="http://3.bp.blogspot.com/_XfmDdL0570Q/TGWuHxcM_QI/AAAAAAAAAEI/fMI87QD_2oE/s400/fast-typing.jpg" border="0" alt="" id="BLOGGER_PHOTO_ID_5504997568084245762" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Universities quickly adopted Java; no longer did students have to learn the intricacies of manual memory management and pointers, instead they could rely on Java to do the heavy lifting and focus on &lt;i&gt;solving problems&lt;/i&gt;.  Unfortunately, this led to the production of a &lt;a href="http://www.joelonsoftware.com/articles/ThePerilsofJavaSchools.html" title="The Perils of Java Schools"&gt;league of developers&lt;/a&gt; known as the "Patternistas" who only had a hammer and everything was a nail.  Under their leadership, Java naming conventions became &lt;a href="http://steve-yegge.blogspot.com/2006/03/execution-in-kingdom-of-nouns.html" title="Execution in the Kingdom of Nouns"&gt;increasingly ridiculous&lt;/a&gt;.  When class names such as &lt;a href="http://ws.apache.org/xmlrpc/apidocs/org/apache/xmlrpc/server/RequestProcessorFactoryFactory.RequestSpecificProcessorFactoryFactory.html"&gt;RequestProcessorFactoryFactory&lt;/a&gt; became common place some developers began to question the wisdom of the infinite tower of abstraction.&lt;br /&gt;&lt;br /&gt;As developers realized they were just shuffling thousands of lines of code around each day, they needed a word to justify their existence.  That word was &lt;a href="http://www.refactoring.com/"&gt;&lt;i&gt;refactoring&lt;/i&gt;&lt;/a&gt;.   The patternistas rejoiced; not only could they apply factory factories, singletons and visitors to solve problems, but they could repeatedly change their mind and justify it with a buzzword!&lt;br /&gt;&lt;br /&gt;A whole industry evolved to satisfy the patternistas.  RSI injuries were becoming increasingly common place amongst Java veterans so a new breed of development environment was built.  &lt;a href="http://www.jetbrains.com/idea/"&gt;IntelliJ&lt;/a&gt; and &lt;a href="http://www.eclipse.org/"&gt;Eclipse&lt;/a&gt; were built with the aim of minimizing the damage to developers.  Advanced code completion and refactoring meant that developers merely had to press key combinations instead of typing in verbose code constructs for &lt;a href="http://norvig.com/design-patterns/" title="Design Patterns in Dynamic Languages"&gt;missing language features&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/_XfmDdL0570Q/TGWr_3g5n4I/AAAAAAAAAEA/25J_7j0wg6M/s1600/hammer_nail.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 232px; height: 325px;" src="http://4.bp.blogspot.com/_XfmDdL0570Q/TGWr_3g5n4I/AAAAAAAAAEA/25J_7j0wg6M/s400/hammer_nail.jpg" border="0" alt="If all you have is a hammer, everything looks like a nail" id="BLOGGER_PHOTO_ID_5504995233252351874" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Java began the push for the &lt;a href="http://en.wikipedia.org/wiki/Star_Trek:_Enterprise"&gt;Enterprise&lt;/a&gt; by employing some of the leading &lt;a href="http://www.joelonsoftware.com/articles/fog0000000018.html"&gt;architecture astronauts&lt;/a&gt; to come up with a paradigm-shifting, synergistic approach for empowering enterprise software.  The result was nothing short of a revolution; beans were born.  Beans are apparently a server-side component architecture for the modular constructor of enterprise applications. &lt;br /&gt;&lt;br /&gt;Having softened up resistance to angle brackets through the use of generics, Java moved forward and jumped on the XML bandwagon.  By using XML, developers were able to express concise concepts as huge, verbose angular nightmares.  This has the advantage that XML files (unlike other files) can be easily read by other computers.  The small price of being unreadable for humans was felt to be worth paying.  Services such as &lt;a href="http://ant.apache.org/"&gt;Ant&lt;/a&gt; and &lt;a href="http://www.jboss.org/"&gt;JBoss&lt;/a&gt; led the way with executable XML and powerful deployment descriptors.&lt;br /&gt;&lt;br /&gt;Meanwhile, in a galaxy far away from architecture astronauts, a new breed of programmer decided that &lt;i&gt;getting shit done&lt;/i&gt; was more important than typing shit all day long.  This led to the birth of frameworks such as &lt;a href="http://rubyonrails.org/"&gt;Rails&lt;/a&gt; which are designed to get out-of-the-way and let you solve problems, an approach popularized as convention over configuration.  Rails received &lt;a href="http://martinfowler.com/bliki/EvaluatingRuby.html"&gt;positive feedback&lt;/a&gt; and at least some former Java addicts kicked the habit for Ruby.  The first signs of Java's grip loosening were becoming apparent.&lt;br /&gt;&lt;br /&gt;In August 2006 the Java 7 project began.  Many developers are pushing for a feature known as &lt;a href="http://en.wikipedia.org/wiki/Lambda_expressions"&gt;lambda expressions&lt;/a&gt; that would undoubtably simplify many of the common coding tasks that Java makes so painful.  Unfortunately, four years later the Java committee is still arguing over the nuances of this feature and it's possible that this will be dropped.  The lack of movement on Java 7 has led to the birth of new, sexier languages such as &lt;a href="http://www.clojure.org/"&gt;Clojure&lt;/a&gt; and &lt;a href="http://www.scala-lang.org/"&gt;Scala&lt;/a&gt;, designed to run within the Java ecosystem, but without the need for the language itself. &lt;br /&gt;&lt;br /&gt;The final nail in the coffin started being hammered in April 2009 when Oracle &lt;a href="http://www.oracle.com/us/corporate/press/018363"&gt;announced plans&lt;/a&gt; to acquire Sun.  Headed by "Larry, Prince of Darkness", Oracle is an &lt;a href="http://en.wikipedia.org/wiki/List_of_acquisitions_by_Oracle"&gt;acquisition machine&lt;/a&gt; that specializes in Enterprise Software and making money.  When Oracle's lawyers uncovered a set of software patents, they picked a big target and started a fight.  Targets come no bigger than &lt;a href="http://www.google.com/"&gt;Google&lt;/a&gt; (a leading advertising organization), so Oracle's lawyers pounced and battle has commenced.&lt;br /&gt;&lt;br /&gt;Where does this leave Java?  From its humble beginnings 15 years ago, Java has risen to the top of the pile of popular programming languages under Sun's stewardship.  Under Oracle, it's unclear whether the next version of Java will ever get released, let alone contain the features developers crave.  Is this the beginning of the end for Java?&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-649716122895050340?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/wng_HpzWy4QAOg-YT-TOFGp4RSQ/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/wng_HpzWy4QAOg-YT-TOFGp4RSQ/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/wng_HpzWy4QAOg-YT-TOFGp4RSQ/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/wng_HpzWy4QAOg-YT-TOFGp4RSQ/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/TTy-ah5cRa0" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/649716122895050340?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/649716122895050340?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/TTy-ah5cRa0/brief-history-of-java.html" title="A Brief History of Java" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_XfmDdL0570Q/TGWqWniUQ8I/AAAAAAAAADw/6O3e3E0ApGM/s72-c/jblendwatch.jpg" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/08/brief-history-of-java.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0YCRng7fSp7ImA9Wx5SEk8.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-3747565447289348263</id><published>2010-08-07T15:11:00.000-07:00</published><updated>2010-08-07T16:52:47.605-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-07T16:52:47.605-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="antiobjects" /><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="agents" /><title>Sniffing out Pacman</title><content type="html">How would you write the AI for ghosts in &lt;a href="http://en.wikipedia.org/wiki/Pac-Man"&gt;Pac-Man&lt;/a&gt;?  Your first thought is probably to centralize the logic in a ghost object/function and go from there.  This approach has quite a few traps, and a lot of complexity, for example how do you deal with dead-ends?  Eventually the algorithm is going to get pretty complicated (something like &lt;a href="http://www.fatvat.co.uk/2009/07/searching-graphs.html"&gt;A* search&lt;/a&gt;)&lt;br /&gt;&lt;br /&gt;The excellent paper &lt;a href="http://www.cs.colorado.edu/~ralex/papers/PDF/OOPSLA06antiobjects.pdf"&gt;Programming Anti-Objects&lt;/a&gt; shows a very simple way to solve this problem with a bit of &lt;a href="http://en.wikipedia.org/wiki/Lateral_thinking"&gt;lateral thinking&lt;/a&gt;.  Instead of centralizing the logic for ghost behaviour, why not distribute it?  Reconceptualizing the problem in this way brings the background objects to the fore - the behaviour does not live in the ghost; it lives in the background tiles.  The simple approach described in the paper is based on &lt;a href="http://en.wikipedia.org/wiki/Diffusion"&gt;diffusion&lt;/a&gt; - each target has a scent that is distributed via the background tiles.  Complex behaviour can emerge because some tiles distribute the scent (paths) whereas some block scent (obstacles).&lt;br /&gt;&lt;br /&gt;I've represented the tiles as a map from Point to a list of Agents.  The list is really a stack where the top-most agent is the only one that really matters.  I've assumed that the tiles are arranged in a square, hence the one size variable and I keep the current location of the pursuers and goal handy.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/513222.js?file=gistfile1.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;The algorithm consists of two stages; diffusing the scent over the grid followed by updating the pursuers using a simple hill-climbing approach (e.g. move to the surrounding tile with the highest scent).&lt;br /&gt;&lt;br /&gt;I've looked at the diffusion algorithm before in &lt;a href="http://www.fatvat.co.uk/2010/05/barely-function-fluid-dynamics.html"&gt;Barely Functional Fluid Dynamics&lt;/a&gt;, but I implemented it in an imperative style that makes it complicated to understand.  This time around, I looked at a purely functional solution.  Calculating the diffusion for any cell is a matter of doing some simple calculations on he four cells around it (also known as the  &lt;a href="http://en.wikipedia.org/wiki/Von_Neumann_neighborhood"&gt;Von-Neumann neighbourhood&lt;/a&gt;).  The equation below is taken from the paper:&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/_XfmDdL0570Q/TF3bzkZyD2I/AAAAAAAAADg/cYnb319b4A4/s1600/equation.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 187px;" src="http://4.bp.blogspot.com/_XfmDdL0570Q/TF3bzkZyD2I/AAAAAAAAADg/cYnb319b4A4/s400/equation.png" border="0" alt="Diffusion Equation from Colloborative Diffusion - Programming AntiObjects" id="BLOGGER_PHOTO_ID_5502795998708240226"&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;One way to solve this in &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt; would be to use some mutable state and just write some imperative-style code to mutate it in place.  This would probably give the best performance, but at the cost of making the code destructive and difficult to follow.  Alternatively, we can express the whole thing functionally.  When I looked at the &lt;a href="http://www.fatvat.co.uk/2010/07/foreign-exchange-arbitrage.html"&gt;Floyd Warshall algorithm&lt;/a&gt;, it made use of dynamic programming and in particular defining a data structure recursively.  We can use a similar trick to define updates to the grid in terms of previously calculated values.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_XfmDdL0570Q/TF3cGGOq2eI/AAAAAAAAADo/chDwjhnMlrY/s1600/diffusion.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 186px; height: 186px;" src="http://2.bp.blogspot.com/_XfmDdL0570Q/TF3cGGOq2eI/AAAAAAAAADo/chDwjhnMlrY/s400/diffusion.png" border="0" alt="Grid Diffusion Image" id="BLOGGER_PHOTO_ID_5502796317026081250"&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;In the image above, if we are calculating the diffusion value for the red-tile, then we can reference the "new" diffused values for the green tiles, but we still must use the original values for the blue tiles.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/513235.js?file=gistfile1.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Ignoring the update pursuers function, we update the board by constructing a map based on the previous environment and &lt;i&gt;itself&lt;/i&gt;.  &lt;br /&gt;&lt;br /&gt;Updating the pursuers is simple, just find the nearest path cell with the highest scent and move to it.  Putting this all together and we can run some little tests.  The example below shows a pursuer (in green) solving a little maze.  It takes a while to get going whilst the scent is diffused, but once it is it homes in on a solution very quickly.&lt;br /&gt;&lt;br /&gt;&lt;object width="320" height="266" class="BLOG_video_class" id="BLOG_video-fb28169217def6a7" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0"&gt;&lt;param name="movie" value="http://www.youtube.com/get_player"&gt;
&lt;param name="bgcolor" value="#FFFFFF"&gt;
&lt;param name="allowfullscreen" value="true"&gt;
&lt;param name="flashvars" value="flvurl=http://redirector.googlevideo.com/videoplayback?id%3Dfb28169217def6a7%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D36E6882AFA9DC1F1DA9B9C52AA576C644ADBA23B.47B0F4AA367150208F6E56F374E3EA40A66AA5F3%26key%3Dck1&amp;amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3Dfb28169217def6a7%26offsetms%3D5000%26itag%3Dw160%26sigh%3DT_rPJaAfGaRYMXWNAKD2vI9y3So&amp;amp;autoplay=0&amp;amp;ps=blogger"&gt;
&lt;embed src="http://www.youtube.com/get_player" type="application/x-shockwave-flash"
width="320" height="266" bgcolor="#FFFFFF"
flashvars="flvurl=http://redirector.googlevideo.com/videoplayback?id%3Dfb28169217def6a7%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D36E6882AFA9DC1F1DA9B9C52AA576C644ADBA23B.47B0F4AA367150208F6E56F374E3EA40A66AA5F3%26key%3Dck1&amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3Dfb28169217def6a7%26offsetms%3D5000%26itag%3Dw160%26sigh%3DT_rPJaAfGaRYMXWNAKD2vI9y3So&amp;autoplay=0&amp;ps=blogger"
allowFullScreen="true" /&gt;&lt;/object&gt;
&lt;br /&gt;&lt;br /&gt;As explained in the paper, collaborative behaviour emerges because pursuers block the scent from the goal.  This means if you have multiple paths to the goal, the pursuers will each choose a different route purely because once one pursuer is on a route it blocks out the scent for those that follow.  In the example scenario below, the agents will never take the same route and will always ensure there's no escape for the target.&lt;br /&gt;&lt;br /&gt;&lt;object width="320" height="266" class="BLOG_video_class" id="BLOG_video-87c6c9f12f65e0ab" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0"&gt;&lt;param name="movie" value="http://www.youtube.com/get_player"&gt;
&lt;param name="bgcolor" value="#FFFFFF"&gt;
&lt;param name="allowfullscreen" value="true"&gt;
&lt;param name="flashvars" value="flvurl=http://redirector.googlevideo.com/videoplayback?id%3D87c6c9f12f65e0ab%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D8119C6B84E2F124345871CB8E3E2523E02A0AE.4A6A6536F700AE978F3E07AC861E111371C0ABE5%26key%3Dck1&amp;amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3D87c6c9f12f65e0ab%26offsetms%3D5000%26itag%3Dw160%26sigh%3DVdflfTjnsmmxzgdzejWcqdZ5Vr8&amp;amp;autoplay=0&amp;amp;ps=blogger"&gt;
&lt;embed src="http://www.youtube.com/get_player" type="application/x-shockwave-flash"
width="320" height="266" bgcolor="#FFFFFF"
flashvars="flvurl=http://redirector.googlevideo.com/videoplayback?id%3D87c6c9f12f65e0ab%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D8119C6B84E2F124345871CB8E3E2523E02A0AE.4A6A6536F700AE978F3E07AC861E111371C0ABE5%26key%3Dck1&amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3D87c6c9f12f65e0ab%26offsetms%3D5000%26itag%3Dw160%26sigh%3DVdflfTjnsmmxzgdzejWcqdZ5Vr8&amp;autoplay=0&amp;ps=blogger"
allowFullScreen="true" /&gt;&lt;/object&gt;
&lt;br /&gt;&lt;br /&gt;The code for this is available on &lt;a href="http://github.com/fffej/haskellprojects/tree/master/chase/"&gt;my github page&lt;/a&gt; together with a simple GUI that allows you to build an environment and run the simulation.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-3747565447289348263?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/Jdl5ZAd1kXd9Hfdc2sOGKJ7SqOQ/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Jdl5ZAd1kXd9Hfdc2sOGKJ7SqOQ/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/Jdl5ZAd1kXd9Hfdc2sOGKJ7SqOQ/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Jdl5ZAd1kXd9Hfdc2sOGKJ7SqOQ/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/Km6dF-QLgcs" height="1" width="1"/&gt;</content><link rel="enclosure" type="video/mp4" href="http://www.blogger.com/video-play.mp4?contentId=87c6c9f12f65e0ab&amp;type=video%2Fmp4" length="0" /><link rel="enclosure" type="video/mp4" href="http://www.blogger.com/video-play.mp4?contentId=fb28169217def6a7&amp;type=video%2Fmp4" length="0" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/3747565447289348263?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/3747565447289348263?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/Km6dF-QLgcs/sniffing-out-pacman.html" title="Sniffing out Pacman" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_XfmDdL0570Q/TF3bzkZyD2I/AAAAAAAAADg/cYnb319b4A4/s72-c/equation.png" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/08/sniffing-out-pacman.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkIGQHk8fip7ImA9Wx5TGEg.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-4165467843455583453</id><published>2010-07-28T23:54:00.001-07:00</published><updated>2010-08-03T11:02:01.776-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-03T11:02:01.776-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="traffic" /><category scheme="http://www.blogger.com/atom/ns#" term="microsimulation" /><title>Stop the Traffic</title><content type="html">&lt;a href="http://en.wikipedia.org/wiki/Traffic_flow" title="Traffic Flow"&gt;Traffic flow&lt;/a&gt; is a strange phenomenon.  The complex interaction of drivers results in strange &lt;a href="http://en.wikipedia.org/wiki/Emergence" title="Emergent Behaviour"&gt;emergent behaviour&lt;/a&gt;, such as snarled up traffic even in what seem like ideal driving conditions.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_XfmDdL0570Q/TFEltwdPYYI/AAAAAAAAADY/D_hDmwnWSgM/s1600/traffic-jam.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 267px;" src="http://2.bp.blogspot.com/_XfmDdL0570Q/TFEltwdPYYI/AAAAAAAAADY/D_hDmwnWSgM/s400/traffic-jam.jpg" border="0" alt="" id="BLOGGER_PHOTO_ID_5499218088027971970"&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;A &lt;a href="http://en.wikipedia.org/wiki/Microsimulation"&gt;microsimulation&lt;/a&gt; is a modelling technique that models individual units.  Microsimulations are used for a number of items, including health sciences, pedestian modeling and traffic simulation.  &lt;br /&gt;&lt;br /&gt;My simple traffic microsimulation is based on a simple &lt;a href="http://en.wikipedia.org/wiki/Car-following_model"&gt;car-following model&lt;/a&gt;.  If there's nothing nearby, drivers keep a constant speed.  Seeing a car ahead, drivers have the irresitable urge to catch up with them, and thus put their foot down.  Seeing a car ahead that's a little too close, drivers slam their brakes on.  Overtaking is strictly prohibited!  This is a deliberately simple model, just to see if the behaviour emerges; there's a number of much sophisticated models available such as &lt;a href="http://en.wikipedia.org/wiki/Gipps'_Model"&gt;Gipps'&lt;/a&gt; or &lt;a href="http://en.wikipedia.org/wiki/Intelligent_Driver_Model"&gt;Intelligent Driver Model&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;The simplest model I could think of is a giant round-about.  Cars circle the roundabout as fast as possible, assuming the basic model described above.  This results in a nice &lt;a href="http://www.newscientist.com/article/dn13402-shockwave-traffic-jam-recreated-for-first-time.html"&gt;traffic shockwave&lt;/a&gt; much like the one shown in the video below.&lt;br /&gt;&lt;br /&gt;&lt;object width="480" height="385"&gt;&lt;param name="movie" value="http://www.youtube.com/v/Suugn-p5C1M&amp;amp;hl=en_GB&amp;amp;fs=1"&gt;&lt;/param&gt;&lt;param name="allowFullScreen" value="true"&gt;&lt;/param&gt;&lt;param name="allowscriptaccess" value="always"&gt;&lt;/param&gt;&lt;embed src="http://www.youtube.com/v/Suugn-p5C1M&amp;amp;hl=en_GB&amp;amp;fs=1" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="480" height="385"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;br /&gt;&lt;br /&gt;The main data types are shown below and are (hopefully?) self-explanatory.  Rather than store the position of a car directly, I just store the distance to destination.  This makes placing the car and determing the distance between them considerably simpler.  I've used an infinite list of randomness to provide a source of noise to the simulation.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/497383.js?file=gistfile1.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;The actual logic of the simulation is equally simple.  All we have to do is reposition the cars based on their speed.  I've made lots of simplifying assumptions.  For example, I'm only considering cars on the same route as being near and I'm not allowing cars to overtake.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/497405.js?file=gistfile1.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Putting this together with some OpenGL code gives the following.  &lt;br /&gt;&lt;br /&gt;&lt;object width="320" height="266" class="BLOG_video_class" id="BLOG_video-5bd76d5d8c0d4c61" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0"&gt;&lt;param name="movie" value="http://www.youtube.com/get_player"&gt;
&lt;param name="bgcolor" value="#FFFFFF"&gt;
&lt;param name="allowfullscreen" value="true"&gt;
&lt;param name="flashvars" value="flvurl=http://redirector.googlevideo.com/videoplayback?id%3D5bd76d5d8c0d4c61%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D4460731B1CA0F1D25D4B7F7D2A487B52E21B782F.5F75FDC2857A4995B0CFDEFC7D1FD0F56BC4EEC6%26key%3Dck1&amp;amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3D5bd76d5d8c0d4c61%26offsetms%3D5000%26itag%3Dw160%26sigh%3Dr5nroPub-_GEoa8dIpv2nWU6Uu0&amp;amp;autoplay=0&amp;amp;ps=blogger"&gt;
&lt;embed src="http://www.youtube.com/get_player" type="application/x-shockwave-flash"
width="320" height="266" bgcolor="#FFFFFF"
flashvars="flvurl=http://redirector.googlevideo.com/videoplayback?id%3D5bd76d5d8c0d4c61%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D4460731B1CA0F1D25D4B7F7D2A487B52E21B782F.5F75FDC2857A4995B0CFDEFC7D1FD0F56BC4EEC6%26key%3Dck1&amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3D5bd76d5d8c0d4c61%26offsetms%3D5000%26itag%3Dw160%26sigh%3Dr5nroPub-_GEoa8dIpv2nWU6Uu0&amp;autoplay=0&amp;ps=blogger"
allowFullScreen="true" /&gt;&lt;/object&gt;
&lt;br /&gt;&lt;br /&gt;The &lt;a href="http://github.com/fffej/haskellprojects/tree/master/traffic/"&gt;full code&lt;/a&gt; for this is on &lt;a href="http://github.com/fffej/"&gt;my GitHub page&lt;/a&gt; and (as always!) any suggestions on how to improve the code are greatly received!  I have a sneaking suspicion I should be using a State monad somewhere in here, but it's not quite clicked how yet!&lt;br /&gt;&lt;br /&gt;(edit 3rd August 2010)&lt;br /&gt;&lt;br /&gt;I updated the code in the gist with the corrections that Edward Kmett kindly provide.  Much better looking results now, though my screen capturing skills still lead a little to be desired.  If you download the code you can now change the speed / number of cars interactively and see what happens.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-4165467843455583453?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/tn57zcKXRCvuG8rBbopDXLZeXC8/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/tn57zcKXRCvuG8rBbopDXLZeXC8/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/tn57zcKXRCvuG8rBbopDXLZeXC8/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/tn57zcKXRCvuG8rBbopDXLZeXC8/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/WaM3lvEQI-s" height="1" width="1"/&gt;</content><link rel="enclosure" type="video/mp4" href="http://www.blogger.com/video-play.mp4?contentId=5bd76d5d8c0d4c61&amp;type=video%2Fmp4" length="0" /><link rel="enclosure" type="video/mp4" href="http://www.blogger.com/video-play.mp4?contentId=8d6ab837585789a2&amp;type=video%2Fmp4" length="0" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/4165467843455583453?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/4165467843455583453?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/WaM3lvEQI-s/stop-traffic.html" title="Stop the Traffic" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_XfmDdL0570Q/TFEltwdPYYI/AAAAAAAAADY/D_hDmwnWSgM/s72-c/traffic-jam.jpg" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/07/stop-traffic.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE4FQ3w6eSp7ImA9WxFaEUQ.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-7175262494001380041</id><published>2010-07-15T00:42:00.000-07:00</published><updated>2010-07-15T04:21:52.211-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-07-15T04:21:52.211-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="haskell arbitrage parsec dynamic-programming floyd-warshall" /><title>Foreign Exchange Arbitrage</title><content type="html">&lt;a href="http://en.wikipedia.org/wiki/Arbitrage"&gt;Arbitrage&lt;/a&gt; exploits a price difference between two or more markets to make money with zero risk.&lt;br /&gt;&lt;br /&gt;Sporting bets are perhaps the easiest example.  Given a sporting event with two possible outcomes you look for an opportunity where two book makers give slightly different odds and try to expoit it.  For example, consider a case with two outcomes, say Federer vs. Nadal.  One bookie offers odds of 5/2 on Federer, whereas another offers odds of 3/5 on Nadal.   If I bet £32 on 5/2, and £68 at 3/5 then regardless of the outcome I'm guaranteed to make a little money (a minimum of 8% in this case.  £32 at 5/2 odds gives me $112 and £68 at 3/5 gives me £108).  Wikipedia's page on &lt;a href="http://en.wikipedia.org/wiki/Arbitrage_betting"&gt;arbitrage betting&lt;/a&gt; explains it better than I can!&lt;br /&gt;&lt;br /&gt;Foreign exchange (forex) markets are another opportunity for the arbitrageur.  By finding mispriced currencies you can get money for nothing.  For example I could transfer my money from GBP to USD via CHF and end up with a profit if someone got these exchange rates wrong!  Now all I need to do is find a way of finding these opportunities and I'll be rich!&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_XfmDdL0570Q/TD68N-fN5LI/AAAAAAAAADA/DXbsGX8Agis/s1600/money-pile.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 209px; height: 173px;" src="http://1.bp.blogspot.com/_XfmDdL0570Q/TD68N-fN5LI/AAAAAAAAADA/DXbsGX8Agis/s400/money-pile.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5494035543736837298" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The exchange rates can be represented as a weighted graph with each node representing a currency and each directed edge representing the exchange rate.  To find an arbitrage opportunity we need to find a path through the graph (starting and ending at the same node) where the product of edge weights is greater than 1.0.  The shortest path is the best one, because we want to make as few trades as possible.  The &lt;a href="http://en.wikipedia.org/wiki/Floyd–Warshall_algorithm" title="Floyd Warshall Algorithm"&gt;Floyd Warshall algorithm&lt;/a&gt; is an efficient algorithm for finding the shortest paths in a weighted graph.&lt;br /&gt;&lt;br /&gt;In the graph below we can make a trade 1 -&gt; 2 -&gt; 4 -&gt; 3 -&gt; 1 and make a profit.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/_XfmDdL0570Q/TD68ZwYDNbI/AAAAAAAAADI/b0U9ZBOBg2A/s1600/arbOpp.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 384px;" src="http://3.bp.blogspot.com/_XfmDdL0570Q/TD68ZwYDNbI/AAAAAAAAADI/b0U9ZBOBg2A/s400/arbOpp.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5494035746107110834" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Floyd Warshall is an example of &lt;a href="http://en.wikipedia.org/wiki/Dynamic_programming"&gt;dynamic programming&lt;/a&gt;.   A dynamic programming solution has three components (from &lt;a href="http://www.algorist.com/"&gt;The Algorithm Design Manual&lt;/a&gt;):&lt;br /&gt;&lt;ol&gt;&lt;br /&gt;  &lt;li&gt;A recurrence relation or recursive algorithm for expressing the answer&lt;br /&gt;  &lt;li&gt;Show that the recursive version is bounded by a small polynomial&lt;br /&gt;  &lt;li&gt;An order of evaluation for the reccurence that means you always have partial results available when you need them.&lt;br /&gt;&lt;/ol&gt;&lt;br /&gt;&lt;br /&gt;We can define the arbitrage problem recursively.  First we label the nodes as being number from 1 to N.  The &lt;code&gt;shortestPath(i,j,k)&lt;/code&gt; gives the shortest path from i to j using only nodes 1 to k as intermediate nodes.  The base case is &lt;code&gt;shortestPath(i,j,0)&lt;/code&gt; where the value is simply the edge weight between i and j.  The recursive relation is &lt;code&gt;shortestPath(i,j,k) = min(shortestPath(i,j,k-1), shortestPath(i,k,k-1) + shortestPath(k,j,k-1)&lt;/code&gt;.  The recurrence relation just says that each new node that we consider only helps if there is a shortest path that goes through it.  &lt;br /&gt;&lt;br /&gt;In order to code this up in &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt; we'll need some representation of a graph.  A graph is simply a collection of vertices and some edges.  In order to make things easier I've said that each vertice should be an enum and have an integer representation.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/476601.js?file=gistfile1.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;I've needed two language extensions (multi parameter type classes and functional dependencies) which seems overkill!  Functional dependencies is used in &lt;code&gt;Graph a b | a -&gt; b&lt;/code&gt; to state that &lt;code&gt;b&lt;/code&gt; is uniquely defined by &lt;code&gt;a&lt;/code&gt;.  Any suggestions on the simpler solution would be greatly appreciated!&lt;br /&gt;&lt;br /&gt;Dynamic programming solutions can be expressed very neatly in lazy functional programming languages like &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt;.  See &lt;a href="http://www.haskell.org/haskellwiki/Dynamic_programming_example"&gt;here&lt;/a&gt; for more information, but the basic idea is to define a data structure in terms of itself.  If we initialize the data structure in the correct order, then all the previous results are available when they are needed.  This makes for a concise solution to the problem.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/476605.js?file=gistfile1.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;The Floyd Warshall algorithm has been extended a little above to find the minimum solution and to provide path reconstruction information.  Once we have the path reconstruction matrix we can work backwards to reconstruct the series of trades that lead to an arbitrage opportunity.  There's only an opportunity if there is a loop from a starting currency that returns with a value greater than 1. &lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/476609.js?file=gistfile1.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;So, I've now got the ability to find an arbitrage path if one exists, now all I need to do is hook my computer up to a live feed of exchange rates, put a little money in and I'll be rich by the end of the day.  Just in case it's not quite that easy, I thought I'd better run it on some test data!&lt;br /&gt;&lt;br /&gt;There's a good source of test data at &lt;a href="http://www.gaincapital.com/"&gt;GAIN Capital&lt;/a&gt; that's available &lt;a href="http://ratedata.gaincapital.com/"&gt;here&lt;/a&gt;.  I downloaded January's historial data and after a while ended up with 1.3GB of data to sort though.  The data is in a simple CSV format, with columns representing the ticket, currencies, data time, bid rate and ask rate.  This gave me an opportunity to use &lt;a href="http://www.haskell.org/haskellwiki/Parsec"&gt;Parsec&lt;/a&gt; which is apparently an &lt;i&gt;industrial strength, monadic parser combinator library&lt;/i&gt;.  Thanks to &lt;a href="http://book.realworldhaskell.org/read/using-parsec.html"&gt;RWH Chapter 16&lt;/a&gt; it was pretty simple to use and understand.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/476618.js?file=gistfile1.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Now that I've got a way of reading in the historical data in, I simply need to convert this data into a form that the Floyd Warshall algorithm can use and look for paths that give a profit.  I could have done that directly with the parser code, but I might want to reuse that again for some other cunning plan.  Instead I'll convert to a simple map representation, with the keys being pairs of vertices and the values being the exchange rate between those values.  The following code takes a list of foreign exchange records, converts them into exchanges and looks for arbitrage opportunities.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/476625.js?file=gistfile1.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Assuming that I've understood the format properly (anyone know any good books to get this kind of basic information?), then running this code through the 21 million (!) ticker updates in January 2010 yields absolutely ZERO arbitrage opportunities.  With my limited knowledge, I think this is because of the &lt;a href="http://en.wikipedia.org/wiki/Efficient-market_hypothesis" title="Efficient-market hypothesis"&gt;efficient-market hypothesis&lt;/a&gt;.  Prices reflect the information available and the updates are incredibly quick (otherwise currency traders would be going bankrupt on a daily basis!).&lt;br /&gt;&lt;br /&gt;Back to the drawing board with my money making schemes!&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_XfmDdL0570Q/TD69ILqVoUI/AAAAAAAAADQ/vH7sMyF_pGQ/s1600/pennies.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 240px; height: 180px;" src="http://2.bp.blogspot.com/_XfmDdL0570Q/TD69ILqVoUI/AAAAAAAAADQ/vH7sMyF_pGQ/s400/pennies.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5494036543705555266" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;(image taken from &lt;a href="http://www.flickr.com/photos/pcollingridge/201900754/"&gt;Flickr&lt;/a&gt;)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-7175262494001380041?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/wdS5JGKlEw344dZZR5dCIz3wiVo/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/wdS5JGKlEw344dZZR5dCIz3wiVo/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/wdS5JGKlEw344dZZR5dCIz3wiVo/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/wdS5JGKlEw344dZZR5dCIz3wiVo/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/YVYWFzYtUDI" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7175262494001380041?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/7175262494001380041?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/YVYWFzYtUDI/foreign-exchange-arbitrage.html" title="Foreign Exchange Arbitrage" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_XfmDdL0570Q/TD68N-fN5LI/AAAAAAAAADA/DXbsGX8Agis/s72-c/money-pile.png" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/07/foreign-exchange-arbitrage.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0YMQno-eyp7ImA9WxFVFEg.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-626680465315189757</id><published>2010-06-12T15:46:00.000-07:00</published><updated>2010-06-13T11:53:03.453-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-06-13T11:53:03.453-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="newton" /><category scheme="http://www.blogger.com/atom/ns#" term="orbit simulator" /><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="opengl" /><category scheme="http://www.blogger.com/atom/ns#" term="quickcheck" /><title>Orbit Simulator in Haskell</title><content type="html">&lt;a href="http://www.objectmentor.com/omTeam/martin_r.html"&gt;Uncle Bob&lt;/a&gt; recently wrote an article showing a &lt;a href="http://blog.objectmentor.com/articles/2010/06/02/orbit-in-clojure"&gt;simple orbital simulator in Clojure&lt;/a&gt;.  Aside from the strange formatting, the code is pretty easy to follow, though there is large amounts of duplicate code (see &lt;a href="http://github.com/unclebob/clojureOrbit/blob/master/src/physics/vector.clj"&gt;&lt;code&gt;vector.clj&lt;/code&gt;&lt;/a&gt; vs. &lt;a href="http://github.com/unclebob/clojureOrbit/blob/master/src/physics/position.clj"&gt;&lt;code&gt;position.clj&lt;/code&gt;&lt;/a&gt;).  I guess the reasoning behind this is to try to stop mixing up units?&lt;br /&gt;&lt;br /&gt;A &lt;a href="http://www.haskell.org/haskellwiki/Phantom_type"&gt;phantom type&lt;/a&gt; is one way to avoid this duplication in &lt;a href="http://www.haskell.org/"&gt;Haskell&lt;/a&gt;.  A phantom type is only ever used to construct types.  Its sole purpose is for validation by the type checker.  The example below uses phantom types to encode position, velocity and force.    This means we have no code repetition for each of the vector operations.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/436159.js"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;When we do the physical calculations to move the objects around the phantom types do their job.  As an example, if I try to add two unrelated vectors together I get a compile time error showing that I can't.  Here's an example from a ghci session.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;  Prelude Orbit&amp;gt; let a = Vec 0 0 :: Vec Force&lt;br /&gt;  Prelude Orbit&amp;gt; let b = Vec 1 1 :: Vec Position&lt;br /&gt;  Prelude Orbit&amp;gt; add a b&lt;br /&gt;&lt;br /&gt;  &lt;interactive&gt;:1:6:&lt;br /&gt;    Couldn't match expected type `Force'&lt;br /&gt;           against inferred type `Position'&lt;br /&gt;      Expected type: Vec Force&lt;br /&gt;      Inferred type: Vec Position&lt;br /&gt;    In the second argument of `add', namely `b'&lt;br /&gt;    In the expression: add a b&lt;br /&gt;&lt;/interactive&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;The code to do the physical calculations is very simple.  The record update syntax is new to me and means you can just change the selected fields in an object, rather than creating a new one and copying across all the fields (see the section on &lt;a href="http://en.wikibooks.org/wiki/Haskell/More_on_datatypes#Named_Fields_.28Record_Syntax.29"&gt;named fields&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;I think &lt;code&gt;collideAll&lt;/code&gt; looks substantially nicer than the Clojure implementation.  From the comments on the blog, I suspect it still suffers from a period of inaccuracy if three objects collide simultaneously (they won't get merged in the first round, but almost certainly will in the second).  &lt;a href="http://www.haskell.org/ghc/docs/6.12.1/html/libraries/base/Data-List.html#v%3AnubBy"&gt;&lt;code&gt;nubBy&lt;/code&gt;&lt;/a&gt; is an incredibly useful function I hadn't seen before and it allows you to remove duplicates according to your own definition of equality.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/436165.js?file=physicsLogic.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;The &lt;a href="http://www.clojure.org/"&gt;Clojure&lt;/a&gt; version has a bunch of unit tests (as you might expect given the author!).  The tests are in the &lt;code&gt;1 + 1 = 2&lt;/code&gt; style by which I mean give some inputs and validate an output.  These are the sort of tests you might capture as part of a REPL transcript, but they give you very little assurance that the code is actually correct.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://en.wikipedia.org/wiki/QuickCheck"&gt;QuickCheck&lt;/a&gt; gives an incredibly powerful way of testing applications by making assertions about the code and challenging QuickCheck to falsify them with generated data.  In the case of this simple physics model, we can make assertions like &lt;a href="http://en.wikipedia.org/wiki/Conservation_of_energy"&gt;the total amount of energy in the system must be conserved&lt;/a&gt; and let QuickCheck worry about generating the cases where this doesn't hold.&lt;br /&gt;&lt;br /&gt;In order to do this we first have to give QuickCheck a way of generator random objects to simulate.  The &lt;a href="http://hackage.haskell.org/packages/archive/QuickCheck/2.1.0.3/doc/html/Test-QuickCheck-Arbitrary.html"&gt;&lt;code&gt;Arbitrary&lt;/code&gt;&lt;/a&gt; type class defines &lt;code&gt;generate&lt;/code&gt;, so we just need to make &lt;code&gt;Object&lt;/code&gt; an instance of this type class and implement the method.&lt;br /&gt;&lt;br /&gt;A few simple quick check properties are shown below.  This asserts that for any given vector (apart from one with zero length), the magnitude is 1.0 (subject to a rounding error or two) and that for any given set of objects, the &lt;a href="http://en.wikipedia.org/wiki/Kinetic_energy"&gt;kinetic energy&lt;/a&gt; is conserved.  This is a very simple example of how powerful QuickCheck can be.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/436851.js?file=arbitraryObject.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Running Quick Check against this property shows that it holds for a large number of automatically generated test cases.  This gives a strong hint that this code actually works.&lt;br /&gt;&lt;br /&gt;The final task is to visualize this data.  I used OpenGL again and knocked up a quick UI, the source code for which is &lt;a href="http://github.com/fffej/haskellprojects/blob/master/newton/Main.hs"&gt;here&lt;/a&gt;.  The (terrible) video below gives an example of it in action.  If anyone knows of a better way of capturing OpenGL I'd love to know.  I'm currently using &lt;a href="http://live.gnome.org/Istanbul"&gt;Istanbul&lt;/a&gt; which works, but brings my system to its knees.  &lt;br /&gt;&lt;br /&gt;&lt;object width="320" height="266" class="BLOG_video_class" id="BLOG_video-26beb2d376da5681" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0"&gt;&lt;param name="movie" value="http://www.youtube.com/get_player"&gt;
&lt;param name="bgcolor" value="#FFFFFF"&gt;
&lt;param name="allowfullscreen" value="true"&gt;
&lt;param name="flashvars" value="flvurl=http://redirector.googlevideo.com/videoplayback?id%3D26beb2d376da5681%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D45B0380714BEA317D0636954516EF9178BC47616.70F87242634CF5A3C4C0DEC5C30EC264209025F6%26key%3Dck1&amp;amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3D26beb2d376da5681%26offsetms%3D5000%26itag%3Dw160%26sigh%3DHgAu0gVK3ICkkTPq4EeynJw2g4c&amp;amp;autoplay=0&amp;amp;ps=blogger"&gt;
&lt;embed src="http://www.youtube.com/get_player" type="application/x-shockwave-flash"
width="320" height="266" bgcolor="#FFFFFF"
flashvars="flvurl=http://redirector.googlevideo.com/videoplayback?id%3D26beb2d376da5681%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D45B0380714BEA317D0636954516EF9178BC47616.70F87242634CF5A3C4C0DEC5C30EC264209025F6%26key%3Dck1&amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3D26beb2d376da5681%26offsetms%3D5000%26itag%3Dw160%26sigh%3DHgAu0gVK3ICkkTPq4EeynJw2g4c&amp;autoplay=0&amp;ps=blogger"
allowFullScreen="true" /&gt;&lt;/object&gt;
&lt;br /&gt;&lt;br /&gt;The &lt;a href="http://github.com/fffej/haskellprojects/tree/master/newton/"&gt;complete source&lt;/a&gt; for this is available on &lt;a href="http://github.com/fffej/"&gt;my git hub page&lt;/a&gt;.  Any comments or suggested improvements greatly appreciated!&lt;br /&gt;&lt;br /&gt;Finally, the company I work for is currently recruiting in Cambridge, UK.  If you're the sort of person who is looking for challenges such as analysing petabytes of data, writing high performance servers , or developing the richest pure JavaScript interfaces imaginable  (and a whole lot more in between) then drop me a note at jeff.foster AT acm.org and I'll give you more information.  Oh, and if you're accepted you'll get your choice of an &lt;a href="http://www.apple.com/ipad/"&gt;iPad&lt;/a&gt; or the latest Android phone.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-626680465315189757?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/Hk8AdWU243YkSKUrcIoi5XSOdNg/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Hk8AdWU243YkSKUrcIoi5XSOdNg/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/Hk8AdWU243YkSKUrcIoi5XSOdNg/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Hk8AdWU243YkSKUrcIoi5XSOdNg/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/m1FXbpz3U68" height="1" width="1"/&gt;</content><link rel="enclosure" type="video/mp4" href="http://www.blogger.com/video-play.mp4?contentId=26beb2d376da5681&amp;type=video%2Fmp4" length="0" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/626680465315189757?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/626680465315189757?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/m1FXbpz3U68/orbit-simulator-in-haskell.html" title="Orbit Simulator in Haskell" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><feedburner:origLink>http://www.fatvat.co.uk/2010/06/orbit-simulator-in-haskell.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkMBRXk7eSp7ImA9WxFWFUk.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-4444846705434710504</id><published>2010-06-02T23:19:00.000-07:00</published><updated>2010-06-03T00:00:54.701-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-06-03T00:00:54.701-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="monte-carlo" /><category scheme="http://www.blogger.com/atom/ns#" term="world cup" /><title>Monte Carlo Methods and the World Cup</title><content type="html">A &lt;a href="http://en.wikipedia.org/wiki/Monte_Carlo_method"&gt;Monte Carlo method&lt;/a&gt; is a method of solving problems by statistical sampling.  A typical approach to solving a Monte-Carlo problem is (according to Wikipedia):&lt;br /&gt;&lt;ol&gt;&lt;br /&gt;&lt;li&gt;Define a domain of possible inputs.&lt;br /&gt;&lt;li&gt;Generate inputs randomly from the domain using a certain specified probability distribution.&lt;br /&gt;&lt;li&gt;Perform a deterministic computation using the inputs.&lt;br /&gt;&lt;li&gt;Aggregate the results of the individual computations into the final result.&lt;br /&gt;&lt;/ol&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_XfmDdL0570Q/TAdKCidpXRI/AAAAAAAAACw/q-JjJQ81Dho/s1600/Dice.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 283px; height: 212px;" src="http://1.bp.blogspot.com/_XfmDdL0570Q/TAdKCidpXRI/AAAAAAAAACw/q-JjJQ81Dho/s400/Dice.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5478428879190842642" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;One of the simplest example of a Monte-Carlo method is estimating pi.  The approach below hurls darts at a 1x1 square.  Some of these land within the unit circle, and some outside.  By getting the ratio between the two we can estimate the value of pi.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/420631.js?file=estimatepi.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;The accuracy of calculating pi depends on the number of samples taken.  As you can see below it's not until very large amounts of samples are taken that the number even begins to approach pi.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;    10         3.2&lt;br /&gt;    100        3.08&lt;br /&gt;    1000       3.204&lt;br /&gt;    10000      3.142&lt;br /&gt;    100000     3.14072&lt;br /&gt;    1000000    3.143136&lt;br /&gt;    10000000   3.1407732&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;We can also apply the Monte Carlo method to sporting events.  The &lt;a href="http://www.fifa.com/worldcup/"&gt;2010 World Cup&lt;/a&gt; is almost upon us.  The current &lt;a href="http://www.fifa.com/worldfootball/ranking/lastranking/gender=m/fullranking.html"&gt;Fifa World Rankings&lt;/a&gt; make Brazil or Spain the overwhelming favourities.  The &lt;a href="http://www.fifa.com/mm/document/tournament/competition/64/42/24/2010fwc_matchschedule_3004_en.pdf" title="World Cup Schedule [PDF]"&gt;draw&lt;/a&gt; (PDF) has an element of randomness though, so it's possible that two strong teams could find themselves meeting earlier and thus one of them going home.  A Monte Carlo simulation sounds like the ideal way of settling this.  Or at least, it's going to be just as wildly inaccurate as the pundits predictions!&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_XfmDdL0570Q/TAdKt9c1t0I/AAAAAAAAAC4/MFH15YTDfE8/s1600/World-Cup-trophy-2_6.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 173px; height: 290px;" src="http://2.bp.blogspot.com/_XfmDdL0570Q/TAdKt9c1t0I/AAAAAAAAAC4/MFH15YTDfE8/s400/World-Cup-trophy-2_6.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5478429625169590082" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The World cup consists of two stages.  The group stage, where four teams play each other and the top two advance and the knockout stage in which the remaining teams play until the final is reached.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/423548.js?file=gistfile1.hsc"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;I've defined a class to represent the model that will be used to simulate the matches.  For this example, I've simply based things on the &lt;a href="http://www.fifa.com/worldfootball/ranking/lastranking/gender=m/fullranking.html"&gt;Fifa world Rankings&lt;/a&gt;, but there are much &lt;a href="http://en.wikipedia.org/wiki/Statistical_Soccer_(Football)_Predictions"&gt;more sophisticated techniques&lt;/a&gt; that could be used (whether these are demonstrably any more accurate or not, I'm not sure!).&lt;br /&gt;&lt;br /&gt;The basic implementation of this type class is incredibly simple.  It uses the world rankings and simply compares these to get a result.  In the event of a draw, the home team is assumed to win!  This could obviously be substantially "improved".&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/423553.js?file=gistfile1.hsc"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;To simulate randomness, I've assumed that each time can play up to 30% better or worse than their ranking would suggest.  This means that Brazil will always beat New Zealand, whereas teams that are closer (England) may have some chance!&lt;br /&gt;&lt;br /&gt;The code below simulates the world cup - hopefully it makes sense as I've tried to be as self-documenting as possible!  &lt;code&gt;rules&lt;/code&gt; gives the knockout stage structure - it keeps getting "folded in half" as teams play each other and hopefully accurately represents the real fixtures of the world cup.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/423563.js?file=gistfile1.hsc"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;So who's going to win the World Cup according to the simulation?  I ran for 100K simulations and got the following results:&lt;br /&gt;&lt;br /&gt;&lt;li&gt;France (124)&lt;br /&gt;&lt;li&gt;Argentina (365)&lt;br /&gt;&lt;li&gt;Greece (7)&lt;br /&gt;&lt;li&gt;England (239)&lt;br /&gt;&lt;li&gt;USA (4)&lt;br /&gt;&lt;li&gt;Germany (577)&lt;br /&gt;&lt;li&gt;Serbia (1)&lt;br /&gt;&lt;li&gt;Netherlands (3706)&lt;br /&gt;&lt;li&gt;Italy (2240)&lt;br /&gt;&lt;li&gt;Brazil (48025)&lt;br /&gt;&lt;li&gt;Portugal (5249)&lt;br /&gt;&lt;li&gt;Spain (39460)&lt;br /&gt;&lt;li&gt;Chile (3)&lt;br /&gt;&lt;br /&gt;Unsurprisingly, this still has Brazil coming up winners almost half the time.  The number of times each time wins is mostly related to the ranking (as you'd suspect).  Changing the simulation slightly so that teams draw in the group stage if they have a rating within 25 points changes the results a little, but they are still broadly in line with the above.&lt;br /&gt;&lt;br /&gt;Still, doesn't matter what the simulation says!  England will still march-forward past the group stage and then lose in "unlucky" circumstances (&lt;a href="http://news.bbc.co.uk/sport1/hi/football/world_cup_2006/4991618.stm"&gt;penalties&lt;/a&gt;, &lt;a href="http://news.bbc.co.uk/sport3/worldcup2002/hi/matches_wallchart/england_v_brazil/newsid_2049000/2049924.stm"&gt;&lt;i&gt;that&lt;/i&gt; goal&lt;/a&gt;, &lt;a href="http://web.ukonline.co.uk/ic.ic/worldcup98a.html"&gt;penalties again&lt;/a&gt;).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-4444846705434710504?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/RS-65S1ybBwa82DoQZgQTOzNmo0/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/RS-65S1ybBwa82DoQZgQTOzNmo0/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/RS-65S1ybBwa82DoQZgQTOzNmo0/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/RS-65S1ybBwa82DoQZgQTOzNmo0/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/uIqjBXz_Kj8" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/4444846705434710504?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/4444846705434710504?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/uIqjBXz_Kj8/monte-carlo-methods-and-world-cup.html" title="Monte Carlo Methods and the World Cup" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_XfmDdL0570Q/TAdKCidpXRI/AAAAAAAAACw/q-JjJQ81Dho/s72-c/Dice.jpg" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/06/monte-carlo-methods-and-world-cup.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A04DR3s8cCp7ImA9Wx5TGUU.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-817396911895421106</id><published>2010-05-10T23:31:00.000-07:00</published><updated>2010-08-04T23:32:56.578-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-04T23:32:56.578-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="functional programming" /><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="fluid dynamics" /><category scheme="http://www.blogger.com/atom/ns#" term="opengl" /><title>Barely Functional Fluid Dynamics (2)</title><content type="html">Continuing on from last times adventures on &lt;a href="http://www.fatvat.co.uk/2010/05/barely-function-fluid-dynamics.html"&gt;Barely Functional Fluid Dynamics&lt;/a&gt;, it's time to start actually visualizing the results.  All of the OpenGL is heavily based on the original C code by &lt;a href="http://www.dgp.toronto.edu/~stam/"&gt;Jos Stam&lt;/a&gt; available &lt;a href="http://www.dgp.toronto.edu/people/stam/reality/Research/zip/CDROM_GDC03.zip"&gt;here&lt;/a&gt; (.zip).&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.opengl.org/resources/libraries/glut/"&gt;GLUT&lt;/a&gt; is the simplest way to get started with OpenGL.  It's a window system toolkit designed specifically for writing OpenGL programs.  The &lt;a href="http://hackage.haskell.org/platform/"&gt;Haskell Platform&lt;/a&gt; comes complete with a set of Haskell bindings to GLUT.  The documentation doesn't give a whole lot of help regarding how to get started, but thankfully there's a huge bundle of examples within the Cabal package that help you get started.&lt;br /&gt;&lt;br /&gt;To draw the &lt;a href="http://www.fatvat.co.uk/2010/05/barely-function-fluid-dynamics.html"&gt;fluid dynamics&lt;/a&gt; results, I needed to have some representation of the various state.  &lt;code&gt;State&lt;/code&gt; encapsulates the various bits necessary to draw the fluid dynamics simulation.  This includes the data (density and velocity) together with state about the mouse position and so on.  This is stored in an &lt;a href="http://cvs.haskell.org/Hugs/pages/libraries/base/Data-IORef.html"&gt;&lt;code&gt;IORef&lt;/code&gt;&lt;/a&gt; that provides a mutable reference within the IO monad.  IORef seems very simliar to Clojure's references, with similar options for changing the state inside the reference via a supplied function.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/393784.js"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;OpenGL supports several primitive types, including lines, points, quads and triangles.  Each of these primitives is specified using a sequence of vertices.  For example, lines are specified as a series of pairs, and a point is drawn between each pair.  Similarly quads are specified using groups of four points.  For drawing the fluid we use the GL_QUAD to specify a series of squares.  The colour of each vertex is the density at that particular point.  In order to make it look pretty the color is based on not just the density but the x and y co-ordinates.  This gives green in the top-left corner down to purple in the bottom right.  It's not based on any clever principles, I just thought it looked nice!&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_XfmDdL0570Q/S-j5_Fx3OVI/AAAAAAAAACo/bcx1TmaI46U/s1600/FunkyColors.png"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 378px; height: 400px;" src="http://2.bp.blogspot.com/_XfmDdL0570Q/S-j5_Fx3OVI/AAAAAAAAACo/bcx1TmaI46U/s400/FunkyColors.png" border="0" alt="Example colours that look nice!"id="BLOGGER_PHOTO_ID_5469896609719859538" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;To draw primitive types the &lt;a href="http://hackage.haskell.org/package/GLUT"&gt;Haskell GLUT library&lt;/a&gt; exposes a function called &lt;code&gt;renderPrimitive&lt;/code&gt; which takes the PrimitiveMode (e.g. Lines / Quads / Polygon) and an IO action.  The IO action represents the drawing of the list of points, so all we need to do is loop through each point and construct an IO action to choose a color and create the vertex.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/396983.js"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Similarly, to draw the velocity field we just need to draw a series of lines.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/396987.js"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Putting this all together with a few event handlers and timers gives a little OpenGL application (compile with &lt;code&gt;ghc -O2 --make -lglut Main.hs MFluid.hs&lt;/code&gt; - I appear to need to specify -lglut manually on my system at least?).&lt;br /&gt;&lt;br /&gt;&lt;object width="320" height="266" class="BLOG_video_class" id="BLOG_video-77aed8fe66b5895d" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0"&gt;&lt;param name="movie" value="http://www.youtube.com/get_player"&gt;
&lt;param name="bgcolor" value="#FFFFFF"&gt;
&lt;param name="allowfullscreen" value="true"&gt;
&lt;param name="flashvars" value="flvurl=http://redirector.googlevideo.com/videoplayback?id%3D77aed8fe66b5895d%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D1903760478C7AA4662C85D54B4BD037C3CAFD5F.834556C5DE86DFA562313BB40A8F487585D1E1D1%26key%3Dck1&amp;amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3D77aed8fe66b5895d%26offsetms%3D5000%26itag%3Dw160%26sigh%3Dx5xqQFZCDp3-e8G-WxvyNRbl9gw&amp;amp;autoplay=0&amp;amp;ps=blogger"&gt;
&lt;embed src="http://www.youtube.com/get_player" type="application/x-shockwave-flash"
width="320" height="266" bgcolor="#FFFFFF"
flashvars="flvurl=http://redirector.googlevideo.com/videoplayback?id%3D77aed8fe66b5895d%26itag%3D5%26app%3Dblogger%26cmo%3Dsensitive_content%253Dyes%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1340583044%26sparams%3Did,itag,ip,ipbits,expire%26signature%3D1903760478C7AA4662C85D54B4BD037C3CAFD5F.834556C5DE86DFA562313BB40A8F487585D1E1D1%26key%3Dck1&amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3D77aed8fe66b5895d%26offsetms%3D5000%26itag%3Dw160%26sigh%3Dx5xqQFZCDp3-e8G-WxvyNRbl9gw&amp;autoplay=0&amp;ps=blogger"
allowFullScreen="true" /&gt;&lt;/object&gt;
&lt;br /&gt;&lt;br /&gt;I was quite suprised about how easy it was to get something together in Haskell to do this.  Reading the web, you'd guess that it would require writing &lt;a href="http://byorgey.wordpress.com/2009/01/12/abstraction-intuition-and-the-monad-tutorial-fallacy/"&gt;a monad tutorial&lt;/a&gt; and getting a deep understanding of category theory.  In reality, you just download the &lt;a href="http://hackage.haskell.org/platform/"&gt;Haskell platform&lt;/a&gt; and get going.  There's no need to understand all the details about what's going on under the hood straight away (though no doubt it helps!).   &lt;br /&gt;&lt;br /&gt;The full code is available &lt;a href="http://github.com/fffej/haskellprojects/tree/master/fluidDynamics/" title="My GitHub Repository"&gt;here&lt;/a&gt;.  Any suggestions for improvements greatly appreciated!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-817396911895421106?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/hQGRPbPUwlVDoQTe_5sOhMvBZZk/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/hQGRPbPUwlVDoQTe_5sOhMvBZZk/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/hQGRPbPUwlVDoQTe_5sOhMvBZZk/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/hQGRPbPUwlVDoQTe_5sOhMvBZZk/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/R4zh8n4A3nA" height="1" width="1"/&gt;</content><link rel="enclosure" type="video/mp4" href="http://www.blogger.com/video-play.mp4?contentId=77aed8fe66b5895d&amp;type=video%2Fmp4" length="0" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/817396911895421106?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/817396911895421106?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/R4zh8n4A3nA/barely-function-fluid-dynamics-2.html" title="Barely Functional Fluid Dynamics (2)" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_XfmDdL0570Q/S-j5_Fx3OVI/AAAAAAAAACo/bcx1TmaI46U/s72-c/FunkyColors.png" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/05/barely-function-fluid-dynamics-2.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A04MRnc6cCp7ImA9Wx5TGUU.&quot;"><id>tag:blogger.com,1999:blog-5743983044224833668.post-27727853939527559</id><published>2010-05-05T00:03:00.000-07:00</published><updated>2010-08-04T23:33:07.918-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-04T23:33:07.918-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="haskell" /><category scheme="http://www.blogger.com/atom/ns#" term="fluid dynamics" /><category scheme="http://www.blogger.com/atom/ns#" term="criterion" /><category scheme="http://www.blogger.com/atom/ns#" term="opengl" /><title>Barely Functional Fluid Dynamics</title><content type="html">&lt;a href="http://www.bestinclass.dk/index.php/2010/03/functional-fluid-dynamics-in-clojure/"&gt;Functional Fluid Dynamics in Clojure&lt;/a&gt; is an implementation of a fluid dynamics simulator based on a paper by Jos Stam, &lt;a href="http://www.dgp.toronto.edu/people/stam/reality/Research/pdf/GDC03.pdf"&gt;Real Time Fluid Dynamics for Games&lt;/a&gt;.   Demonstration code is available in &lt;a href="http://www.dgp.toronto.edu/people/stam/reality/Research/zip/CDROM_GDC03.zip"&gt;OpenGL / C&lt;/a&gt; (on a side note, more papers should do this as it makes research easier to extend and far more approachable).  I thought it would be fun to reimplement this in &lt;a href="http://www.haskell.org"&gt;Haskell&lt;/a&gt; as this would give me a chance to refresh my knowledge of &lt;a href="http://www.opengl.org"&gt;OpenGL&lt;/a&gt; (last seen many years ago in &lt;a href="http://www.ecs.soton.ac.uk/admissions/ug/syllabus.php?unit=COMP3004"&gt;Principles of Computer Graphics&lt;/a&gt;.)&lt;br /&gt;&lt;br /&gt;The paper describes a simplified model for fluid dynamics based on the &lt;a href="http://en.wikipedia.org/wiki/Navier–Stokes_equations"&gt;Navier-Stokes equations&lt;/a&gt;.  The fluid is modelled as points in a 2D plane where each point has a density and a velocity.  Given an initial density and velocity, the equations calculate a delta to the next density and velocity.  The C implementation uses one dimensional arrays to store the points.  I decided to try and model the C code as closly as possible in Haskell (and then hopefully contrast it with a purely functional solution).  The main data structure is a &lt;code&gt;Grid&lt;/code&gt;, instances of which are used to store the density and velocity components.  The grid consists of a &lt;a href="http://hackage.haskell.org/package/vector"&gt;vector&lt;/a&gt; and a size.  Just like the C implementation, there's an area of padding around the outside to make calculating the edge cases simpler.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/390438.js"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Most of the operations involve simply looping over the pixels and doing some calculations.  The use of mutable vectors means almost every function is used only for its side-effects.  This is really ugly, and I assume that a better Haskell programmer would separate the two more!  The following functions remove a bit of boiler-plate from the code.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/390442.js"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Diffusion models the spread of fluid between each point.  Each point will lose density to its neighbours, but also increase by getting density from its own neighbours.  For any given cell the net difference will be the sum of fluid leaving via the neighbours minus the amount coming in which leads to the following equation.  An iterative technique, &lt;a href="http://en.wikipedia.org/wiki/Gauss–Seidel_method"&gt;Gauss-Seidel relaxation&lt;/a&gt;, is used to solve the corresponding equations.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/390444.js"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Advection models the changes due to the fluid's bulk motion.  Haskell wise it follows a similar pattern as before (loop over each pixel, then write).  Projection models the change in velocity.&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/390445.js"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Before this is plugging into a UI, we should profile it to find out how fast it is.  I used the same board size as the &lt;a href="http://www.bestinclass.dk/index.php/2010/03/functional-fluid-dynamics-in-clojure/"&gt;Clojure example&lt;/a&gt;.  Any comparison between the two is completely meaningless.  There's different CPU's, different methods (see Clojure's &lt;a href="http://github.com/LauJensen/Fluid-Dynamics/blob/master/fluids.clj#L112"&gt;diffusion&lt;/a&gt; which only uses a single iteration of the relaxation) and a completely different platform. (that should be enough of a disclaimer!).  I used the awesome benchmarking framework &lt;a href="http://hackage.haskell.org/package/criterion"&gt;Criterion&lt;/a&gt; and ended up with:&lt;br /&gt;&lt;br /&gt;&lt;script src="http://gist.github.com/390463.hs"&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;Running after compilation gives pretty good performance.  The "set bounds" function (which isn't shown above) takes about 12 micro-seconds per iteration.  Projection is a little slower taking 10 ms (this is actually slower than the Clojure implementation; the reason is that I'm doing 20 iterations, just like the C code - if I switch to a single iteration it takes 1.5 ms).  Even with 20 iterations, it should still be possible to get a frame rate of around 100 / second.  Now all that remains is plugging it into OpenGL and getting something like this:&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_XfmDdL0570Q/S-EY-p98S9I/AAAAAAAAACg/h5q3lIHIVdQ/s1600/fluidDynamics.png"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 378px; height: 400px;" src="http://2.bp.blogspot.com/_XfmDdL0570Q/S-EY-p98S9I/AAAAAAAAACg/h5q3lIHIVdQ/s400/fluidDynamics.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5467678887301106642" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;(it looks much better when its moving honest!  At the moment it looks like some kind of crazy &lt;a href="http://en.wikipedia.org/wiki/Echocardiography"&gt;echocardiogram&lt;/a&gt;)&lt;br /&gt;&lt;br /&gt;The full code is available on &lt;a href="http://github.com/fffej/haskellprojects/tree/master/fluidDynamics/"&gt;my git hub repository&lt;/a&gt;.   As always any suggestions on how I can improve this code much appreciated!&lt;br /&gt;&lt;br /&gt;(update, OpenGL visualization code in &lt;a href="http://www.fatvat.co.uk/2010/05/barely-function-fluid-dynamics-2.html"&gt;next entry&lt;/a&gt;.)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/5743983044224833668-27727853939527559?l=www.fatvat.co.uk' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/8vmlPqiMeN17LndCOYFdzVgv5Mg/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/8vmlPqiMeN17LndCOYFdzVgv5Mg/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/8vmlPqiMeN17LndCOYFdzVgv5Mg/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/8vmlPqiMeN17LndCOYFdzVgv5Mg/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/fatvat/~4/EVpFJlVObjk" height="1" width="1"/&gt;</content><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/27727853939527559?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/5743983044224833668/posts/default/27727853939527559?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/fatvat/~3/EVpFJlVObjk/barely-function-fluid-dynamics.html" title="Barely Functional Fluid Dynamics" /><author><name>Jeff Foster</name><uri>http://www.blogger.com/profile/08195722595923882332</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_XfmDdL0570Q/S-EY-p98S9I/AAAAAAAAACg/h5q3lIHIVdQ/s72-c/fluidDynamics.png" height="72" width="72" /><feedburner:origLink>http://www.fatvat.co.uk/2010/05/barely-function-fluid-dynamics.html</feedburner:origLink></entry></feed>

