<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:posterous="http://posterous.com/help/rss/1.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0">
  <channel>
    <title>Null Disquisition</title>
    <link>http://mumrah.net</link>
    <description>Lots of talk about nothing</description>
    <generator>posterous.com</generator>
    <link xmlns="http://www.w3.org/2005/Atom" href="http://posterous.com/api/sup_update#9a656d685" type="application/json" rel="http://api.friendfeed.com/2008/03#sup" />
    
    
    <atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/rss+xml" href="http://feeds.feedburner.com/MumrahsPosterous" /><feedburner:info uri="mumrahsposterous" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://posterous.superfeedr.com/" /><feedburner:browserFriendly></feedburner:browserFriendly><item>
      <pubDate>Mon, 20 Dec 2010 11:30:00 -0800</pubDate>
      <title>Enable WebGL in ChromeOS</title>
      <link>http://mumrah.net/enable-webgl-in-chromeos</link>
      <guid>http://mumrah.net/enable-webgl-in-chromeos</guid>
      <description>
        <![CDATA[<p>
	<p>Once you've enabled&nbsp;<a href="http://www.chromium.org/chromium-os/developer-information-for-chrome-os-devices/cr-48-chrome-notebook-developer-information#TOC-Entering-Developer-Mode" title="Developer Mode" target="_blank">Developer Mode</a>, you can startup a Chrome instance with WebGL enabled. Run "shell" from within crosh (ctl+alt+t) and then run:</p>
<p><code>/opt/google/chrome/chrome --enable-webgl</code></p>
<p>Then try loading up a WebGL demo: <a href="http://cooliris-wall.appspot.com" title="Cooliris">Cooliris Wall</a></p>
<p>YMMV</p>
	
</p>

<p><a href="http://mumrah.net/enable-webgl-in-chromeos">Permalink</a> 

	| <a href="http://mumrah.net/enable-webgl-in-chromeos#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Fri, 17 Dec 2010 14:25:45 -0800</pubDate>
      <title>Cr-48</title>
      <link>http://mumrah.net/cr-48</link>
      <guid>http://mumrah.net/cr-48</guid>
      <description>
        <![CDATA[<p>
	<p><div class='p_embed p_image_embed'>
<a href="http://posterous.com/getfile/files.posterous.com/mumrah/mvlhlwrfBzEgjzvdsHkBjCAfEnxFnwbGseafraHjuiDxxcBcbDpufgeCnrhz/p76.jpg.scaled1000.jpg"><img alt="P76" height="667" src="http://posterous.com/getfile/files.posterous.com/mumrah/mvlhlwrfBzEgjzvdsHkBjCAfEnxFnwbGseafraHjuiDxxcBcbDpufgeCnrhz/p76.jpg.scaled500.jpg" width="500" /></a>
<a href="http://posterous.com/getfile/files.posterous.com/mumrah/zbmwyAiqFdatbGzlxnAweHzbbJxpFtFsakdctHHBmjbehHegEJacxHgCvkpl/p78.jpg.scaled1000.jpg"><img alt="P78" height="375" src="http://posterous.com/getfile/files.posterous.com/mumrah/zbmwyAiqFdatbGzlxnAweHzbbJxpFtFsakdctHHBmjbehHegEJacxHgCvkpl/p78.jpg.scaled500.jpg" width="500" /></a>
<div class='p_see_full_gallery'><a href="http://mumrah.net/cr-48">See the full gallery on Posterous</a></div>
</div>
</p>X-mas came early ^_^
	
</p>

<p><a href="http://mumrah.net/cr-48">Permalink</a> 

	| <a href="http://mumrah.net/cr-48#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
      <media:content type="image/jpeg" height="1000" width="750" url="http://getfile6.posterous.com/getfile/files.posterous.com/mumrah/mvlhlwrfBzEgjzvdsHkBjCAfEnxFnwbGseafraHjuiDxxcBcbDpufgeCnrhz/p76.jpg">
        <media:thumbnail height="667" width="500" url="http://getfile2.posterous.com/getfile/files.posterous.com/mumrah/mvlhlwrfBzEgjzvdsHkBjCAfEnxFnwbGseafraHjuiDxxcBcbDpufgeCnrhz/p76.jpg.scaled500.jpg" />
      </media:content>
      <media:content type="image/jpeg" height="750" width="1000" url="http://getfile0.posterous.com/getfile/files.posterous.com/mumrah/zbmwyAiqFdatbGzlxnAweHzbbJxpFtFsakdctHHBmjbehHegEJacxHgCvkpl/p78.jpg">
        <media:thumbnail height="375" width="500" url="http://getfile8.posterous.com/getfile/files.posterous.com/mumrah/zbmwyAiqFdatbGzlxnAweHzbbJxpFtFsakdctHHBmjbehHegEJacxHgCvkpl/p78.jpg.scaled500.jpg" />
      </media:content>
    </item>
    <item>
      <pubDate>Sat, 04 Dec 2010 11:22:05 -0800</pubDate>
      <title>First Snow</title>
      <link>http://mumrah.net/first-snow</link>
      <guid>http://mumrah.net/first-snow</guid>
      <description>
        <![CDATA[<p>
	<p><div class='p_embed p_video_embed'>
<a href="http://mumrah.net/first-snow"><img alt="" src="http://posterous.com/getfile/video.posterous.com/mumrah/kaHAnbxlDffDgmFvsHtuwpqEjvmlsBCJnJwFbxujCGnqEdBiyFypIAFbiidH/frame_0000.png" /></a>
<div class='p_embed_description'>
<strong>p69.mov</strong>
<a href="http://mumrah.net/first-snow">Watch on Posterous</a>
</div>
</div>
</p>Freaking love this place
	
</p>

<p><a href="http://mumrah.net/first-snow">Permalink</a> 

	| <a href="http://mumrah.net/first-snow#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
      <media:content type="video/quicktime" fileSize="4858" url="http://getfile0.posterous.com/getfile/files.posterous.com/mumrah/kaHAnbxlDffDgmFvsHtuwpqEjvmlsBCJnJwFbxujCGnqEdBiyFypIAFbiidH/p69.mov" />
    </item>
    <item>
      <pubDate>Thu, 18 Nov 2010 15:12:22 -0800</pubDate>
      <title>MongoDC - Afterthoughts</title>
      <link>http://mumrah.net/mongodc-afterthoughts</link>
      <guid>http://mumrah.net/mongodc-afterthoughts</guid>
      <description>
        <![CDATA[<p>
	Cool conference, clearly lots of smart ppl working on this product. I think they are trying to do too much too soon (geospatial indexing, full text, etc). I'd like to see some of the core pieces bolstered before features like this. Maybe they can work on lowering mongoDB's pH and getting closer to ACID.
	
</p>

<p><a href="http://mumrah.net/mongodc-afterthoughts">Permalink</a> 

	| <a href="http://mumrah.net/mongodc-afterthoughts#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Thu, 18 Nov 2010 14:36:55 -0800</pubDate>
      <title>MongoDC - ACID</title>
      <link>http://mumrah.net/mongodc-acid</link>
      <guid>http://mumrah.net/mongodc-acid</guid>
      <description>
        <![CDATA[<p>
	So, what about command isolation? "We don't do isolation". What about command level atomicity? Nope, only document level atomic updates. <p>How do you guarantee consistency with replica sets in a sharded environment? Only read from the master. </p><p>And we all know single server durability is not yet possible. </p><p>ACID fail</p>
	
</p>

<p><a href="http://mumrah.net/mongodc-acid">Permalink</a> 

	| <a href="http://mumrah.net/mongodc-acid#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Thu, 18 Nov 2010 08:42:47 -0800</pubDate>
      <title>MongoDC - First update</title>
      <link>http://mumrah.net/mongodc-first-update</link>
      <guid>http://mumrah.net/mongodc-first-update</guid>
      <description>
        <![CDATA[<p>
	Some good talks here at MongoDC. Starting to realize that MongoDB isn't really a DBMS at all, but really a structured document store with flexible indexing and querying
	
</p>

<p><a href="http://mumrah.net/mongodc-first-update">Permalink</a> 

	| <a href="http://mumrah.net/mongodc-first-update#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Thu, 18 Nov 2010 08:42:44 -0800</pubDate>
      <title>MongoDC - eCommerce</title>
      <link>http://mumrah.net/mongodc-ecommerce</link>
      <guid>http://mumrah.net/mongodc-ecommerce</guid>
      <description>
        <![CDATA[<p>
	Spoke with the CTO of Totsy about the semantics of their eCommerce app. They use a document per inventory item rather than keeping a "count" attribute for each inventory. This approach reduces resource contention on inventory documents from atomic document updates, but increases the number of documents and redundant data.<p>Seems the theme of the day is: redundant data</p>
	
</p>

<p><a href="http://mumrah.net/mongodc-ecommerce">Permalink</a> 

	| <a href="http://mumrah.net/mongodc-ecommerce#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Sat, 06 Nov 2010 10:34:10 -0700</pubDate>
      <title>Stone Mt, NC</title>
      <link>http://mumrah.net/stone-mt-nc</link>
      <guid>http://mumrah.net/stone-mt-nc</guid>
      <description>
        <![CDATA[<p>
	<p><div class='p_embed p_image_embed'>
<a href="http://posterous.com/getfile/files.posterous.com/mumrah/mlJiGnHlxynaztizcxtkcpIqJHHtsiqtkqshtEcpjwtzddfGaAgbtfGhnyEc/IMG_0000.jpg.scaled1000.jpg"><img alt="Img_0000" height="375" src="http://posterous.com/getfile/files.posterous.com/mumrah/mlJiGnHlxynaztizcxtkcpIqJHHtsiqtkqshtEcpjwtzddfGaAgbtfGhnyEc/IMG_0000.jpg.scaled500.jpg" width="500" /></a>
</div>
</p>
	
</p>

<p><a href="http://mumrah.net/stone-mt-nc">Permalink</a> 

	| <a href="http://mumrah.net/stone-mt-nc#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
      <media:content type="image/jpeg" height="1536" width="2048" url="http://getfile3.posterous.com/getfile/files.posterous.com/mumrah/mlJiGnHlxynaztizcxtkcpIqJHHtsiqtkqshtEcpjwtzddfGaAgbtfGhnyEc/IMG_0000.jpg">
        <media:thumbnail height="375" width="500" url="http://getfile9.posterous.com/getfile/files.posterous.com/mumrah/mlJiGnHlxynaztizcxtkcpIqJHHtsiqtkqshtEcpjwtzddfGaAgbtfGhnyEc/IMG_0000.jpg.scaled500.jpg" />
      </media:content>
    </item>
    <item>
      <pubDate>Sun, 08 Aug 2010 18:45:00 -0700</pubDate>
      <title>WebSockets in Python</title>
      <link>http://mumrah.net/websockets-in-python</link>
      <guid>http://mumrah.net/websockets-in-python</guid>
      <description>
        <![CDATA[<p>
	<p>Since the dawn of AJAX, web developers have longed for persistent server-side connections. For a while Comet was hailed as the bastion of &ldquo;server push&rdquo;, but deep down we knew it was just a hack. Now finally, years later, we have an API and a protocol being standardized for socket connections between the browser and the server &ndash; aptly named, WebSockets.</p>

<p>WebSockets are bi-directional communication channels that run on single TCP sockets allowing communication between the client and the server. Since they behave like regular INET sockets, we should be able to easily implement them with existing tools. However, when I was looking for example implementations in Python, I didn&rsquo;t find anything that quite satisfied me.</p>

<h2>Python <code>sockets</code> module</h2>

<p>Now don&rsquo;t make the mistake of thinking I&rsquo;m a systems programmer. I have never written a low-level network application like this, and in fact this is my first time playing with <code>sockets</code> or <code>select</code> in Python. The root of all of this is the WebSocket itself, which is just a socket.</p>

<div class="CodeRay">
  <div class="code"><pre><span class="kw">import</span> <span class="ic">socket</span>
websocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
websocket.bind((<span class="s"><span class="dl">&quot;</span><span class="k">localhost</span><span class="dl">&quot;</span></span>, <span class="i">9999</span>))
websocket.listen(<span class="i">5</span>)</pre></div>
</div>


<p>That&rsquo;s all you need to get the WebSocket up and running. Granted, it&rsquo;s not very useful since you can&rsquo;t connect to it (no handshake), but it&rsquo;s a WebSocket nonetheless. When a client connects to the socket, it initiates the handshake with the following</p>

<div class="CodeRay">
  <div class="code"><pre>GET / HTTP/1.1
Upgrade: WebSocket
Connection: Upgrade
Host: localhost:9999
Origin: file://
Sec-WebSocket-Key1: x   d3L703 2  {63 k  L1( 90
Sec-WebSocket-Key2: ^    14   +40Z7R&lt;12om I8  0[

??????????????</pre></div>
</div>


<p>And expects a response in a similar form:</p>

<div class="CodeRay">
  <div class="code"><pre>HTTP/1.1 101 Web Socket Protocol Handshake
Upgrade: WebSocket
Connection: Upgrade
WebSocket-Origin: file://
WebSocket-Location: ws://localhost:9999/
Sec-Websocket-Origin: file://
Sec-Websocket-Location: ws://localhost:9999/

??????????????</pre></div>
</div>


<p>The &ldquo;?&rdquo; are random bits used in the challenge/response part of the handshake. Interesting note: In addition to failing to do the Challenge/Response, Chrome looks for the &ldquo;Websocket-X&rdquo; headers, while Safari (correctly) looks for the &ldquo;Sec-Websocket-X&rdquo; headers.</p>

<p>Here&rsquo;s my full standalone WebSocket server: <a href="http://gist.github.com/512987">http://gist.github.com/512987</a></p>

<p>I won&rsquo;t delve into the details of the implementation, namely because I&rsquo;m sure it&rsquo;s suboptimal. I was pretty happy with the Challenge/Response piece. I read the <a href="http://tools.ietf.org/html/draft-ietf-hybi-thewebsocketprotocol-00" title="WebSockets Protocol">spec from IETF</a> and implemented it, nice and simple. Aren&rsquo;t open standards great? I ended up having to do the handshake because Safari 5 won&rsquo;t let you use a WebSocket otherwise.</p>

<p>Stay tuned for everyone&rsquo;s favorite asynchronous demo: a chat program!</p>

<p>-David</p>
	
</p>

<p><a href="http://mumrah.net/websockets-in-python">Permalink</a> 

	| <a href="http://mumrah.net/websockets-in-python#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Tue, 27 Jul 2010 16:36:00 -0700</pubDate>
      <title>JSON Encoding mongoDB Documents in Python</title>
      <link>http://mumrah.net/24320734</link>
      <guid>http://mumrah.net/24320734</guid>
      <description>
        <![CDATA[<p>
	<p>One thing that kept puzzling me about pymongo was that I couldn&rsquo;t
serialize a document as JSON. Aren&rsquo;t these things just fancy JSON
objects on the backend? Well, they are &ndash; but <code>ObjectId</code> is part of
mongoDB extension of JavaScript so there is no JSON-equivalent. And
since Python only knows about the standard JSON spec, it won&rsquo;t know
what to do with <code>ObjectId</code> instances.
When attempting to encode a Python dictionary which has an <code>ObjectId</code>
as one of its values, I get a <code>TypeError</code> saying <code>ObjectId</code> &ldquo;is not
JSON serializable&rdquo;.</p>

<p>My solution is to extend the JSONEncoder included in Python&rsquo;s <code>json</code>
module (in 2.6 or later)</p>

<p><div class="data type-python">
    
      <table class="lines" cellspacing="0" cellpadding="0">
        <tr>
          <td>
            <pre class="line_numbers"><span rel="#L1" id="L1">1</span>
<span rel="#L2" id="L2">2</span>
<span rel="#L3" id="L3">3</span>
<span rel="#L4" id="L4">4</span>
<span rel="#L5" id="L5">5</span>
<span rel="#L6" id="L6">6</span>
<span rel="#L7" id="L7">7</span>
<span rel="#L8" id="L8">8</span>
<span rel="#L9" id="L9">9</span>
</pre>
          </td>
          <td width="100%">
            
              
                <div class="highlight"><pre /><div class="line" id="LC1"><span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">JSONEncoder</span></div><div class="line" id="LC2"><span class="kn">from</span> <span class="nn">pymongo.objectid</span> <span class="kn">import</span> <span class="n">ObjectId</span></div><div class="line" id="LC3"><br /></div><div class="line" id="LC4"><span class="k">class</span> <span class="nc">MongoEncoder</span><span class="p">(</span><span class="n">JSONEncoder</span><span class="p">):</span>      </div><div class="line" id="LC5">&nbsp;&nbsp;&nbsp;&nbsp;<span class="k">def</span> <span class="nf">_iterencode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">o</span><span class="p">,</span> <span class="n">markers</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span></div><div class="line" id="LC6">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">o</span><span class="p">,</span> <span class="n">ObjectId</span><span class="p">):</span></div><div class="line" id="LC7">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="k">return</span> <span class="s">&quot;&quot;&quot;ObjectId(&quot;</span><span class="si">%s</span><span class="s">&quot;)&quot;&quot;&quot;</span> <span class="o">%</span> <span class="nb">str</span><span class="p">(</span><span class="n">o</span><span class="p">)</span></div><div class="line" id="LC8">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="k">else</span><span class="p">:</span></div><div class="line" id="LC9">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="k">return</span> <span class="n">JSONEncoder</span><span class="o">.</span><span class="n">_iterencode</span><span class="p">(</span><span 
class="bp">self</span><span class="p">,</span> <span class="n">o</span><span class="p">,</span> <span class="n">markers</span><span class="p">)</span></div></pre></div>
              
            
          </td>
        </tr>
      </table>
    
  </div></p>

<p>It adds a special case to handle encoding an <code>ObjectId</code> into a literal
&ldquo;ObjectId&rdquo; in the encoded JSON.</p>

<p>Custom JSON encoders can be used when issuing <code>json.dump</code> or <code>json.dumps</code>
by specifying <code>cls</code></p>

<div class="CodeRay">
  <div class="code"><pre>json.dump(obj, cls=MongoEncoder)</pre></div>
</div>


<p>E.g.,</p>

<div class="CodeRay">
  <div class="code"><pre>&gt;&gt;&gt; <span class="kw">import</span> <span class="ic">json</span>
&gt;&gt;&gt; <span class="kw">from</span> <span class="ic">pymongo.objectid</span> <span class="kw">import</span> <span class="ic">ObjectId</span>
&gt;&gt;&gt; <span class="kw">from</span> <span class="ic">mongoencoder</span> <span class="kw">import</span> <span class="ic">MongoEncoder</span>
&gt;&gt;&gt; x = {<span class="s"><span class="dl">'</span><span class="k">a</span><span class="dl">'</span></span>:<span class="i">1</span>,<span class="s"><span class="dl">'</span><span class="k">b</span><span class="dl">'</span></span>:<span class="s"><span class="dl">&quot;</span><span class="k">foo</span><span class="dl">&quot;</span></span>,<span class="s"><span class="dl">'</span><span class="k">c</span><span class="dl">'</span></span>:ObjectId()}
&gt;&gt;&gt; <span class="kw">print</span> x
{<span class="s"><span class="dl">'</span><span class="k">a</span><span class="dl">'</span></span>: <span class="i">1</span>, <span class="s"><span class="dl">'</span><span class="k">c</span><span class="dl">'</span></span>: ObjectId(<span class="s"><span class="dl">'</span><span class="k">4c4f4f5e2554c813e4000001</span><span class="dl">'</span></span>), <span class="s"><span class="dl">'</span><span class="k">b</span><span class="dl">'</span></span>: <span class="s"><span class="dl">'</span><span class="k">foo</span><span class="dl">'</span></span>}
&gt;&gt;&gt; <span class="kw">print</span> json.dumps(x)
Traceback (most recent call last):
[...]
<span class="ex">TypeError</span>: ObjectId(<span class="s"><span class="dl">'</span><span class="k">4c4f4f5e2554c813e4000001</span><span class="dl">'</span></span>) 
  <span class="kw">is</span> <span class="kw">not</span> JSON serializable
&gt;&gt;&gt; <span class="kw">print</span> json.dumps(x, cls=MongoEncoder)
<span class="s"><span class="dl">'</span><span class="k">{&quot;a&quot;: 1, &quot;c&quot;: ObjectId(&quot;4c4f4f5e2554c813e4000001&quot;), &quot;b&quot;: &quot;foo&quot;}</span><span class="dl">'</span></span></pre></div>
</div>


<p>Voil&agrave;! Enjoy</p>
	
</p>

<p><a href="http://mumrah.net/24320734">Permalink</a> 

	| <a href="http://mumrah.net/24320734#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Thu, 15 Jul 2010 13:54:30 -0700</pubDate>
      <title>Server-side Document Dereferencing in mongoDB</title>
      <link>http://mumrah.net/server-side-documents-dereferencing-in-mongod</link>
      <guid>http://mumrah.net/server-side-documents-dereferencing-in-mongod</guid>
      <description>
        <![CDATA[<p>
	<p>Seems like no one can agree on the best way to structure documents in mongoDB. The consensus seems to be: do what works for you. The nice folks at 10gen offer <a href="http://www.mongodb.org/display/DOCS/Schema+Design" title="Schema Design">some guidance</a> on laying out your documents, and they seem to sit in the camp of &ldquo;redundant data over references&rdquo;. Redundancy over references is fine for some things, but it can be a real pain in the ass for certain situations. E.g., if your would-be embedded documents are updated frequently, you&rsquo;re talking about a ridiculous amount of effort to make all the right changes in the right places. Write situations like this make me nervous about data consistency, but that&rsquo;s another story.</p>

<p>For a little prototype I was working on this past week, I was using ObjectIds to reference documents instead of going the embedded document route. One big disadvantage of this approach with mongoDB is that there is no capacity for JOIN-like operations (it&rsquo;s part of their NoSQL philosophy). I think this is somewhat bullshit, so I took it upon myself to find a workaround. The goal, get some super basic JOIN-like functionality that I can use from a client library (such as pymongo).</p>

<p>Let&rsquo;s begin. Suppose I&rsquo;ve got document class Person that looks like</p>

<div class="CodeRay">
  <div class="code"><pre>{
    <span class="ke">_id</span> : ObjectId(...),
    <span class="ke">name</span> : <span class="s"><span class="dl">&quot;</span><span class="k">string</span><span class="dl">&quot;</span></span>,
    <span class="ke">school</span>: ObjectId(...)
}</pre></div>
</div>


<p>And document class School that looks like</p>

<div class="CodeRay">
  <div class="code"><pre>{
    <span class="ke">_id</span> : ObjectId(...),
    <span class="ke">name</span> : <span class="s"><span class="dl">&quot;</span><span class="k">string</span><span class="dl">&quot;</span></span>,
}</pre></div>
</div>


<p>With the reference document approach (called DBRef by the 10gen folks), the dereferencing takes place on the client side meaning a call back to the server for each document that needs dereferencing. That&rsquo;s a lot of churn on the wire just for a little bit of data. My solution was to do the dereferencing on the database using JavaScript and db.eval().</p>

<div class="CodeRay">
  <div class="code"><pre><span class="kw">var</span> <span class="fu">deref</span> = <span class="kw">function</span> (field, collection) {
    <span class="c">// C-C-C-Closure!!</span>
    <span class="kw">return</span> <span class="kw">function</span> (doc) {
        <span class="kw">return</span> _deref(doc, field, collection);
    };
}
<span class="kw">var</span> <span class="fu">_deref</span> = <span class="kw">function</span> (doc, field, col) {
    <span class="kw">var</span> oid = ObjectId(doc[field]);
    <span class="kw">delete</span> doc[field];
    doc[field] = db[col].findOne({<span class="ke">_id</span>:oid});
    <span class="kw">return</span> doc;
}</pre></div>
</div>


<p>Once you have <a href="http://www.mongodb.org/display/DOCS/Server-side+Code+Execution#Server-sideCodeExecution-Storingfunctionsserverside" title="Storing Server-side Code">saved these functions</a> on the server, you can use them in MapReduce, $where, or db.eval() calls. Here&rsquo;s an example call using pymongo (the collection name is &ldquo;people&rdquo;):</p>

<div class="CodeRay">
  <div class="code"><pre>&gt;&gt;&gt; db.eval(<span class="s"><span class="dl">&quot;</span><span class="k">db.people.find({}).map(deref('school','people'))</span><span class="dl">&quot;</span></span>)</pre></div>
</div>


<p>Now instead of an ObjectId as the &lsquo;school&rsquo; field, you get the document whose &lsquo;_id&rsquo; is that ObjectId. The deref function takes in a field and collection so it knows which field contains the reference Id and where it should look for that document. N.B., calling <code>map</code> on a cursor will unroll that cursor (so use skip() and limit() accordingly). Also, db.eval calls will block (though I don&rsquo;t think it should be problematic since findOne is cheap).</p>

<p>The code as a Gist: <a href="http://gist.github.com/477121">http://gist.github.com/477121</a></p>
	
</p>

<p><a href="http://mumrah.net/server-side-documents-dereferencing-in-mongod">Permalink</a> 

	| <a href="http://mumrah.net/server-side-documents-dereferencing-in-mongod#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Thu, 15 Jul 2010 08:10:00 -0700</pubDate>
      <title>Schema, Less</title>
      <link>http://mumrah.net/schema-less</link>
      <guid>http://mumrah.net/schema-less</guid>
      <description>
        <![CDATA[<p>
	<p>Or should that be "Schema Free"? Lately, I've been digging into the features of several NoSQL systems,&nbsp;and each time I read the bullet points I see something to the effect of "schema-free" or "unstructured documents". This is often touted as one of the features that makes document databases so great - you are released of the bonds of relational databases: no more key constraints, no more type checking, you are essentially free to insert whatever the hell you want. And how. On the flip side, one of the shitty things about document databases is that since you are so free, it is very difficult to code in the oh-so-familiar OO paradigm if you don't know what the data looks like. There have been many efforts to mitigate this including a plethora of frameworks which let you define structured models in your application (with types and everything!)&nbsp;so that the documents are&nbsp;homogeneous&nbsp;in the database. Ok, order is restored.</p>
<div>
<p />
<div>But hold on a second - did we just move a very pain-in-the-ass, expensive piece of our data flow out of the database and into the&nbsp;application? Oh yes, yes we certainly did.&nbsp;We have overthrown the Monarchy in favor of Anarchy, but then realized we need order and rules or else everything turns to shit. So the People take on the burden of maintaining the State. That's right, document databases lead to Communism.<br />
<div>
<p />
<div>Analogies aside, inserting documents all willy-nilly is great for write performance (particularly batch loading), but having a database that doesn't allow any kind of constraints on a document, its fields, or its relationships really puts a lot of work on the application. The big question here is: is it worth it? I think so (maybe). Putting this work on the application will certainly slow down the application layer, and at a small scale the net performance will probably be worse. However, document databases are somewhat easier to scale than traditional SQL-based systems. So as the application+database scale out the net performance will be&nbsp;considerably&nbsp;better than a traditional SQL-backed stack. At least in theory.&nbsp;</div>
<p />
<div>A lot of this remains to be seen.</div>
<p />
<p />
</div>
</div>
</div>
	
</p>

<p><a href="http://mumrah.net/schema-less">Permalink</a> 

	| <a href="http://mumrah.net/schema-less#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Sat, 10 Jul 2010 15:14:50 -0700</pubDate>
      <title>New Posterous Home</title>
      <link>http://mumrah.net/new-posterous-home</link>
      <guid>http://mumrah.net/new-posterous-home</guid>
      <description>
        <![CDATA[<p>
	In anticipation of not hosting my own blog forever (servers aren't cheap), I'm setting up a new home for my nonsense. As per my modus operandi, I'll be talking about things that are tangentially related to what I'm doing at work (I'm a very single-focus hacker), and will probably be different every week. <p /> I won't promise or even suggest that I'll update this more than previous incarnations of <a href="http://mumrah.net">my blog</a>. Seriously, no promises.<p /><div>Cheers</div>
	
</p>

<p><a href="http://mumrah.net/new-posterous-home">Permalink</a> 

	| <a href="http://mumrah.net/new-posterous-home#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Mon, 08 Feb 2010 04:31:03 -0800</pubDate>
      <title>1080p content on your PS3</title>
      <link>http://mumrah.net/1080p-content-on-your-ps3</link>
      <guid>http://mumrah.net/1080p-content-on-your-ps3</guid>
      <description>
        <![CDATA[<p>
	New toys bring new adventures. My awesome wife got me a PS3 for my birthday recently and I've been tinkering around with getting some non-Bluray HD content to play on it. My initial attempts to stream stuff over my network proved unsatisfying. Since the PS3 is only capable of 802.11g, I gave up on the high bitrate stuff (it's perfectly capable of DVD quality content (1.5~2.5 Mb/s)).

Software used (on Mac OS X 10.5):

* [tsMuxer](<a href="http://www.smlabs.net/tsmuxer_en.html">http://www.smlabs.net/tsmuxer_en.html</a> "tsMuxer website")
* Disk Utility
* newfs_udf
* hdiutil

Hardware used:

* Macbook Pro
* Blank CD/DVD Media
* PS3

Files used:
* MKV file with AC3 audio stream and H264 video stream

Attempt 1 (**successful**): Load the MKV with tsMuxerGUI, select M2TS muxing. If the video profile is above 4.1, lower it to 4.1 (as the PS3 cannot support higher than an H264 level 4.1). Generate the m2ts and meta file, burn them both to a CD/DVD. This will be readable by the PS3 as a data disk - it will not autoplay, but you can access it and play it. To me, this is not an ideal solution as it does not support menus, chapters, or seeking. 

Attempt 2 (**unsuccessful**): Same deal as before, but select "AVCHD disk". This option will create a BD friendly file structure (folders named BDMV and CERTIFICATE). The trick here is to burn the disk as UDF 2.5 (this is not super easy on OS X or Linux). I wasted many CDs trying to use regular ISO9660. Following the instructions [here](<a href="http://www.videohelp.com/forum/archive/how-to-burn-avchd-on-dvd-r-in-high-definition-for-br-set-top-playback-t350375.html">http://www.videohelp.com/forum/archive/how-to-burn-avchd-on-dvd-r-in-high-def...</a> "about halfway down"), you must create the image and format it as UDF 2.5. 

    dd if=/dev/zero of=myfile.img bs=1k count=716800
    newfs_udf -r 2.5 myfile.img -v volume_label
    hdiutil mount -nobrowse myfile.img
    cp -R /path/to/avchd-files/ /Volume/volume_label/
    hdiutil unmount /Volume/volume_label
<br />
In plain-speak, create an empty (large) image, format it to UDF 2.5, mount it, copy the BD-compatible files into the volume, and unmount. You then use Disk Utility to burn the resulting image. No success here, but at this point I was so close, I could taste it.

Attempt 3 (**successful**): *Identical* procedure as Attempt 2, with one important exception - the image you create with `dd` must be sized in even increments of 1GB - that is, count==N\*1024\*1024. 

A few things to try next:

* Menus
* DTS audio stream
* Subtitles

Anyone wanting to test this out with a super high quality 1080p rip, I recommend [Big Buck Bunny](<a href="http://www.bigbuckbunny.org/index.php/download/">http://www.bigbuckbunny.org/index.php/download/</a> "Big Buck Bunny - Open Movie Project"). You'll need Handbrake to convert it to a compatible container format if you download the AVI (tsMuxer doesn't like AVIs).
	
</p>

<p><a href="http://mumrah.net/1080p-content-on-your-ps3">Permalink</a> 

	| <a href="http://mumrah.net/1080p-content-on-your-ps3#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Wed, 09 Sep 2009 16:24:22 -0700</pubDate>
      <title>Making Python's pickle safe(r)</title>
      <link>http://mumrah.net/making-pythons-pickle-safer</link>
      <guid>http://mumrah.net/making-pythons-pickle-safer</guid>
      <description>
        <![CDATA[<p>
	<div class='p_embed p_image_embed'>
<img alt="Media_httpmumrahdotne_rsjfn" height="181" src="http://posterous.com/getfile/files.posterous.com/import-igkj/uuFsjaHrpwjslziFvgkCsAyJpBxIvvaIaBAnqlonEucHdxzkIqyvtxFaGnfB/media_httpmumrahdotne_rsjFn.jpg.scaled500.jpg" width="137" />
</div>
 Everyone loves pickle, I mean, what's not to love. Super fast object serialization (via cPickle). However, there are some legitimate concerns regarding the security of pickle - specifically the load/loads method. The basic problem is, if you try to unpickle untrusted data, you are liable to create some objects that can do nasty things (<a href="http://docs.python.org/3.1/library/pickle.html#restricting-globals" title="Importing OS with Pickle" target="_blank">like make system calls</a>). Python even gives us a nice warning right in the docs
<blockquote class="posterous_medium_quote">
<strong>Warning</strong>
<em>pickle</em> module is not intended to be secure against erroneous or maliciously constructed data. Never unpickle data received from an untrusted or unauthenticated source.</blockquote><br />

Now there are plenty of things you can do to improve the security of the unpickling process. Python lets you subclass pickle.Unpickler to give the user finer grained control over what gets unpickled. This is a fine approach (<a href="http://nadiana.com/python-pickle-insecure" title="Example of a safer Unpickler class" target="_blank">a nice example here</a>), and will work for most, but I will give my take on the issue.

For most of the applications I write that use pickle, I'm just looking for a way to store arbitrary Python data as a string. One example might be storing small data objects on S3, or perhaps implementing user sessions for a webapp. Either way, I <em>should</em> be able to trust my own data for unpickling, but it's always best to be double-extra-sure when dealing with something where you can blindly execute arbitrary bits of code (think, the evil eval method).

So, for my case, I simply want to verify that the pickled data I stored is coming back to me unmodified. My solution: sign the pickled data. Using the same signing method as AWS, I present the following:
<div class="CodeRay">
  <div class="code"><pre>import hmac
import hashlib
import base64
from cPickle import dumps
 # The unsigned pickled data
string_to_sign = dumps({'foo':&quot;bar&quot;,'spam':&quot;eggs&quot;,'the answer':42})
 # The signature object
signature = hmac.HMAC(key=&quot;my application's super secret key&quot;,
    msg= string_to_sign, digestmod=hashlib.sha256)
 # The signed string: store this
signed_string = string_to_sign + base64.encodestring(signature.digest())</pre></div>
</div>

Now you have your pickled data as the first part of the string with the last 45 characters being the signature. The key for HMAC signing is specific to your application, so if someone gets access to your pickled data and tries to mess with it and resign it, it won't work. Here's the unpickling process:
<div class="CodeRay">
  <div class="code"><pre>import hmac
import hashlib
import base64
from cPickle import loads
 # Break up the signed string into message and signature
signature = signed_string[-45:]
message = signed_string[:-45]
 # Calculate the signature of the message
msg_sig = hmac.HMAC(key=&quot;my application's super secret key&quot;,
    msg= message, digestmod=hashlib.sha256)
 # See that it matches the given signature
assert base64.encodestring(msg_sig.digest()) == signature</pre></div>
</div>

-David
	
</p>

<p><a href="http://mumrah.net/making-pythons-pickle-safer">Permalink</a> 

	| <a href="http://mumrah.net/making-pythons-pickle-safer#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
      <media:content type="image/jpeg" height="181" width="137" url="http://getfile5.posterous.com/getfile/files.posterous.com/import-igkj/uuFsjaHrpwjslziFvgkCsAyJpBxIvvaIaBAnqlonEucHdxzkIqyvtxFaGnfB/media_httpmumrahdotne_rsjFn.jpg">
        <media:thumbnail height="181" width="137" url="http://getfile1.posterous.com/getfile/files.posterous.com/import-igkj/uuFsjaHrpwjslziFvgkCsAyJpBxIvvaIaBAnqlonEucHdxzkIqyvtxFaGnfB/media_httpmumrahdotne_rsjFn.jpg.scaled500.jpg" />
      </media:content>
    </item>
    <item>
      <pubDate>Sat, 18 Jul 2009 22:00:56 -0700</pubDate>
      <title>API Functional Testing with Python</title>
      <link>http://mumrah.net/api-functional-testing-with-python</link>
      <guid>http://mumrah.net/api-functional-testing-with-python</guid>
      <description>
        <![CDATA[<p>
	Recently, <a href="http://loud3r.com" title="Loud3r" target="_blank">at work</a> we have written a totally badass XML API for clients to interface with our data (sorry no public side yet). After some gentle reassuring (and some not-so-gentle arm twisting), I convinced my boss-man we could do this in Python with AWS on the back-end. We settled on the Turbogears 2.0 meta-framework using Amazon S3/SimpleDB. The whole experience was very educational for many reasons - one, we had never used anything besides MySQL for a data store, two, we had never used a Python framework before, and three, we had never really developed an app with a proper set of tests. That final point, testing, is the subject of this entry. 

<a href="http://codespeak.net/py/dist/test/test.html" title="[test]" target="_blank">Py.Test</a>, from the vaingloriously-named "py" module, is my unit testing framework of choice (I have <a href="http://mumrah.net/2009/02/python-unit-testing-super-fun-time/" title="Python unit testing super fun time" target="_self">written about it before</a>). It provides a convenient way to collect tests and to write generative tests (which are super useful) for unit testing. After getting a few sets of unit tests rolled out for our API, we recognized that we would need some higher level tests - so called functional, or acceptance tests. 

### Functional Tests
Functional tests describe high-level tests that rely on the interaction of many components of the system, whereas a unit test will only test smaller, lower level components. For example, one (very high-level) functional test for an XML API would be to see that the resulting XML is well-formed. The well-formedness of an XML response from an API request is dependent on several components of the system. It requires proper request parsing, validation, error handling, template rendering, et al. A more typical test might be to see that the number of items returned by the API does not exceed a user-provided maximum, i.e., if the user requests <a href="http://api.example.com/">http://api.example.com/</a>?[request params]&amp;max_count=10, no more than 10 results are shown.

Now, how to go about running these tests. The number of functional testing frameworks is too great to mention (<a href="http://www.opensourcetesting.org/functional.php" title="Exhaustive list of functional testing frameworks" target="_blank">here's a bunch</a>), but one that is well known and widely used is Selenium. It is written in Java and can do some pretty fancy stuff. However, one big drawback of Selenium is its weight. It's <em>heavy</em> - it is Java after all, and requires a client server (whether you sacrifice your own cycles or a remote server). For the simple functional tests we were writing, it was completely overkill. After searching around for a Python functional testing framework (or at least something lighter than Selenium), it occurred to me that I could just use the test-collecting abilities of Py.Test plus some additional libraries. And that's what we did.

### Bottom Line 
Mix together <a href="http://pyxml.sourceforge.net/topics/" title="PyXML" target="_blank">PyXML</a>, Urllib2, and Py.Test and you have a pretty powerful (and portable) testing suite in Python. PyXML extends the built-in 'xml' module with some really nice packages including an XPath parser which I love.

### Exempli Gratia
Consider an API that has a "users" noun, and just one verb "show". We will allow one optional parameter <em>order_by</em> and one required parameter <em>max_count</em>. A valid URL would look like <a href="http://api.example.com/users/show?max_count=10&amp;order_by=date">http://api.example.com/users/show?max_count=10&amp;order_by=date</a>.

We'll start by creating the class that will contain the tests, and writing a function to get an XML doc given some url parameters.
<div class="CodeRay">
  <div class="code"><pre>import urllib2
from collections import defaultdict
from xml.dom import minidom
from xml import xpath
class TestUserNoun:
        def get_xml_doc(self,url_params):
                url = &quot;http://api.example.com/users/show?&quot;
                url += &quot;max_count=%(max_count)s&amp;order_by=%(order_by)s&quot;
                url_p = urllib2.urlopen( url % defaultdict(str,url_params) )
                doc = minidom.parseString( url_p.read() )
                url_p.close()
                return doc</pre></div>
</div>

N.B., you can create a specific User-Agent with urllib2 if so desired, and defaultdict is used so we don't have to check if the incoming dict (url_params) has everything we need for the url string.

Now we can start writing some tests
<div class="CodeRay">
  <div class="code"><pre>class TestUserNoun:
        ...
        def test_user_count(self):
                # Test several values of max_count
                counts = (5,10,15,20)
                def count_users(n):
                        # Test that the number of results returned is less than or equal to n
                        doc = self.get_xml_doc({'max_count':n})
                        user_count = len( xpath.Evaluate('/xpath/expr',doc.documentElement) )
                        assert user_count &lt;= n</pre></div>
</div>

And you get the idea - one can write tests ad nauseam (although I'm not sure if there's such a thing as too many tests). Of course neither of these tests will work since the XPath expressions are not valid - I didn't really feel like spelling out a whole XML schema just for this example. There are plenty of good XPath tutorials out there. The basic idea here is you want to test all of your request parameters for the API to see a number of things: 

* Does the controller handle the requests properly? What about missing/extra parameters?
* Are errors handled properly?
* Is the resulting XML valid? This is implicitly done by parsing the XML document
* Does the resulting data correspond to the request parameters? This one will require the most tests to be written - don't forget about generative tests!

A powerful test suite means a robust application. When you have a nice set of tests, you can push your code with confidence - and believe me, that is a very rewarding and relieving feeling. Writing this API has been an extremely rewarding experience, and probably the most educational thing I've done programming-wise since I wrote a cross-browser javascript event library like 5 years ago.

So go forth, programmer - embrace testing and empower yourself.

-David
	
</p>

<p><a href="http://mumrah.net/api-functional-testing-with-python">Permalink</a> 

	| <a href="http://mumrah.net/api-functional-testing-with-python#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Sat, 20 Jun 2009 20:34:15 -0700</pubDate>
      <title>Weekend Project - CloudCached</title>
      <link>http://mumrah.net/weekend-project-cloudcached</link>
      <guid>http://mumrah.net/weekend-project-cloudcached</guid>
      <description>
        <![CDATA[<p>
	A friend and I have been bouncing around the idea of a caching system that ran on Amazon's cloud for a while now. Basically something like memcached, but without the (very real) limitations of physical memory or the need of a whole server. Sure, it's hard to beat the speed of memory-level read access, but I think the appeal of a distributed, <a href="http://aws.amazon.com/s3/#functionality" title="Max 5GB per item" target="_blank">limitless</a> cache might outweigh the slowdown. 

### Idea
Provide an interface for storing/retrieving serialized data on S3

Pretty simple idea, pretty simple implementation. Thanks to the S3 interface provided by [Boto](<a href="http://code.google.com/p/boto/">http://code.google.com/p/boto/</a> "Boto rocks!"), things were a lot easier. I'm going to keep this open source under the MIT license. You can check out the code on [GitHub repository](<a href="http://github.com/mumrah/cloudcached/tree/master">http://github.com/mumrah/cloudcached/tree/master</a> "CloudCached on GitHub") - please feel free to fork, improve, submit, etc. 

### Overview
A quick walkthrough of the code will reveal truly how simple this is. The Client class provides basic CRUD methods for interfacing with S3: __put__, __get__, __update__, __delete__. The put and update methods store a timestamp as the "expires" header for the file to keep track of cache expiration. Also these two methods write a "type" header to the meta-data so CloudCached knows how to de-serialize the file. 
<div class="CodeRay">
  <div class="code"><pre>class Client:
&quot;Here's the class schema&quot;
        def get(self, key)
        def put(self, key, value, time_to_expire=3600, replace=False)
        def update(self, key, value, time_to_expire=3600)
        def delete(self, key)</pre></div>
</div>
   
There are 5 basic data types used in this code for serializing any bit of python data: basestring (for str and unicode), int (for int and long), complex, float, and other. The other data type represents anything that is not a base type in Python. These "other" types get pickled while everything else just gets str'd.

The put method checks the md5sum to make sure everything went through cleanly (maybe a bit costly, but worth it in my opinion). cPickle is used in favor of pickle for obvious reasons (it's much faster).

### Results
Some very early tests show that this might just be usable. 
<div class="CodeRay">
  <div class="code"><pre>CloudCached Benchmarks (10 runs)
        --------------------------------------------------------
        Test                                  |        Average (s)                | Total (s)  
        --------------------------------------------------------
        GET integer                         |        0.0283360004425        | 0.283360004425
        GET string (32 byte) |        0.0315794944763        | 0.315794944763
        GET string (512KB)         |        0.1265994787220        | 1.265994787220
        PUT integer                         |        0.0650457143784        | 0.650457143784
        PUT string (32 byte) |        0.0563205003738        | 0.563205003738
        PUT string (512KB)         |        0.1773290872570        | 1.773290872570
        --------------------------------------------------------</pre></div>
</div>

### Advantages
* Highly distributed. S3 data is distributed across multiple availability zones and could therefore be utilized by an application running across multiple availability zones.
* No size limit. Unlike the physical limitations of a memcached machine (or cluster of memcached machines), S3 does not have limits on the number of files (caches) you can store. Also, with S3, you can write files from 1 byte to 5 GB (although I think a 5GB cache file would defeat the purpose).
* Parallel read access. If applicable to the application, cache reads can be largely parallelized which could potentially give linear speedup to the cache loading.
* No server necessary. Since the application is reading and writing directly to S3, there is no need for a "cache server". This could lead to a great deal of savings for people running multiple memcached machines. Memcached servers typically have a large memory capacity which means a m1.xlarge or c1.xlarge EC2 instance (assuming it's running in EC2). 

### Considerations
It's going to be hard to beat the speed of memcached. As far as speed is concerned, I'm using built-in Python stuff including urllib, httplib, xml.sax, etc (all of which are used by Boto). It might be worthwhile to write a C implementation of the S3 communication methods (but maybe not). The most costly part of this code aside from network communication is probably the serialization, and since cPickle is used there is not really any improvement to be made there.

It might be cool to couple the meta-data with SimpleDB. 

I registered cloudcached.com in case this gains some momentum. I will post updates and benchmarks there as they arrive. 

-David
	
</p>

<p><a href="http://mumrah.net/weekend-project-cloudcached">Permalink</a> 

	| <a href="http://mumrah.net/weekend-project-cloudcached#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
    </item>
    <item>
      <pubDate>Tue, 09 Jun 2009 01:22:23 -0700</pubDate>
      <title>First (real) MPI run on EC2</title>
      <link>http://mumrah.net/first-real-mpi-run-on-ec2</link>
      <guid>http://mumrah.net/first-real-mpi-run-on-ec2</guid>
      <description>
        <![CDATA[<p>
	After a few days of tinkering with <a href="http://github.com/mumrah/ec2mpi/tree/master" title="GitHub project page for EC2MPI" target="_blank">EC2MPI</a>, I spent some time polishing up a stat mech MPI simulation. The code in question is a 2d Ising model simulation using Replica Exchange. Right now it stands at around 400 lines of C++ using STL vectors (which I love). Once I know it works (or at least works well enough) I might post it up here, but for now I'm just trying to generate pretty hysteresis plots and observe the critical behavior of a 2d Ising model system. Here's a picture with points on it.

[caption id="" align="aligncenter" width="645" caption="Energy per spin plotted against magnetization"]<div class='p_embed p_image_embed'>
<a href="http://posterous.com/getfile/files.posterous.com/import-igkj/tnHwsfrnfknotGGsbrnmwDrIAlefkEtAnxcHgtoopdtbmxEbeFEakfqBqtAk/media_httpmumrahdotne_vrAha.png.scaled1000.png"><img alt="Media_httpmumrahdotne_vraha" height="343" src="http://posterous.com/getfile/files.posterous.com/import-igkj/tnHwsfrnfknotGGsbrnmwDrIAlefkEtAnxcHgtoopdtbmxEbeFEakfqBqtAk/media_httpmumrahdotne_vrAha.png.scaled500.png" width="500" /></a>
</div>
[/caption]

I leave the interpretation to you. The best part of this is that I can do these MPI runs without burning a hole in my lap (the MacBook gets rather warm). 
-David
	
</p>

<p><a href="http://mumrah.net/first-real-mpi-run-on-ec2">Permalink</a> 

	| <a href="http://mumrah.net/first-real-mpi-run-on-ec2#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
      <media:content type="image/png" height="442" width="645" url="http://getfile0.posterous.com/getfile/files.posterous.com/import-igkj/tnHwsfrnfknotGGsbrnmwDrIAlefkEtAnxcHgtoopdtbmxEbeFEakfqBqtAk/media_httpmumrahdotne_vrAha.png">
        <media:thumbnail height="343" width="500" url="http://getfile5.posterous.com/getfile/files.posterous.com/import-igkj/tnHwsfrnfknotGGsbrnmwDrIAlefkEtAnxcHgtoopdtbmxEbeFEakfqBqtAk/media_httpmumrahdotne_vrAha.png.scaled500.png" />
      </media:content>
    </item>
    <item>
      <pubDate>Mon, 08 Jun 2009 13:29:50 -0700</pubDate>
      <title>Time Machine In Your Pocket - Addendum</title>
      <link>http://mumrah.net/time-machine-in-your-pocket-addendum</link>
      <guid>http://mumrah.net/time-machine-in-your-pocket-addendum</guid>
      <description>
        <![CDATA[<p>
	Addendum to two <a href="http://mumrah.net/2008/12/time-machine-in-your-pocket/" title="Part 1" target="_self">previous</a><a> </a><a href="http://mumrah.net/2009/03/timemachine-in-your-pocket-part-2/" title="Part 2" target="_self">posts</a>.

The other day, I noticed my 8GB USB volume that I use for temporary incremental backups was quite full. Curious, since the folders I back up to that volume do not total but 200MB or so, and rsync was supposed to be doing incremental backups (link-dest ftw).

After a little searching around, I found someone who had a similar problem (and a solution). When you format a volume with OS X it will, by default, ignore file ownership (the linked article explores why this is perhaps). This proves to be a problem for rsync which considers file permissions and ownership as part of the file stat (as it should). Luckily the fix is easy - "Get Info" for the volume in question, then at the bottom unselect "Ignore ownership on this volume"
<div style="text-align: center;"><div class='p_embed p_image_embed'>
<img alt="Media_httpmumrahdotne_lgddf" height="49" src="http://posterous.com/getfile/files.posterous.com/import-igkj/GoGspftvaspoCCEwqxIxJpAhfyxsfjsGBBhduflDBDDzamekwplsgHlxjglw/media_httpmumrahdotne_lgDDF.png.scaled500.png" width="238" />
</div>
</div><br />

You will probably want to delete any backups that have been created (since they won't have the correct file ownership). Source: <a href="http://terminalapp.net/backups-rsync-and-link-dest-not-working/">Terminalapp.net</a>
	
</p>

<p><a href="http://mumrah.net/time-machine-in-your-pocket-addendum">Permalink</a> 

	| <a href="http://mumrah.net/time-machine-in-your-pocket-addendum#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
      <media:content type="image/png" height="49" width="238" url="http://getfile5.posterous.com/getfile/files.posterous.com/import-igkj/GoGspftvaspoCCEwqxIxJpAhfyxsfjsGBBhduflDBDDzamekwplsgHlxjglw/media_httpmumrahdotne_lgDDF.png">
        <media:thumbnail height="49" width="238" url="http://getfile5.posterous.com/getfile/files.posterous.com/import-igkj/GoGspftvaspoCCEwqxIxJpAhfyxsfjsGBBhduflDBDDzamekwplsgHlxjglw/media_httpmumrahdotne_lgDDF.png.scaled500.png" />
      </media:content>
    </item>
    <item>
      <pubDate>Sat, 30 May 2009 23:40:24 -0700</pubDate>
      <title>MPI running on Amazon EC2</title>
      <link>http://mumrah.net/mpi-running-on-amazon-ec2</link>
      <guid>http://mumrah.net/mpi-running-on-amazon-ec2</guid>
      <description>
        <![CDATA[<p>
	<div class='p_embed p_image_embed'>
<img alt="Media_httpawsmedias3a_rrfmj" height="52" src="http://posterous.com/getfile/files.posterous.com/import-igkj/orgGoDbBDvDxwqIeopbypqyhCFpGexAbJykrGsayjhlngtbybpEsqHwergls/media_httpawsmedias3a_rrfmj.jpg.scaled500.jpg" width="127" />
</div>


<div class='p_embed p_image_embed'>
<img alt="Media_httpwwwopenmpio_xlakp" height="128" src="http://posterous.com/getfile/files.posterous.com/import-igkj/bdJcBoIjGIkBoeGuwkjHtJqppioDIbpcnFHHlhAbIlyCBnfxcqEjEEEDJriD/media_httpwwwopenmpio_xlakp.png.scaled500.png" width="129" />
</div>


For my Master's thesis, I'm going to be running a lot of MPI code, and naturally I need a place to run it. Let me first say that my university has an excellent <a href="http://hpc.fsu.edu" title="Florida State University High Performance Computing Center" target="_blank">high-performance computing center</a> run by one of my committee chairs that is more than capable of serving my needs - and yet, I am unfulfilled. With our scheduling system, there is a "backfill" that is always available for running small jobs (like the ones I run), but for my thesis, I want to test the massive scalability of an algorithm (<a href="http://en.wikipedia.org/wiki/Parallel_tempering" title="Replica Exchange, also known as Parallel Tempering" target="_blank">Replica Exchange</a>). When I say massive, I mean <em>massive</em> - think 1000 compute nodes or more.

Big ideas, people.

In order to satisfy my need for a massively parallel platform, I looked no further than <a href="http://aws.amazon.com/ec2/" title="Amazon Elastic Compute Cloud" target="_blank">Amazon EC2</a>. As should be apparent from many of my <a href="http://mumrah.net/topics/amazon/" title="Posts filed under Amazon" target="_self">previous posts</a>, I have been doing a lot of work with Amazon's cloud services - for both school and work.

A few weeks ago, I started an MIT-licensed open source project on GitHub aptly named <a href="http://github.com/mumrah/ec2mpi/tree/master" title="EC2MPI repository on GitHub" target="_blank">EC2MPI</a>. Today I took a major step forward with this project, which was the motivation for this post. I finally have everything configured properly and got my first no-hassle MPI cluster up and running.

The script I wrote (<a href="http://github.com/mumrah/ec2mpi/tree/master" title="Another link" target="_blank">EC2MPI</a>) is written in Python and presents an interactive prompt to the user. You select the architecture (i386 or x64), the number of instances, and I also have support for user-defined SSH keypairs (not AWS keypairs) for cluster security. The instances are spawned, and EC2MPI sets up the SSH keys, as well as the MPI configuration. It is so freaking sweet.

I wanted to share some issues I've had so far while developing this and how I solved them.

<strong>Intra-EC2 communication</strong> - For this, I needed the instances to be able to talk to one another for point-to-point as well as collective communication. My solution for this was to allow the user to generate SSH keypairs which were stored in a private S3 bucket (owned by the user). My user-data script sent to the instances took care of downloading and installing the keys upon startup.

<strong>Shared storage among instances</strong> - In order to run MPI code, the nodes in the cluster need access to a shared storage volume which will contain binary files compiled by MPI. Since EC2 has no shared storage (for now), I had to find an alternate solution. The solution I settled on was to use <a href="http://code.google.com/p/s3fs/" title="S3FS Google Code project page" target="_blank">s3fs</a>: a FUSE-based filesystem which allows you to mount an S3 bucket as a volume. Reading from and writing to the shared volume is pretty slow (unless it's cached), so for certain kinds of code this might not be ideal. However, I believe it is the best solution for now. I imagine one day Amazon will add a feature to the Elastic Block Store volumes that allows them to act as shared volumes.

<strong>Starting up and tearing down clusters</strong> - I used Amazon SimpleDB to keep meta-data about the cluster: how many instances are in the cluster, internal/external IP addresses, etc. This is also how I define the master node and worker nodes. This will allow me to add features such as adding and removing instances from a cluster without having to tear the whole thing down. Also, I did all startup config with a user-data script so the script does not have to log into each instance upon startup. This allows the cluster's startup to scale well.

Check back soon for some benchmarks and more detailed write-ups as the project progresses. First, I need to get my maximum number of instances increased (right now I can do 20 max). Fast times ahead, friends.

-David
	
</p>

<p><a href="http://mumrah.net/mpi-running-on-amazon-ec2">Permalink</a> 

	| <a href="http://mumrah.net/mpi-running-on-amazon-ec2#comment">Leave a comment&nbsp;&nbsp;&raquo;</a>

</p>]]>
      </description>
      <posterous:author>
        <posterous:userImage>http://files.posterous.com/user_profile_pics/687840/bubsIM1.gif</posterous:userImage>
        <posterous:profileUrl>http://posterous.com/users/5BclaIIDEehr</posterous:profileUrl>
        <posterous:firstName>David</posterous:firstName>
        <posterous:lastName>Arthur</posterous:lastName>
        <posterous:nickName>mumrah</posterous:nickName>
        <posterous:displayName>David Arthur</posterous:displayName>
      </posterous:author>
      <media:content type="image/jpeg" height="52" width="127" url="http://getfile5.posterous.com/getfile/files.posterous.com/import-igkj/orgGoDbBDvDxwqIeopbypqyhCFpGexAbJykrGsayjhlngtbybpEsqHwergls/media_httpawsmedias3a_rrfmj.jpg">
        <media:thumbnail height="52" width="127" url="http://getfile4.posterous.com/getfile/files.posterous.com/import-igkj/orgGoDbBDvDxwqIeopbypqyhCFpGexAbJykrGsayjhlngtbybpEsqHwergls/media_httpawsmedias3a_rrfmj.jpg.scaled500.jpg" />
      </media:content>
      <media:content type="image/png" height="128" width="129" url="http://getfile2.posterous.com/getfile/files.posterous.com/import-igkj/bdJcBoIjGIkBoeGuwkjHtJqppioDIbpcnFHHlhAbIlyCBnfxcqEjEEEDJriD/media_httpwwwopenmpio_xlakp.png">
        <media:thumbnail height="128" width="129" url="http://getfile6.posterous.com/getfile/files.posterous.com/import-igkj/bdJcBoIjGIkBoeGuwkjHtJqppioDIbpcnFHHlhAbIlyCBnfxcqEjEEEDJriD/media_httpwwwopenmpio_xlakp.png.scaled500.png" />
      </media:content>
    </item>
  </channel>
</rss>

