<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
 <title>Hector Castro</title>
 
 <link href="http://hectcastro.me/" />
 <updated>2011-11-18T11:42:14-08:00</updated>
 <id>http://hectcastro.me/</id>
 <author>
   <name>Hector Castro</name>
   <email>hectcastro@gmail.com</email>
 </author>
 
 <atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/tradingfish" /><feedburner:info uri="tradingfish" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><feedburner:feedFlare href="http://add.my.yahoo.com/rss?url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://us.i1.yimg.com/us.yimg.com/i/us/my/addtomyyahoo4.gif">Subscribe with My Yahoo!</feedburner:feedFlare><feedburner:feedFlare href="http://www.newsgator.com/ngs/subscriber/subext.aspx?url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.newsgator.com/images/ngsub1.gif">Subscribe with NewsGator</feedburner:feedFlare><feedburner:feedFlare href="http://feeds.my.aol.com/add.jsp?url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://o.aolcdn.com/favorites.my.aol.com/webmaster/ffclient/webroot/locale/en-US/images/myAOLButtonSmall.gif">Subscribe with My AOL</feedburner:feedFlare><feedburner:feedFlare href="http://www.bloglines.com/sub/http://feeds.feedburner.com/tradingfish" src="http://www.bloglines.com/images/sub_modern11.gif">Subscribe with Bloglines</feedburner:feedFlare><feedburner:feedFlare href="http://www.netvibes.com/subscribe.php?url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.netvibes.com/img/add2netvibes.gif">Subscribe with Netvibes</feedburner:feedFlare><feedburner:feedFlare href="http://fusion.google.com/add?feedurl=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://buttons.googlesyndication.com/fusion/add.gif">Subscribe with Google</feedburner:feedFlare><feedburner:feedFlare href="http://www.pageflakes.com/subscribe.aspx?url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.pageflakes.com/ImageFile.ashx?instanceId=Static_4&amp;fileName=ATP_blu_91x17.gif">Subscribe with Pageflakes</feedburner:feedFlare><feedburner:feedFlare 
href="http://www.plusmo.com/add?url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://plusmo.com/res/graphics/fbplusmo.gif">Subscribe with Plusmo</feedburner:feedFlare><feedburner:feedFlare href="http://www.thefreedictionary.com/_/hp/AddRSS.aspx?http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://img.tfd.com/hp/addToTheFreeDictionary.gif">Subscribe with The Free Dictionary</feedburner:feedFlare><feedburner:feedFlare href="http://www.bitty.com/manual/?contenttype=rssfeed&amp;contentvalue=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.bitty.com/img/bittychicklet_91x17.gif">Subscribe with Bitty Browser</feedburner:feedFlare><feedburner:feedFlare href="http://www.newsalloy.com/?rss=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.newsalloy.com/subrss3.gif">Subscribe with NewsAlloy</feedburner:feedFlare><feedburner:feedFlare href="http://www.live.com/?add=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://tkfiles.storage.msn.com/x1piYkpqHC_35nIp1gLE68-wvzLZO8iXl_JMledmJQXP-XTBOLfmQv4zhj4MhcWEJh_GtoBIiAl1Mjh-ndp9k47If7hTaFno0mxW9_i3p_5qQw">Subscribe with Live.com</feedburner:feedFlare><feedburner:feedFlare href="http://mix.excite.eu/add?feedurl=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://image.excite.co.uk/mix/addtomix.gif">Subscribe with Excite MIX</feedburner:feedFlare><feedburner:feedFlare href="http://download.attensa.com/app/get_attensa.html?feedurl=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.attensa.com/blogs/attensa/WindowsLiveWriter/BadgeredintoBadges_10C02/attensa_feed_button5.gif">Subscribe with Attensa for Outlook</feedburner:feedFlare><feedburner:feedFlare href="http://www.webwag.com/wwgthis.php?url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.webwag.com/images/wwgthis.gif">Subscribe with Webwag</feedburner:feedFlare><feedburner:feedFlare 
href="http://www.podcastready.com/oneclick_bookmark.php?url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.podcastready.com/images/podcastready_button.gif">Subscribe with Podcast Ready</feedburner:feedFlare><feedburner:feedFlare href="http://www.flurry.com/pushRssFeed.do?r=fb&amp;url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.flurry.com/images/flurry_rss_logo2.gif">Subscribe with Flurry</feedburner:feedFlare><feedburner:feedFlare href="http://www.wikio.com/subscribe?url=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.wikio.com/shared/img/add2wikio.gif">Subscribe with Wikio</feedburner:feedFlare><feedburner:feedFlare href="http://www.dailyrotation.com/index.php?feed=http%3A%2F%2Ffeeds.feedburner.com%2Ftradingfish" src="http://www.dailyrotation.com/rss-dr2.gif">Subscribe with Daily Rotation</feedburner:feedFlare><entry>
   <title>Preseeding Ubuntu Server and Static IP Addresses</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/37OISGM5smg/preseeding-ubuntu-server-and-static-ip-addresses.html" />
   <updated>2011-11-18T00:00:00-08:00</updated>
   <id>http://hectcastro.me/2011/11/18/preseeding-ubuntu-server-and-static-ip-addresses</id>
   <content type="html">&lt;p&gt;Setting up a cluster of computers for any purpose usually requires installing an operating system. The installation process typically consists of several questions and identical answers for each node in the cluster. Automating the submission of answers to these questions is desirable &amp;#8212; not only to prevent inconsistencies, but for general convenience.&lt;/p&gt;

&lt;h2 id='preseeding'&gt;Preseeding&lt;/h2&gt;

&lt;p&gt;I spent the last few days working to stand up a proof-of-concept Riak cluster. The first step involved installing Ubuntu Oneiric Ocelot (&lt;code&gt;11.10&lt;/code&gt;) on four virtual machines. Luckily, Ubuntu/Debian has a process called &lt;a href='http://wiki.debian.org/DebianInstaller/Preseed'&gt;preseeding&lt;/a&gt; to facilitate automated installations. Surprisingly, it also has limited support for Red Hat’s &lt;a href='http://fedoraproject.org/wiki/Anaconda/Kickstart'&gt;Kickstart&lt;/a&gt;. Playing it safe, I went with preseeding.&lt;/p&gt;

&lt;p&gt;There are three methods that can be used for preseeding: &lt;code&gt;initrd&lt;/code&gt;, &lt;code&gt;file&lt;/code&gt;, and &lt;code&gt;network&lt;/code&gt;. I wasn’t interested in re-authoring ISOs or setting up a TFTP server, so I went with a web-accessible preseed file. The pros of this approach are that the configuration file is easily modifiable, yet still accessible. The cons are that it doesn’t become available to the installer until the network is configured.&lt;/p&gt;

&lt;h2 id='assigning_a_static_ip_problem'&gt;Assigning a Static IP Problem&lt;/h2&gt;

&lt;p&gt;Because web-accessible preseed files aren’t available until the network is configured, the step to assign a static IP address gets missed. Below are several approaches I found to assign a static IP address with preseeding.&lt;/p&gt;

&lt;h3 id='boot_parameters'&gt;Boot Parameters&lt;/h3&gt;

&lt;p&gt;The boot prompt is where you tell the installer how to locate your preseed file. It is also where you can pass a fixed number of preseed directives. In our example of assigning a static IP address, you’d pass things like IP address, hostname, domain, and netmask. Ultimately, I wasn’t too interested in this approach because it required a lot of typing without clipboard access.&lt;/p&gt;
&lt;div style='text-align: center;'&gt;
&lt;p&gt;&lt;img src='http://assets.hectcastro.me/images/ubuntu-boot-prompt.png' alt='Ubuntu boot prompt' /&gt;&lt;/p&gt;
&lt;/div&gt;
&lt;h3 id='reevaluating_network_configuration'&gt;Re-evaluating Network Configuration&lt;/h3&gt;

&lt;p&gt;The Ubuntu Help wiki has a suggested &lt;a href='https://help.ubuntu.com/11.10/installation-guide/i386/preseed-contents.html'&gt;hack&lt;/a&gt; to trigger re-evaluation of preseeded network configuration settings by executing commands via &lt;code&gt;preseed/run&lt;/code&gt;. Unfortunately, I was unable to get this to work successfully. In every combination I tried, it resulted in the installer failing. This related &lt;a href='http://ubuntuforums.org/showthread.php?t=1494309'&gt;Ubuntu Forums post&lt;/a&gt; outlines the suggested steps pretty well.&lt;/p&gt;

&lt;h3 id='overwriting_network_configuration'&gt;Overwriting Network Configuration&lt;/h3&gt;

&lt;p&gt;Eventually this is the solution I used to assign a static IP address. It’s a hack, but in my eyes it was the lesser of three evils. Alongside each node’s preseed configuration file, I created a corresponding shell script. The shell script gets executed before the installer triggers a reboot and overwrites &lt;code&gt;/etc/network/interfaces&lt;/code&gt; with a static IP configuration:&lt;/p&gt;
&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='bash'&gt;&lt;span class='nb'&gt;echo&lt;/span&gt; &lt;span class='s2'&gt;&amp;quot;auto lo&lt;/span&gt;
&lt;span class='s2'&gt;iface lo inet loopback&lt;/span&gt;

&lt;span class='s2'&gt;auto eth0&lt;/span&gt;
&lt;span class='s2'&gt;iface eth0 inet static&lt;/span&gt;
&lt;span class='s2'&gt; address 192.168.1.10&lt;/span&gt;
&lt;span class='s2'&gt; netmask 255.255.255.0&lt;/span&gt;
&lt;span class='s2'&gt; gateway 192.168.1.1&lt;/span&gt;
&lt;span class='s2'&gt;&amp;quot;&lt;/span&gt; &amp;gt; /etc/network/interfaces
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;p&gt;For completeness, I included my &lt;a href='https://gist.github.com/1377459'&gt;preseed configuration file&lt;/a&gt; as a Gist. If anyone has a better approach to setting a static IP address via preseeding or Kickstart, let me know!&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/37OISGM5smg" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2011/11/18/preseeding-ubuntu-server-and-static-ip-addresses.html</feedburner:origLink></entry>
 
 <entry>
   <title>Testing Command-line Applications with Aruba</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/AYYvPGVfGac/testing-command-line-applications-with-aruba.html" />
   <updated>2011-10-25T00:00:00-07:00</updated>
   <id>http://hectcastro.me/2011/10/25/testing-command-line-applications-with-aruba</id>
   <content type="html">&lt;p&gt;&lt;a href='http://cukes.info/'&gt;Cucumber&lt;/a&gt; is often used to test web applications. Many developers hook it into their Rails projects to integration test site features. Wouldn’t it be great if there were a way to test command-line applications in a similar fashion? You can with &lt;a href='https://github.com/cucumber/aruba'&gt;Aruba&lt;/a&gt;.&lt;/p&gt;

&lt;h2 id='aruba'&gt;Aruba&lt;/h2&gt;

&lt;p&gt;Aruba is a Cucumber extension for testing command-line applications written in any language. Passing arguments, interacting with the file system, capturing exit codes, and mimicking interactive usage are all features provided out of the box. Below is a basic test for the &lt;code&gt;mv&lt;/code&gt; command that passes:&lt;/p&gt;
&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='cucumber'&gt;&lt;span class='nc'&gt;Scenario:&lt;/span&gt;&lt;span class='no'&gt; Backing up test.conf&lt;/span&gt;
&lt;span class='k'&gt;  When &lt;/span&gt;I run `mv test.conf test.conf.bak`
  &lt;span class='k'&gt;Then &lt;/span&gt;the output should contain:
  &lt;span class='s'&gt;&amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
&lt;span class='s'&gt;  mv: rename test.conf to test.conf.bak: No such file or directory&lt;/span&gt;
&lt;span class='s'&gt;  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;p&gt;Now let’s show off a few of Aruba’s built-in steps to prevent the command from failing:&lt;/p&gt;
&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='cucumber'&gt;&lt;span class='nc'&gt;Scenario:&lt;/span&gt;&lt;span class='no'&gt; Backing up test.conf&lt;/span&gt;
&lt;span class='k'&gt;  Given &lt;/span&gt;an empty file named &lt;span class='s'&gt;&amp;quot;test.conf&amp;quot;&lt;/span&gt;
  &lt;span class='k'&gt;When &lt;/span&gt;I run `mv test.conf test.conf.bak`
  &lt;span class='k'&gt;Then &lt;/span&gt;the exit status should be 0
  &lt;span class='k'&gt;And &lt;/span&gt;the following files should exist:
    | test.conf.bak |
  &lt;span class='k'&gt;And &lt;/span&gt;the following files should not exist:
    | test.conf     |
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;p&gt;The first step creates an empty file and executes &lt;code&gt;mv&lt;/code&gt; inside of Aruba’s sandbox directory. After the &lt;code&gt;mv&lt;/code&gt; command is executed, its exit status is compared to &lt;code&gt;0&lt;/code&gt; and the existence of &lt;code&gt;test.conf.bak&lt;/code&gt; (and non-existence of &lt;code&gt;test.conf&lt;/code&gt;) is confirmed.&lt;/p&gt;

&lt;p&gt;It’s also worth noting that after each scenario Aruba clears out its sandbox &amp;#8212; a temporary directory that becomes the current working directory for your command-line tool &amp;#8212; unless you explicitly tag the scenario with &lt;code&gt;@no-clobber&lt;/code&gt;. This tag preserves the previous scenario’s final state. Tying this back to the example above, the next scenario would begin with only &lt;code&gt;test.conf.bak&lt;/code&gt; in the sandbox. Additional Aruba-specific tags can be found in the &lt;a href='https://github.com/cucumber/aruba#readme'&gt;README&lt;/a&gt;.&lt;/p&gt;

&lt;h2 id='extending_the_aruba_api'&gt;Extending the Aruba API&lt;/h2&gt;

&lt;p&gt;As a command-line application evolves, other conditions not available in Aruba’s built-in API will require testing. For example, say you need to assert a file’s user and group attributes. Because Aruba’s API was built using Ruby modules, it can be reopened inside of Cucumber’s &lt;code&gt;env.rb&lt;/code&gt;:&lt;/p&gt;
&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='ruby'&gt;&lt;span class='k'&gt;module&lt;/span&gt; &lt;span class='nn'&gt;Aruba&lt;/span&gt;
  &lt;span class='k'&gt;module&lt;/span&gt; &lt;span class='nn'&gt;Api&lt;/span&gt;
    &lt;span class='k'&gt;def&lt;/span&gt; &lt;span class='nf'&gt;check_file_owner_and_group&lt;/span&gt;&lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='n'&gt;paths_and_users_and_groups&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;
      &lt;span class='n'&gt;prep_for_fs_check&lt;/span&gt; &lt;span class='k'&gt;do&lt;/span&gt; &lt;span class='c1'&gt;# Lower-level function provided by Aruba&lt;/span&gt;
        &lt;span class='n'&gt;paths_and_users_and_groups&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;each&lt;/span&gt; &lt;span class='k'&gt;do&lt;/span&gt; &lt;span class='o'&gt;|&lt;/span&gt;&lt;span class='n'&gt;path&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; &lt;span class='n'&gt;user&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; &lt;span class='n'&gt;group&lt;/span&gt;&lt;span class='o'&gt;|&lt;/span&gt;
          &lt;span class='n'&gt;stat&lt;/span&gt; &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='no'&gt;File&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;stat&lt;/span&gt;&lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='n'&gt;path&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;

          &lt;span class='no'&gt;Etc&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;getpwuid&lt;/span&gt;&lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='n'&gt;stat&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;uid&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;name&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;should&lt;/span&gt; &lt;span class='o'&gt;==&lt;/span&gt; &lt;span class='n'&gt;user&lt;/span&gt;
          &lt;span class='no'&gt;Etc&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;getgrgid&lt;/span&gt;&lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='n'&gt;stat&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;gid&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;name&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;should&lt;/span&gt; &lt;span class='o'&gt;==&lt;/span&gt; &lt;span class='n'&gt;group&lt;/span&gt;
        &lt;span class='k'&gt;end&lt;/span&gt;
      &lt;span class='k'&gt;end&lt;/span&gt;
    &lt;span class='k'&gt;end&lt;/span&gt;
  &lt;span class='k'&gt;end&lt;/span&gt;
&lt;span class='k'&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;p&gt;Then create a matcher:&lt;/p&gt;
&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='ruby'&gt;&lt;span class='no'&gt;Then&lt;/span&gt;&lt;span class='sr'&gt; /^the following files should have username &amp;quot;([^&amp;quot;]*)&amp;quot; and group &amp;quot;([^&amp;quot;]*)&amp;quot;:$/&lt;/span&gt; &lt;span class='k'&gt;do&lt;/span&gt; &lt;span class='o'&gt;|&lt;/span&gt;&lt;span class='n'&gt;user&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; &lt;span class='n'&gt;group&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; &lt;span class='n'&gt;files&lt;/span&gt;&lt;span class='o'&gt;|&lt;/span&gt;
  &lt;span class='n'&gt;check_file_owner_and_group&lt;/span&gt;&lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='n'&gt;files&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;raw&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;map&lt;/span&gt; &lt;span class='p'&gt;{&lt;/span&gt; &lt;span class='o'&gt;|&lt;/span&gt;&lt;span class='n'&gt;file_row&lt;/span&gt;&lt;span class='o'&gt;|&lt;/span&gt; &lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='n'&gt;file_row&lt;/span&gt; &lt;span class='o'&gt;&amp;lt;&amp;lt;&lt;/span&gt; &lt;span class='n'&gt;user&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt; &lt;span class='o'&gt;&amp;lt;&amp;lt;&lt;/span&gt; &lt;span class='n'&gt;group&lt;/span&gt; &lt;span class='p'&gt;})&lt;/span&gt;
&lt;span class='k'&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;p&gt;Now that step can be included to test the user and group attributes of files:&lt;/p&gt;
&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='cucumber'&gt;&lt;span class='nc'&gt;Scenario:&lt;/span&gt;&lt;span class='no'&gt; Backing up test.conf&lt;/span&gt;
&lt;span class='k'&gt;  Given &lt;/span&gt;an empty file named &lt;span class='s'&gt;&amp;quot;test.conf&amp;quot;&lt;/span&gt;
  &lt;span class='k'&gt;When &lt;/span&gt;I run `mv test.conf test.conf.bak`
  &lt;span class='k'&gt;Then &lt;/span&gt;the exit status should be 0
  &lt;span class='k'&gt;And &lt;/span&gt;the following files should exist:
    | test.conf.bak |
  &lt;span class='k'&gt;And &lt;/span&gt;the following files should not exist:
    | test.conf     |
  &lt;span class='k'&gt;And &lt;/span&gt;the following files should have username &lt;span class='s'&gt;&amp;quot;hector&amp;quot;&lt;/span&gt; and group &lt;span class='s'&gt;&amp;quot;staff&amp;quot;&lt;/span&gt;:
    | test.conf.bak |
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;h2 id='conclusion'&gt;Conclusion&lt;/h2&gt;

&lt;p&gt;Using a behavior-driven development approach for building command-line applications with Cucumber and Aruba was a pleasure. Aruba’s API covers a decent amount of ground and was easily expandable. The source code was straightforward and after skimming its internals, I was able to expand the API to meet my needs. Hopefully reading this will help you do the same.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/AYYvPGVfGac" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2011/10/25/testing-command-line-applications-with-aruba.html</feedburner:origLink></entry>
 
 <entry>
   <title>Replacing Excel with the Eighteenth Letter of the Alphabet</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/BR1NOGFQ4-U/replacing-excel-with-the-eighteenth-letter-of-the-alphabet.html" />
   <updated>2011-07-18T00:00:00-07:00</updated>
   <id>http://hectcastro.me/2011/07/18/replacing-excel-with-the-eighteenth-letter-of-the-alphabet</id>
   <content type="html">&lt;p&gt;Every once in a while I have to graph data in order to better understand it. Most of the time, I use &lt;a href='http://office.microsoft.com/en-us/excel/'&gt;Microsoft Excel&lt;/a&gt; to generate graphs because it’s one of the easiest ways to produce them. Unfortunately, Excel’s ease of use quickly degrades once you move beyond pasting data into cells and clicking the graph button. I started looking for something to make the process of visualizing data more flexible. Something with pluggable libraries, helpful examples, and room for reproducibility &amp;#8212; I ended up replacing Excel with R.&lt;/p&gt;

&lt;h2 id='r'&gt;R&lt;/h2&gt;

&lt;p&gt;R is an open source programming language for statistical computing and publication-quality graphics. Different than a general purpose programming language, R’s core includes many features designed to empower statisticians. I’m no statistician, but I was intrigued by its approachable syntax and familiar data structures. Even more, it’s backed by a potent community that contributes numerous packages to solve all sorts of common problems.&lt;/p&gt;

&lt;p&gt;R code is typically interpreted in a &lt;a href='http://en.wikipedia.org/wiki/Read-eval-print_loop'&gt;REPL&lt;/a&gt;, but can also be captured in a file and executed as a script. To reduce my learning curve, I looked into an IDE for R. I settled on &lt;a href='http://rstudio.org/'&gt;RStudio&lt;/a&gt; because it has a clean UI, was easy to install, and has its source code hosted on &lt;a href='https://github.com/rstudio/rstudio'&gt;GitHub&lt;/a&gt;. If you’re going to explore R for the first time, I’d strongly encourage the use of an IDE. It makes searching documentation, viewing graphs, and inspecting output simple.&lt;/p&gt;

&lt;h2 id='graphing'&gt;Graphing&lt;/h2&gt;

&lt;p&gt;My first task for R was to parse application logs and graph the frequency of specific user interactions. The application being logged provides a web interface to query financial data sets. The log records queries, so my goal was to plot each distinct data set and the number of times it was queried.&lt;/p&gt;

&lt;p&gt;After &lt;a href='http://amzn.com/B004VB3UYW'&gt;reading&lt;/a&gt; through several examples of R’s standard graphing functions, I stumbled upon &lt;a href='http://had.co.nz/ggplot2/'&gt;ggplot2&lt;/a&gt;. The ggplot2 package brands itself as the “Grammar of Graphics.” A graphing system that takes what’s good about R’s and omits the bad. In comparison to the base graphing library, ggplot2’s syntax is slightly more intuitive. This, and the fact that several answers on &lt;a href='http://stackoverflow.com/'&gt;Stack Overflow&lt;/a&gt; recommend it, compelled me to give it a try.&lt;/p&gt;
&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='r'&gt;library&lt;span class='p'&gt;(&lt;/span&gt;plyr&lt;span class='p'&gt;)&lt;/span&gt;
library&lt;span class='p'&gt;(&lt;/span&gt;ggplot2&lt;span class='p'&gt;)&lt;/span&gt;

&lt;span class='c1'&gt;# Import a pipe-delimited file without a header row.&lt;/span&gt;
requests &lt;span class='o'&gt;&amp;lt;-&lt;/span&gt; read.csv&lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='s'&gt;&amp;quot;requests.dat&amp;quot;&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; header&lt;span class='o'&gt;=&lt;/span&gt;&lt;span class='kc'&gt;FALSE&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; sep&lt;span class='o'&gt;=&lt;/span&gt;&lt;span class='s'&gt;&amp;quot;|&amp;quot;&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;

&lt;span class='c1'&gt;# Extract a subset of the requested data sets (column V8)&lt;/span&gt;
&lt;span class='c1'&gt;# with a frequency greater than 2000.&lt;/span&gt;
data_set_freq &lt;span class='o'&gt;&amp;lt;-&lt;/span&gt; subset&lt;span class='p'&gt;(&lt;/span&gt;count&lt;span class='p'&gt;(&lt;/span&gt;requests&lt;span class='p'&gt;,&lt;/span&gt; &lt;span class='s'&gt;&amp;#39;V8&amp;#39;&lt;/span&gt;&lt;span class='p'&gt;),&lt;/span&gt; freq &lt;span class='o'&gt;&amp;gt;&lt;/span&gt; &lt;span class='m'&gt;2000&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;

&lt;span class='c1'&gt;# Rename columns and rows.&lt;/span&gt;
colnames&lt;span class='p'&gt;(&lt;/span&gt;data_set_freq&lt;span class='p'&gt;)&lt;/span&gt; &lt;span class='o'&gt;&amp;lt;-&lt;/span&gt; c&lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='s'&gt;&amp;#39;data_set&amp;#39;&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; &lt;span class='s'&gt;&amp;#39;freq&amp;#39;&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;
row.names&lt;span class='p'&gt;(&lt;/span&gt;data_set_freq&lt;span class='p'&gt;)&lt;/span&gt; &lt;span class='o'&gt;&amp;lt;-&lt;/span&gt; data_set_freq&lt;span class='p'&gt;$&lt;/span&gt;data_set

&lt;span class='c1'&gt;# Plot it.&lt;/span&gt;
ggplot&lt;span class='p'&gt;(&lt;/span&gt;data_set_freq&lt;span class='p'&gt;,&lt;/span&gt; aes&lt;span class='p'&gt;(&lt;/span&gt;factor&lt;span class='p'&gt;(&lt;/span&gt;data_set&lt;span class='p'&gt;),&lt;/span&gt; freq&lt;span class='p'&gt;))&lt;/span&gt; &lt;span class='o'&gt;+&lt;/span&gt; 
  geom_bar&lt;span class='p'&gt;()&lt;/span&gt; &lt;span class='o'&gt;+&lt;/span&gt; 
  labs&lt;span class='p'&gt;(&lt;/span&gt;y &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='s'&gt;&amp;quot;Web Queries&amp;quot;&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; x &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='s'&gt;&amp;quot;Data Sets&amp;quot;&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;p&gt;The short snippet of code above produces the following graph:&lt;/p&gt;
&lt;div style='text-align: center;'&gt;
&lt;p&gt;&lt;img src='http://assets.hectcastro.me/images/2011-web-queries-by-data-set.png' alt='2011 Web Queries by Data Set' /&gt;&lt;/p&gt;
&lt;/div&gt;
&lt;p&gt;Now, to take that a step further, I wanted to figure out which data set is associated with the most failures. Building on the code above, here’s how I accomplished that:&lt;/p&gt;
&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='r'&gt;&lt;span class='c1'&gt;# Extract a subset of data set names (column V8) for&lt;/span&gt;
&lt;span class='c1'&gt;# failed requests (column V15).&lt;/span&gt;
data_set_errors_freq &lt;span class='o'&gt;&amp;lt;-&lt;/span&gt; count&lt;span class='p'&gt;(&lt;/span&gt;subset&lt;span class='p'&gt;(&lt;/span&gt;requests&lt;span class='p'&gt;,&lt;/span&gt;
  grepl&lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='s'&gt;&amp;#39;ERROR|killed&amp;#39;&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; requests&lt;span class='p'&gt;$&lt;/span&gt;V15&lt;span class='p'&gt;),&lt;/span&gt; select &lt;span class='o'&gt;=&lt;/span&gt; c&lt;span class='p'&gt;(&lt;/span&gt;V8&lt;span class='p'&gt;)),&lt;/span&gt; &lt;span class='s'&gt;&amp;#39;V8&amp;#39;&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;

&lt;span class='c1'&gt;# Rename columns and rows.&lt;/span&gt;
colnames&lt;span class='p'&gt;(&lt;/span&gt;data_set_errors_freq&lt;span class='p'&gt;)&lt;/span&gt; &lt;span class='o'&gt;&amp;lt;-&lt;/span&gt; c&lt;span class='p'&gt;(&lt;/span&gt;&lt;span class='s'&gt;&amp;#39;data_set&amp;#39;&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; &lt;span class='s'&gt;&amp;#39;freq&amp;#39;&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;
row.names&lt;span class='p'&gt;(&lt;/span&gt;data_set_errors_freq&lt;span class='p'&gt;)&lt;/span&gt; &lt;span class='o'&gt;&amp;lt;-&lt;/span&gt; data_set_errors_freq&lt;span class='p'&gt;$&lt;/span&gt;data_set

&lt;span class='c1'&gt;# Merge the data set frequencies with errors and create&lt;/span&gt;
&lt;span class='c1'&gt;# a third column for percent error.&lt;/span&gt;
data_set_summary &lt;span class='o'&gt;&amp;lt;-&lt;/span&gt; merge&lt;span class='p'&gt;(&lt;/span&gt;data_set_freq&lt;span class='p'&gt;,&lt;/span&gt; data_set_errors_freq&lt;span class='p'&gt;,&lt;/span&gt;
  by.x &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='s'&gt;&amp;#39;data_set&amp;#39;&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; by.y &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='s'&gt;&amp;#39;data_set&amp;#39;&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;
data_set_summary &lt;span class='o'&gt;&amp;lt;-&lt;/span&gt; ddply&lt;span class='p'&gt;(&lt;/span&gt;data_set_summary&lt;span class='p'&gt;,&lt;/span&gt; &lt;span class='m'&gt;.&lt;/span&gt;&lt;span class='p'&gt;(&lt;/span&gt;data_set&lt;span class='p'&gt;),&lt;/span&gt; transform&lt;span class='p'&gt;,&lt;/span&gt;
  percent_error &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='p'&gt;(&lt;/span&gt;freq.y &lt;span class='o'&gt;/&lt;/span&gt; freq.x&lt;span class='p'&gt;)&lt;/span&gt; &lt;span class='o'&gt;*&lt;/span&gt; &lt;span class='m'&gt;100&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;

&lt;span class='c1'&gt;# Plot it.&lt;/span&gt;
ggplot&lt;span class='p'&gt;(&lt;/span&gt;data_set_summary&lt;span class='p'&gt;,&lt;/span&gt; aes&lt;span class='p'&gt;(&lt;/span&gt;factor&lt;span class='p'&gt;(&lt;/span&gt;data_set&lt;span class='p'&gt;),&lt;/span&gt; freq.x&lt;span class='p'&gt;,&lt;/span&gt; fill&lt;span class='o'&gt;=&lt;/span&gt;percent_error&lt;span class='p'&gt;))&lt;/span&gt; &lt;span class='o'&gt;+&lt;/span&gt; 
  geom_bar&lt;span class='p'&gt;()&lt;/span&gt; &lt;span class='o'&gt;+&lt;/span&gt; 
  labs&lt;span class='p'&gt;(&lt;/span&gt;y &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='s'&gt;&amp;quot;Web Queries&amp;quot;&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; x &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='s'&gt;&amp;quot;Data Sets&amp;quot;&lt;/span&gt;&lt;span class='p'&gt;,&lt;/span&gt; fill&lt;span class='o'&gt;=&lt;/span&gt;&lt;span class='s'&gt;&amp;quot;Percent Error&amp;quot;&lt;/span&gt;&lt;span class='p'&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;p&gt;A similar graph to the one above, except that this one contains colors based on the percentage of errors:&lt;/p&gt;
&lt;div style='text-align: center;'&gt;
&lt;p&gt;&lt;img src='http://assets.hectcastro.me/images/2011-web-queries-by-data-set-with-percent-error.png' alt='2011 Web Queries by Data Set with Percent Error' /&gt;&lt;/p&gt;
&lt;/div&gt;
&lt;h2 id='conclusion'&gt;Conclusion&lt;/h2&gt;

&lt;p&gt;Building graphs with R feels a lot like building servers with &lt;a href='http://hectcastro.me/2011/05/17/monday-morning-system-administrator.html'&gt;Chef&lt;/a&gt;. I can configure a server once manually, or I can write code that automates its deployment process forever. Likewise, I can paste data into Excel and point and click to build a graph, or I can write R code that reproduces a handful of steps with one command. There are a number of GUI tools that build graphs from data, but once you begin applying filters, merging data sets, or running calculations these tools break down. Writing code instead of clicking buttons has its downsides, but there is something comforting in knowing that as long as our log structure doesn’t change, I’ll be able to reproduce these graphs six months from now and immediately know which data sets are most error prone.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/BR1NOGFQ4-U" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2011/07/18/replacing-excel-with-the-eighteenth-letter-of-the-alphabet.html</feedburner:origLink></entry>
 
 <entry>
   <title>Monday Morning System Administrator</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/7FdSnNFxVgw/monday-morning-system-administrator.html" />
   <updated>2011-05-17T00:00:00-07:00</updated>
   <id>http://hectcastro.me/2011/05/17/monday-morning-system-administrator</id>
   <content type="html">&lt;p&gt;Not long ago I purchased a virtual private server from &lt;a href='http://www.linode.com/?r=4e65749307f4ee44b8ee7124bec30a2cd257249c'&gt;Linode&lt;/a&gt;. At the time, the most recent &lt;a href='http://www.ubuntu.com/'&gt;Ubuntu&lt;/a&gt; release was Maverick Meerkat (&lt;code&gt;10.10&lt;/code&gt;). The first Sunday after purchasing, I sat down for a few hours and configured every aspect of the machine manually &amp;#8212; it was going to be my personal web and application server. Feeling like I had accomplished something, I logged out and nodded in approval.&lt;/p&gt;

&lt;p&gt;Fast-forward five months and Ubuntu’s most recent release is Natty Narwhal (&lt;code&gt;11.04&lt;/code&gt;). Naturally, I wanted to upgrade, but a clean install would require another couple of hours on a Sunday afternoon. If I was going to invest time in configuring the server again, how could I configure it in a way that would apply for the next release? The answer I settled on was &lt;a href='http://www.opscode.com/chef/'&gt;Chef&lt;/a&gt;.&lt;/p&gt;

&lt;h2 id='chef'&gt;Chef&lt;/h2&gt;

&lt;p&gt;Chef is an open source framework that allows you to build infrastructure from code. Explained another way, Chef allows you to checkout code from a repository and execute it against a base operating system install &amp;#8212; producing a fully operational web server, database server, etc. No replaying steps off of a build document. No complex combinations of Perl and Bash scripts. No more manual configuration.&lt;/p&gt;

&lt;p&gt;Chef comes in two flavors: server and solo. With &lt;a href='http://wiki.opscode.com/display/chef/Chef+Server'&gt;Chef Server&lt;/a&gt;, a client (your laptop) sends configuration directives to a central server and that server propagates those directives to target nodes. The server can exist within your data center, or you can leverage &lt;a href='http://www.opscode.com/platform/'&gt;Opscode’s Platform&lt;/a&gt; (Chef Server as a service). The alternative is &lt;a href='http://wiki.opscode.com/display/chef/Chef+Solo'&gt;Chef Solo&lt;/a&gt;. Chef Solo operates without Chef Server by sending directives directly to target nodes.&lt;/p&gt;

&lt;p&gt;In the end I chose Chef Solo, but don&amp;#8217;t let that impact your evaluation of Chef Server. Opscode makes it easy to test drive their platform by giving you up to five free target nodes.&lt;/p&gt;

&lt;h2 id='forward_progress'&gt;Forward Progress&lt;/h2&gt;

&lt;p&gt;Coming back to the story I began in the section above, I opened up my laptop on a Saturday morning and launched VMware Fusion to run through the installation of Ubuntu &lt;code&gt;11.04&lt;/code&gt;. Following the installation, I rebooted the virtual machine and created a snapshot (restoring to the snapshot provided an easy way to rapidly reproduce a freshly installed operating system). After a few hours on Saturday and two to three on Sunday, I had put together several Chef &lt;a href='https://github.com/hectcastro/cookbooks'&gt;cookbooks&lt;/a&gt;. When executed, the cookbooks turned a totally bare virtual machine into a fully functional web server.&lt;/p&gt;

&lt;p&gt;On Monday morning I created a snapshot of my Linode VPS and wiped the virtual disks. Then, I instructed Linode to rebuild my VPS with its custom Ubuntu &lt;code&gt;11.04&lt;/code&gt; image. When the machine came back up I replayed my Chef cookbooks against it &amp;#8212; in a little under six minutes HTTP requests were being served.&lt;/p&gt;

&lt;h2 id='conclusion'&gt;Conclusion&lt;/h2&gt;

&lt;p&gt;With around six hours worth of work, I was able to write code that could successfully redeploy a machine in under six minutes. I had been exposed to Chef before, but actually going through the steps to build a server from start to finish enlightened me &amp;#8212; I don’t think I’ll ever want to manually configure a server again. Build documents are a good starting point for consistently spinning up machines, but when a machine is configured with Chef I can be absolutely sure that its configuration matches its cookbooks. I have &lt;em&gt;never&lt;/em&gt; been able to say the same for a manually configured machine based on build documents.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/7FdSnNFxVgw" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2011/05/17/monday-morning-system-administrator.html</feedburner:origLink></entry>
 
 <entry>
   <title>VMware: A Gift and a Curse</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/cbzwrpCqaRg/vmware-a-gift-and-a-curse.html" />
   <updated>2011-03-20T00:00:00-07:00</updated>
   <id>http://hectcastro.me/2011/03/20/vmware-a-gift-and-a-curse</id>
   <content type="html">&lt;p&gt;I spent most of my free time over the past few days troubleshooting a puzzling combination of failures across databases, JVMs, operating systems, and virtual machine hypervisors. Because of the sheer amount of time I lost fiddling with settings, bugging colleagues, and reading white papers &amp;#8212; I figured I would summarize my findings in a post to save others from the same misfortune.&lt;/p&gt;

&lt;h2 id='the_setup'&gt;The Setup&lt;/h2&gt;

&lt;p&gt;WRDS recently acquired several massive data sets from the &lt;a href='http://www.nyxdata.com/Data-Products/Daily-TAQ'&gt;New York Stock Exchange&lt;/a&gt;, and I’ve been looking at different storage engines to help accommodate it. One database that has proven it can handle over a hundred terabytes of data is &lt;a href='http://cassandra.apache.org/'&gt;Cassandra&lt;/a&gt;. Cassandra is a distributed database that is designed to scale horizontally across many servers in a tunable, highly consistent or highly available manner. Another strong contender is &lt;a href='http://hadoop.apache.org/'&gt;Hadoop&lt;/a&gt;, which shares some architectural similarities with Cassandra, but has a much broader scope. Hadoop could end up being a better solution, but I wanted to investigate Cassandra first.&lt;/p&gt;

&lt;p&gt;After reading a &lt;a href='http://www.amazon.com/Cassandra-Definitive-Guide-Eben-Hewitt/dp/1449390412'&gt;book&lt;/a&gt; and several &lt;a href='http://www.datastax.com/dev/tutorials/getting_started_0_7/index'&gt;guides&lt;/a&gt; on Cassandra, I set out to build a cluster. Because Cassandra is a distributed database, it is recommended that clusters be at least 3-4 nodes. I took this recommendation and provisioned four identical Linux virtual machines in our local VMware environment.&lt;/p&gt;

&lt;p&gt;This is where things began to get ugly.&lt;/p&gt;

&lt;h2 id='clear_as_mud'&gt;Clear as Mud&lt;/h2&gt;

&lt;p&gt;Cassandra is written in Java and runs within a Java Virtual Machine. It comes bundled with scripts that dynamically assign JVM constraints based on physical machine resources &amp;#8212; maximum heap size, parallel garbage collection, and the enabling of &lt;a href='http://en.wikipedia.org/wiki/Java_Management_Extensions'&gt;Java Management Extensions&lt;/a&gt;. I read through these scripts and agreed with most of the defaults. Now the only thing left to do was to spawn Cassandra instances.&lt;/p&gt;

&lt;p&gt;Shortly after spawning one instance per node, Cassandra processes began dying abruptly without stacktraces. I inspected logs for exceptions but there were none. I restarted the Cassandra instances and connected to the JVMs remotely via JMX. This showed stable heap sizes, but also revealed that out-of-nowhere physical memory utilization was jumping to &lt;code&gt;99%&lt;/code&gt;. To alleviate the sudden memory pressure, &lt;a href='http://lwn.net/Articles/317814/'&gt;oom-killer&lt;/a&gt; began killing processes &amp;#8212; seemingly at random.&lt;/p&gt;

&lt;p&gt;I restarted Cassandra instances again, this time connecting remotely via JMX and SSH &amp;#8212; JMX to monitor the JVM and SSH to monitor per-process memory consumption. As JMX reported increased physical memory utilization, so did &lt;a href='http://en.wikipedia.org/wiki/Top_(software)'&gt;top&lt;/a&gt; &amp;#8212; the only problem was that Java held &lt;code&gt;~50%&lt;/code&gt; of all the memory (expected because of the maximum heap size settings), while all of the machine’s other processes held &lt;code&gt;2%&lt;/code&gt;. &lt;code&gt;52%&lt;/code&gt; does not equal &lt;code&gt;99%&lt;/code&gt;, so something else was consuming memory.&lt;/p&gt;

&lt;p&gt;Because the operating system metrics weren’t adding up, I decided to investigate the virtual machine settings. After reading a VMware &lt;a href='http://www.vmware.com/files/pdf/perf-vsphere-memory_management.pdf'&gt;memory management white paper&lt;/a&gt;, it became clear that a hypervisor memory reclamation technique known as &lt;em&gt;ballooning&lt;/em&gt;, combined with virtual machine cluster resource overcommitment, was at the root of my problems.&lt;/p&gt;

&lt;h2 id='ballooning'&gt;Ballooning&lt;/h2&gt;

&lt;p&gt;In the diagram below, a balloon driver is used by the hypervisor to reclaim free guest memory when it is under pressure. The pins in &lt;strong&gt;Figure (b)&lt;/strong&gt; show memory that has been reserved by the driver for hypervisor reclamation. This chunk of memory is guaranteed not to page to disk under any circumstances, and cannot be used by the guest operating system. The combination of this balloon driver reservation, significant resource overcommitment, and a JVM that needs a sufficiently large heap size, all united to produce hard failures.&lt;/p&gt;
&lt;div style='text-align: center;'&gt;
&lt;p&gt;&lt;img src='http://assets.hectcastro.me/images/ballooning.png' alt='Ballooning' /&gt;&lt;/p&gt;
&lt;/div&gt;
&lt;h2 id='conclusion'&gt;Conclusion&lt;/h2&gt;

&lt;p&gt;The tentative solution to this problem was to increase memory allocation reservation for virtual machines individually &amp;#8212; this process is described in &lt;strong&gt;Section 4&lt;/strong&gt; of the white paper. Altering these settings ensures that virtual machines get all of the resources they ask for, regardless of ballooning or resource overcommitment. The real solution is to add more resources to the underlying virtual machine hosts so that hypervisor memory reclamation strategies don’t need to be triggered as often.&lt;/p&gt;

&lt;p&gt;Virtualization is a tremendous tool for data center utilization and consolidation. Unfortunately, it also makes it easy to forget that there is still a dependence on physical hardware. Clicking around and adjusting sliders in a VMware management console is trivial, but being aware of the repercussions those changes have on guest operating systems is still essential to effective virtualization use. After all, virtualization is an intelligent computing strategy &amp;#8212; not magic.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/cbzwrpCqaRg" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2011/03/20/vmware-a-gift-and-a-curse.html</feedburner:origLink></entry>
 
 <entry>
   <title>Ruby and Python Lightning Talks</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/XwHKABX3lH4/ruby-and-python-lightning-talks.html" />
   <updated>2011-02-11T00:00:00-08:00</updated>
   <id>http://hectcastro.me/2011/02/11/ruby-and-python-lightning-talks</id>
   <content type="html">&lt;p&gt;On Tuesday, I was lucky enough to participate in RedSnake 2011 &amp;#8212; an event that included Philadelphia&amp;#8217;s &lt;a href='http://phillyrb.org/'&gt;Ruby&lt;/a&gt; and &lt;a href='http://www.meetup.com/phillypug'&gt;Python&lt;/a&gt; user groups. Approximately 70 local developers and system administrators gathered to hear 11 lightning talks covering a wide range of topics:&lt;/p&gt;

&lt;h2 id='ruby'&gt;Ruby&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;&lt;a href='http://lovely-assistant-rvm.heroku.com/'&gt;Ruby Version Manager&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='https://github.com/tibastral/compass-pres'&gt;Compass&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.trottercashion.com/2011/02/08/rubys-define_method-method_missing-and-instance_eval.html'&gt;Meta Programming&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.slideshare.net/matschaffer/2011-0208-cucumber'&gt;Cucumber&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.slideshare.net/delagoya/ruby-ffi'&gt;Foreign Function Interface&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;h2 id='python'&gt;Python&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;virtualenv&lt;/li&gt;

&lt;li&gt;IPython&lt;/li&gt;

&lt;li&gt;Multiprocessing&lt;/li&gt;

&lt;li&gt;System Administration&lt;/li&gt;

&lt;li&gt;Interfacing to C&lt;/li&gt;

&lt;li&gt;&lt;a href='http://artifex.org/~hblanks/talks/2011/pep20_by_example.pdf'&gt;Zen of Python&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Code that generated the slides that accompanied my presentation on &lt;a href='http://rvm.beginrescueend.com/'&gt;RVM&lt;/a&gt; can be found &lt;a href='https://github.com/hectcastro/lovely-assistant-rvm'&gt;here&lt;/a&gt;. I&amp;#8217;m planning to update this post with links to other presentations as they become available. Also, Trevor Lalish Menagh was kind enough to take &lt;a href='http://trevmex.com/post/3198570101/redsnake-1st-annual-philly-ruby-python-meetup-notes'&gt;notes&lt;/a&gt;.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/XwHKABX3lH4" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2011/02/11/ruby-and-python-lightning-talks.html</feedburner:origLink></entry>
 
 <entry>
   <title>Books I Read in 2010</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/OqQOKx8DvV4/books-i-read-in-2010.html" />
   <updated>2010-12-31T00:00:00-08:00</updated>
   <id>http://hectcastro.me/2010/12/31/books-i-read-in-2010</id>
   <content type="html">&lt;p&gt;I often get criticism for only reading technical books &amp;#8212; this year was more of the same. Below is a list of books I read in 2010, and want to read in 2011.&lt;/p&gt;

&lt;h2 id='read'&gt;Read&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;&lt;a href='http://www.amazon.com/Rework-Jason-Fried/dp/0307463745'&gt;Rework&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.amazon.com/Checklist-Manifesto-How-Things-Right/dp/0805091742'&gt;The Checklist Manifesto&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://pragprog.com/titles/ppmetr/metaprogramming-ruby'&gt;Metaprogramming Ruby&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.amazon.com/Rails-AntiPatterns-Refactoring-Addison-Wesley-Professional/dp/0321604814'&gt;Rails Anti-Patterns&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.amazon.com/RESTful-Web-Services-ebook/dp/B0043D2ED6/'&gt;RESTful Web Services&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://pragprog.com/titles/prj/ship-it'&gt;Ship It!&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://pragprog.com/titles/jruby/using-jruby'&gt;Using JRuby&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://progit.org/book/'&gt;Pro Git&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.amazon.com/Official-Ubuntu-Server-Book-ebook/dp/B003YL3OXM'&gt;The Official Ubuntu Server Book&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;h2 id='want_to_read'&gt;Want to Read&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;&lt;a href='http://pragprog.com/titles/shcloj/programming-clojure'&gt;Programming Clojure&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.amazon.com/Beginning-Scala-ebook/dp/B002ACP2AY'&gt;Beginning Scala&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.amazon.com/Art-Agile-Development-ebook/dp/B0043D2E18'&gt;The Art of Agile Development&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://www.pragprog.com/titles/achbd/the-rspec-book'&gt;The RSpec Book&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://books.alistapart.com/products/html5-for-web-designers'&gt;HTML5 For Web Designers&lt;/a&gt;&lt;/li&gt;

&lt;li&gt;&lt;a href='http://books.alistapart.com/products/css3-for-web-designers'&gt;CSS3 For Web Designers&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/OqQOKx8DvV4" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2010/12/31/books-i-read-in-2010.html</feedburner:origLink></entry>
 
 <entry>
   <title>Barcamp Philly 2010 Notes</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/KvppCCmU4lg/barcamp-philly-2010-notes.html" />
   <updated>2010-11-14T00:00:00-08:00</updated>
   <id>http://hectcastro.me/2010/11/14/barcamp-philly-2010-notes</id>
   <content type="html">&lt;h2 id='how_to_test_software'&gt;How to Test Software&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;What is the number of people that need to be hit by a truck before your project is unrepairable?&lt;/li&gt;

&lt;li&gt;Eliminate work from constructors.&lt;/li&gt;

&lt;li&gt;&amp;#8220;Singletons are globals with a haircut.&amp;#8221;&lt;/li&gt;

&lt;li&gt;SQLite has 600 times more test code than functional code.&lt;/li&gt;

&lt;li&gt;Continuous integration requires culture change.&lt;/li&gt;

&lt;li&gt;Builds should not fail &amp;#8211; when they do, fixing them is the #1 priority.&lt;/li&gt;

&lt;li&gt;&amp;#8220;If you need a monkey, we can hit the zoo.&amp;#8221;&lt;/li&gt;
&lt;/ul&gt;

&lt;h3 id='steps_to_quality_code'&gt;Steps to Quality Code&lt;/h3&gt;

&lt;ol&gt;
&lt;li&gt;Gather requirements&lt;/li&gt;

&lt;li&gt;Write tests against requirements&lt;/li&gt;

&lt;li&gt;Now you can write the code&lt;/li&gt;
&lt;/ol&gt;

&lt;h3 id='five_ws_or_root_cause_analysis'&gt;Five W&amp;#8217;s or Root Cause Analysis&lt;/h3&gt;

&lt;p&gt;American car makers refused to ever stop the assembly line. Doing so was a fireable offense. Because of this, even small problems that emerged became very expensive to fix. Toyota changed the game &amp;#8211; anyone had the ability to stop the line. When it was stopped, everyone would drop what they were doing and focus on the issue at hand. The line would stop more often, but solving problems happened quicker and cost less.&lt;/p&gt;

&lt;h2 id='pair_programming'&gt;Pair Programming&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;Pairing enables rapid learning.&lt;/li&gt;

&lt;li&gt;Learn practically, not necessarily comprehensively (book learning).&lt;/li&gt;

&lt;li&gt;It&amp;#8217;s actually fun.&lt;/li&gt;

&lt;li&gt;Driver + Navigator divide.&lt;/li&gt;

&lt;li&gt;Pairing is hard work &amp;#8211; exhausting.&lt;/li&gt;

&lt;li&gt;Mix of work and research, instead of research on personal time.&lt;/li&gt;
&lt;/ul&gt;

&lt;h2 id='zombies_in_my_workplace'&gt;Zombies In My Workplace&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;&amp;#8220;Change your organization, or change your organization.&amp;#8221;&lt;/li&gt;

&lt;li&gt;Socialize at happy hours, lunches, coffee machine.&lt;/li&gt;

&lt;li&gt;Bitching is fun &amp;#8211; people relate.&lt;/li&gt;

&lt;li&gt;Bitching gets old, and isn&amp;#8217;t good for your health.&lt;/li&gt;

&lt;li&gt;Do not diverge down the bitching road.&lt;/li&gt;

&lt;li&gt;Code tasting (code review) &amp;#8211; smells like legacy code from 1998.&lt;/li&gt;

&lt;li&gt;Don&amp;#8217;t ask for permission.&lt;/li&gt;

&lt;li&gt;Weekly book club on a chapter by chapter basis.&lt;/li&gt;

&lt;li&gt;Find ways to inject pair programming.&lt;/li&gt;
&lt;/ul&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/KvppCCmU4lg" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2010/11/14/barcamp-philly-2010-notes.html</feedburner:origLink></entry>
 
 <entry>
   <title>Test::Unit and Raw Request Body</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/c4VeU5kCu1Y/test-unit-and-raw-request-body.html" />
   <updated>2010-10-23T00:00:00-07:00</updated>
   <id>http://hectcastro.me/2010/10/23/test-unit-and-raw-request-body</id>
   <content type="html">&lt;p&gt;In testing a Rails 3 web API with Test::Unit, I found myself digging through documentation to POST raw JSON to a controller action. After a bit of searching, I decided to take a look at the &lt;a href='http://github.com/rails/rails/blob/3698da65e587c1c33e897c49d9204b3861f89d9d/actionpack/lib/action_dispatch/http/request.rb#L172-178' title='raw_post'&gt;source code&lt;/a&gt; for ActionDispatch::Request&amp;#8217;s &lt;code&gt;raw_post&lt;/code&gt;. Below is the end result:&lt;/p&gt;
&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='ruby'&gt;&lt;span class='nb'&gt;test&lt;/span&gt; &lt;span class='s2'&gt;&amp;quot;should create joke&amp;quot;&lt;/span&gt; &lt;span class='k'&gt;do&lt;/span&gt;
  &lt;span class='vi'&gt;@request&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;env&lt;/span&gt;&lt;span class='o'&gt;[&lt;/span&gt;&lt;span class='s1'&gt;&amp;#39;RAW_POST_DATA&amp;#39;&lt;/span&gt;&lt;span class='o'&gt;]&lt;/span&gt; &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='p'&gt;{&lt;/span&gt;
    &lt;span class='ss'&gt;:body&lt;/span&gt; &lt;span class='o'&gt;=&amp;gt;&lt;/span&gt; &lt;span class='s1'&gt;&amp;#39;Why did the chicken cross the road?&amp;#39;&lt;/span&gt;
  &lt;span class='p'&gt;}&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;to_json&lt;/span&gt;
  &lt;span class='vi'&gt;@request&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;env&lt;/span&gt;&lt;span class='o'&gt;[&lt;/span&gt;&lt;span class='s1'&gt;&amp;#39;CONTENT_TYPE&amp;#39;&lt;/span&gt;&lt;span class='o'&gt;]&lt;/span&gt; &lt;span class='o'&gt;=&lt;/span&gt; &lt;span class='s1'&gt;&amp;#39;application/json&amp;#39;&lt;/span&gt;
  &lt;span class='n'&gt;post&lt;/span&gt; &lt;span class='ss'&gt;:create&lt;/span&gt;

  &lt;span class='n'&gt;assert_response&lt;/span&gt; &lt;span class='ss'&gt;:success&lt;/span&gt;
  &lt;span class='n'&gt;assert_not_nil&lt;/span&gt; &lt;span class='vi'&gt;@response&lt;/span&gt;&lt;span class='o'&gt;.&lt;/span&gt;&lt;span class='n'&gt;headers&lt;/span&gt;&lt;span class='o'&gt;[&lt;/span&gt;&lt;span class='s1'&gt;&amp;#39;Location&amp;#39;&lt;/span&gt;&lt;span class='o'&gt;]&lt;/span&gt;
&lt;span class='k'&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/c4VeU5kCu1Y" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2010/10/23/test-unit-and-raw-request-body.html</feedburner:origLink></entry>
 
 <entry>
   <title>RVM and Cron</title>
   <link href="http://feedproxy.google.com/~r/tradingfish/~3/OwgBtiUJnVU/rvm-and-cron.html" />
   <updated>2010-06-21T00:00:00-07:00</updated>
   <id>http://hectcastro.me/2010/06/21/rvm-and-cron</id>
   <content type="html">&lt;p&gt;Recently I found myself having to execute a cronjob using specific rubies and gemsets contained within &lt;a href='http://rvm.beginrescueend.com/' title='RVM'&gt;RVM&lt;/a&gt;. The following snippet is what I used as a solution:&lt;/p&gt;

&lt;div class='highlight'&gt;&lt;pre&gt;&lt;code class='bash'&gt;@hourly bash -l -c &lt;span class='s1'&gt;&amp;#39;rvm use rbx@gemset &amp;amp;&amp;amp; rake cron&amp;#39;&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;The &lt;code&gt;-l&lt;/code&gt; forces &lt;a href='http://www.gnu.org/software/bash/' title='bash'&gt;bash&lt;/a&gt; to act as if it had been invoked as a login shell. The &lt;code&gt;-c&lt;/code&gt; tells it to read the string that follows.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/tradingfish/~4/OwgBtiUJnVU" height="1" width="1"/&gt;</content>
   <author>
     <name>Hector Castro</name>
     <uri>http://hectcastro.me</uri>
   </author>
 <feedburner:origLink>http://hectcastro.me/2010/06/21/rvm-and-cron.html</feedburner:origLink></entry>
 
</feed>

