<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0">

<channel>
	<title>Engine Yard Ruby on Rails Blog</title>
	
	<link>http://www.engineyard.com/blog</link>
	<description />
	<lastBuildDate>Mon, 30 Aug 2010 20:33:33 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.0</generator>
		<feedburner:info uri="engineyard" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/rss+xml" href="http://www.engineyard.com/feed/" /><feedburner:feedFlare href="http://add.my.yahoo.com/rss?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://us.i1.yimg.com/us.yimg.com/i/us/my/addtomyyahoo4.gif">Subscribe with My Yahoo!</feedburner:feedFlare><feedburner:feedFlare href="http://www.newsgator.com/ngs/subscriber/subext.aspx?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.newsgator.com/images/ngsub1.gif">Subscribe with NewsGator</feedburner:feedFlare><feedburner:feedFlare href="http://feeds.my.aol.com/add.jsp?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://o.aolcdn.com/favorites.my.aol.com/webmaster/ffclient/webroot/locale/en-US/images/myAOLButtonSmall.gif">Subscribe with My AOL</feedburner:feedFlare><feedburner:feedFlare href="http://www.bloglines.com/sub/http://www.engineyard.com/feed/" src="http://www.bloglines.com/images/sub_modern11.gif">Subscribe with Bloglines</feedburner:feedFlare><feedburner:feedFlare href="http://www.netvibes.com/subscribe.php?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.netvibes.com/img/add2netvibes.gif">Subscribe with Netvibes</feedburner:feedFlare><feedburner:feedFlare href="http://fusion.google.com/add?feedurl=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://buttons.googlesyndication.com/fusion/add.gif">Subscribe with Google</feedburner:feedFlare><feedburner:feedFlare href="http://www.pageflakes.com/subscribe.aspx?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.pageflakes.com/ImageFile.ashx?instanceId=Static_4&amp;fileName=ATP_blu_91x17.gif">Subscribe with Pageflakes</feedburner:feedFlare><feedburner:feedFlare href="http://www.plusmo.com/add?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" 
src="http://plusmo.com/res/graphics/fbplusmo.gif">Subscribe with Plusmo</feedburner:feedFlare><feedburner:feedFlare href="http://www.thefreedictionary.com/_/hp/AddRSS.aspx?http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://img.tfd.com/hp/addToTheFreeDictionary.gif">Subscribe with The Free Dictionary</feedburner:feedFlare><feedburner:feedFlare href="http://www.bitty.com/manual/?contenttype=rssfeed&amp;contentvalue=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.bitty.com/img/bittychicklet_91x17.gif">Subscribe with Bitty Browser</feedburner:feedFlare><feedburner:feedFlare href="http://www.newsalloy.com/?rss=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.newsalloy.com/subrss3.gif">Subscribe with NewsAlloy</feedburner:feedFlare><feedburner:feedFlare href="http://www.live.com/?add=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://tkfiles.storage.msn.com/x1piYkpqHC_35nIp1gLE68-wvzLZO8iXl_JMledmJQXP-XTBOLfmQv4zhj4MhcWEJh_GtoBIiAl1Mjh-ndp9k47If7hTaFno0mxW9_i3p_5qQw">Subscribe with Live.com</feedburner:feedFlare><feedburner:feedFlare href="http://mix.excite.eu/add?feedurl=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://image.excite.co.uk/mix/addtomix.gif">Subscribe with Excite MIX</feedburner:feedFlare><feedburner:feedFlare href="http://download.attensa.com/app/get_attensa.html?feedurl=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.attensa.com/blogs/attensa/WindowsLiveWriter/BadgeredintoBadges_10C02/attensa_feed_button5.gif">Subscribe with Attensa for Outlook</feedburner:feedFlare><feedburner:feedFlare href="http://www.webwag.com/wwgthis.php?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.webwag.com/images/wwgthis.gif">Subscribe with Webwag</feedburner:feedFlare><feedburner:feedFlare href="http://www.podcastready.com/oneclick_bookmark.php?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.podcastready.com/images/podcastready_button.gif">Subscribe with Podcast 
Ready</feedburner:feedFlare><feedburner:feedFlare href="http://www.flurry.com/pushRssFeed.do?r=fb&amp;url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.flurry.com/images/flurry_rss_logo2.gif">Subscribe with Flurry</feedburner:feedFlare><feedburner:feedFlare href="http://www.wikio.com/subscribe?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.wikio.com/shared/img/add2wikio.gif">Subscribe with Wikio</feedburner:feedFlare><feedburner:feedFlare href="http://www.dailyrotation.com/index.php?feed=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.dailyrotation.com/rss-dr2.gif">Subscribe with Daily Rotation</feedburner:feedFlare><item>
		<title>Rubinius wants to help YOU make Ruby better</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/RyDUZtXjpnE/</link>
		<comments>http://www.engineyard.com/blog/2010/rubinius-wants-to-help-you-make-ruby-better/#comments</comments>
		<pubDate>Mon, 30 Aug 2010 20:33:33 +0000</pubDate>
		<dc:creator>Brian Ford</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[Rubinius]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=4063</guid>
		<description><![CDATA[It is a great time to be a Rubyist. This year we have already seen IronRuby 1.0, JRuby 1.5, with Ruby 1.9 due to be released shortly. Ruby is simply becoming better and faster on every platform. And, wherever Ruby is, Rails is sure to be nearby. Rails 3 looks more awesome each day.

Recently, our very own <a href="http://rubini.us">Rubinius</a> officially joined the ranks with a 1.0 release. We are excited to see folks trying it out. All the feedback and issues reported have been a great help. Many people are reporting that their apps "just work".

With all this great news, the Ruby world looks rosy indeed. However, we can make Ruby even better. To do so, we need your help. You may not realize this, but the quality of the Ruby code you write can have a significant impact on how great we can make Ruby. I'd like to share some tips about how you can improve your Ruby code while helping us make Ruby better too.
<h2>0. Rubinius</h2>
Rubinius is a completely new implementation of Ruby. When <a href="http://blog.fallingsnow.net/">Evan Phoenix</a> started Rubinius, he put some stakes in the sand. Rubinius has a modern, bytecode virtual machine, a cutting-edge garbage collector, a just-in-time (JIT) compiler utilizing the awesome <a href="http://llvm.org">LLVM</a> project, and a Ruby core library and bytecode compiler written in Ruby. We are only just getting started with 1.0. We have a whole list of features coming, including support for Windows and Ruby version 1.9, as well as improvements to the JIT compiler that should make Ruby several times faster, and removal of the global interpreter lock (GIL) so that your threads will execute Ruby code concurrently.

Rubinius does a lot of things differently than MRI under the covers. As Rubinius has grown up, we've definitely seen a wide cross-section of Ruby code while working on features and compatibility. The tips for writing better Ruby code below are based on some of the challenges we have faced.
<h2>1. Sending Messages</h2>
Rubinius is unique among the various Ruby implementations in that it implements the Ruby core library primarily in Ruby. Even the primitive methods, operations implemented in C++ that must access the virtual machine directly, appear to other Ruby code as normal Ruby methods. Importantly, calling these primitive methods from Ruby code is like calling any other Ruby method.

Early on in the Rubinius project, a lot of attention was focused on the idea of <em>Ruby in Ruby</em>. This was a good idea for several reasons, one of which being that Ruby is a more elegant and expressive language than C or Java, and that Ruby programmers tend to understand Ruby code pretty well. This familiarity with Ruby makes Rubinius easier to develop and maintain, and more approachable for many Ruby developers. The validity of these reasons has been demonstrated in the life of the project. However, there are two other very important reasons that don't attract quite as much attention.

The first of these is performance. As Evan often points out, Ruby is the currency of the Rubinius VM. It understands Ruby inside and out. The VM knows how to find a Ruby method, how to look up a constant, and what it means for an object to reference another object. The Rubinius VM operates on a special representation of Ruby code. This representation is often referred to as <em>bytecode</em> and is essentially a stream of instructions for the virtual machine. The JIT compiler, which can significantly improve Ruby performance, also operates on bytecode. What this means is that to the JIT, your program and the Ruby core library look an awful lot alike. So much, in fact, that the JIT compiler can mix them all together, which gives the optimizer much greater opportunity to generate <em>really</em> fast code.

The second reason is the consistency and elegance of an object-oriented language. When the Ruby core library is written in Ruby, you call a Ruby method, well, by calling a Ruby method. That may sound redundant, but I assure you, it is not. In MRI, for example, with the Ruby core library written in C, the code will often call directly to a C function rather than dispatching normally through Ruby method calls. What this means for you is that MRI may invoke "Ruby" functionality without engaging you in the conversation at all. That inconsistency may prevent you from using simple and elegant object-oriented code that extends the functionality of core classes.

In contrast, when functionality is invoked through normal Ruby dispatch, your code can be elegant and participate in the process. However, this is a significant double-edged sword, as we have become painfully aware in Rubinius. When we implement all the complex behavior of the core library in Ruby, it's quite possible to do something crazy, like remove all the Ruby methods we need to make an object work! That is pretty crazy, right? Fortunately, in this coding wild west, there is a very important principle that can lend some law and order.
<h2>2. Liskov Substitution Principle</h2>
You may have heard this term tossed around in discussions. If you haven't, don't worry, we'll delve into this fairly intuitive idea. If you have, I hope to renew your commitment and respect for this principle.

So, what are we talking about here? Barbara Liskov and her collaborators were concerned with how to write reliable object-oriented software. As you know, one of the principal ideas in class-based object-oriented languages is inheritance, or the relationship between a class and its subclasses. What sort of rules should govern this relationship? What should we expect when we use a subtype in place of a supertype in our program? These are the questions that Barbara Liskov and others were pondering.

What they proposed is referred to as the <em>Subtype Requirement</em>, which they defined as:
<blockquote><em>Let q(x) be a property provable about objects x of type T. Then q(y) should be true for objects y of type S where S is a subtype of T.</em></blockquote>
(see <em>Behavioral Subtyping Using Invariants and Constraints</em>, by Barbara H. Liskov and Jeannette M. Wing.)

Let's consider this in terms of some Ruby code. Suppose you have this class in your program:
<pre>  class FancyArray &lt; Array
    def initialize(size)
       # ...
    end
  end</pre>
What is wrong with this picture? Well, in my Ruby code, I can do <code>x = Array.new</code>.  But what happens when I attempt to use the FancyArray class in place of Array? If I do <code>x = FancyArray.new</code>, I will surely get an ArgumentError exception because FancyArray requires that I pass one argument when calling the <em>new</em> method.

Let's phrase this in terms of the <em>Subtype Requirement</em>: Let <em>x</em> be an instance of Array. Then q(x) = <em>the arity of the initialize method is -1</em>. Let <em>y</em> be an instance of FancyArray, which is a subclass of Array. Then q(y) = <em>arity of the initialize method is -1</em> by the <em>Subtype Requirement</em>.

Now let's relate the above to Ruby code and check if the <em>Subtype Requirement</em> holds:
<pre>irb(main):001:0&gt; x = Array.instance_method(:initialize).arity
=&gt; -1
irb(main):002:0&gt; y = FancyArray.instance_method(:initialize).arity
=&gt; 1
irb(main):003:0&gt; x == y
=&gt; false</pre>
It is clear from this that FancyArray does not conform to the <em>Subtype Requirement</em>. Consequently, code that expects to use an Array will not function correctly when a FancyArray is substituted. It's important to also note that the <em>Subtype Requirement</em> applies to any observable property of the object. The example used in the paper is of a Stack and Queue. Both classes may provide <em>push</em> and <em>pop</em> methods, but the semantics of the methods are quite different between the two classes.

Now, you may say, "But, I have a very good reason for requiring an argument to <em>new</em>." Well then, I would venture to say you have an important reason to consider the difference between composition and inheritance for designing your program.
<h2>3. Composition versus Inheritance</h2>
Of the three object-oriented principles—inheritance, encapsulation, and polymorphism—inheritance has been so abused there could be a 12-step program devoted entirely to it. Fortunately, the remedy for inappropriate use of inheritance is quite simple: compose your objects of other objects.

Inheritance models an <em><strong>is a</strong></em> relationship, while composition models a <em><strong>has a</strong></em> relationship. If your object is a String, then it will do all the normal String things <em>just as a String would do them</em>. This is very important. It needs to do <em>String things</em> not just externally, when you call the methods, but internally, when the other String methods call each other. Is your FancyTemplate class really a String? Then, for example, I should always be able to request its length. However, your FancyTemplate instance probably doesn't have a length when it is being built. Therefore, String methods that may be employed during the construction phase could be highly confused. In such a case, I suggest your FancyTemplate <strong>has a</strong> String internally, and it can be urged to give you a representation of that String at some point in time. Yet, it is not a String from the perspective of inheritance and conforming to the <em>Liskov Substitution Principle</em>.

Only you can tell whether your model is best represented by inheritance or composition. When designing your classes, be sure to consider the view from inside and out. If you are contorting your methods to act like the class you are inheriting from, perhaps your class only <em>has one</em> of those things, rather than <em>being one</em> of them. Most importantly, remember that you are not the only kid on the playground.
<h2>4. Playing Nicely</h2>
This is more about general advice than specific admonitions. We are lucky to have such a powerful, expressive language in Ruby. Opening a core class to patch a method is tremendously useful and powerful. However, remember that with great power comes great responsibility.

First and foremost, simply be conscious of what you are asking Ruby to do for you. I used this example earlier, and I'm going to repeat it because in Rubinius we have encountered this more times than we can count. Ruby is an object-oriented language. You cause computation to occur by sending messages to an object. <em><strong>How can the object work if it has no methods?</strong></em> (I say with my best Zoolander impersonation). If your code does:
<pre>  class SomeClass
    instance_methods(false).each { |m| undef_method m }
  end</pre>
<em>you are (most likely) doing it wrong</em>. There are many variations on this theme, but they all share the same problem: the assumption that those methods you are removing are as superfluous as Johnny's appendix. I assure you, we don't randomly add methods to classes in Rubinius. Again, your code may work fine in MRI when you do this because MRI calls C functions on that object behind your back with impunity. But, we do want to have nice things, right? If you ever wonder what consequences your code may have, just drop into the #rubinius channel on freenode. We will happily discuss it with you.

A related problem occurs when code inherits from a core Ruby class and redefines one of the core methods. When the core classes are implemented in Ruby, the methods may depend on one another to perform their tasks. For example, in Hash it would not be entirely unreasonable for <em>each_value</em> to be implemented in terms of <em>each</em>. Well, not unreasonable, that is, until you try to run REXML in the Ruby Standard Library. REXML has an Attributes class that inherits from Hash. The Attributes class then implements an <em>each_attribute</em> method.  For good measure, it overrides <em>each</em> to use <em>each_attribute</em>. And <em>each_attribute</em> calls <em>each_value</em>. <em>Waiter, I believe there's a StackError in my Attributes</em>. The moral of the story: the two edges on this wonderful Ruby sword are sharp. It does take extra work to consider how methods on a particular class interact with one another; to some extent, this is an implementation detail. However, it's something to be aware of when you write code. Of course, you can always browse the Ruby implementation of the core classes in Rubinius.

Playing nicely is more than being conscientious about how you write your own code. It's also important to consider how you use code others have written. Your code should not depend on implementation details of the classes and libraries you use. However, it's often hard to know what those implementation details are. Often the dependency will be subtle and implicit. Your code will appear to work fine in MRI but break in one of the alternative implementations. There is no general solution to this problem, but you can usually avoid it by checking the assumptions your code makes about the other code it uses. One example of this is mutating a collection in the block passed to an iterating method. Consider the following code:
<pre>some_hash.each { |key, value| some_hash.delete(key) if fancy_test(value) }</pre>
Hash is a fairly complex data structure and this bit of code can have very different behavior depending on how Hash is implemented. Thankfully, Matz has explicitly said <a href="http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/23633">this behavior is undefined</a>.
<h2>5. Neighborly C Extensions</h2>
While playing nicely in Ruby code is important, it's also very important when writing C extensions. These are programs typically written in C/C++ that directly access the C functions that MRI uses to implement Ruby.  You probably regularly use one or more gems or libraries that are partially implemented by a C extension. C extensions are often used to access native libraries from Ruby, for example, when writing database adapters.

C extensions are not the only way to access native libraries from Ruby. There are also the FFI and DL libraries. Rubinius was the first implementation to popularize the use of the foreign-function interface (FFI) library for accessing native code. In fact, vital pieces of the core library in Rubinius are implemented via FFI, which is a modern implementation of DL, the dynamic load library that MRI has included for years. There are now quality implementations of FFI available on both JRuby and MRI.

FFI is generally the preferred way to interface with native libraries. The benefits include not needing a C compiler and being able to harness the speed or power of a native library while writing pure Ruby code. However, there are still two core use cases for C extensions: 1) when the data marshaling through the FFI layer imposes too large a performance cost; or 2) when your code already relies on an existing C extension. These use cases are hard to get around. Fortunately, we have put a lot of effort into getting C extensions working quite well on Rubinius. In fact, many C extensions just work.

However, there is one particular problem with some C extensions that limits our ability to support them: some have explicit dependencies on MRI data structures, for example, RHash. Depending on a data structure your code does not control makes your program vulnerable to breaking if the other code changes its implementation. Unfortunately, the C programming language doesn't do much to enforce good practices here. If the C compiler can see a structure or function in a header file, you are free to use it in your program. Yet, just because you can, does not mean you should. Instead, you should always use a function interface (also known as an API) to access the data. Treat data structures that are not your own as opaque.

Of course, that is the ideal world. MRI cannot foretell every use case that a C extension may have. So some of these problems are simply the result of people being more creative than the MRI developers imagined, which is mostly a good thing. In version 1.9, MRI is enforcing the use of APIs over raw struct access. For example, rather than using <code>RSTRING(obj)-&gt;ptr</code>, your code should do <code>RSTRING_PTR(obj)</code> instead. Since Rubinius is compatible with MRI version 1.8.7, we still support both forms in this case. However, to make your code robust and portable, you should use the RSTRING_PTR API.

One thing Rubinius does not support is code like <code>RHASH(obj)-&gt;tbl</code> that accesses the RHash struct directly. This is partially because, in Rubinius, Hash is implemented entirely in Ruby. However, most C extension code needs to do something like iterate over the entries rather than just access the structure. In this case, the <em>rb_hash_foreach</em> function is available, so it's quite easy to change a C extension so it will run on Rubinius. In fact, a number of C extensions have already been updated in this way. If you encounter a problem with a C extension, please file an issue for it.

We understand there are valid use cases for writing C extensions. While Rubinius is implemented very differently than MRI, we want your C extensions to be able to run in Rubinius and we have worked hard to ensure that most C extensions do run. If you encounter cases where there is no function API to work with MRI data, let us know. We can collaborate with Matz and the MRI developers to add such APIs. That way, you can help us help you to make Ruby better for everyone. Win!

Ruby is a terrific language and with your help, it can be even better. Do you have any tips for writing better Ruby code? Please, let us know.

If you are new to Rubinius, you may find these previous posts informative:
<ul>
	<li><a href="http://www.engineyard.com/blog/2010/making-ruby-fast-the-rubinius-jit/">Making Ruby Fast: The Rubinius JIT</a></li>
	<li><a href="http://www.engineyard.com/blog/2009/improving-the-rubinius-bytecode-compiler/">Improving the Rubinius Bytecode Compiler</a></li>
	<li><a href="http://www.engineyard.com/blog/2009/the-anatomy-of-a-ruby-jit-compile/">Compiling Ruby: From Text to Bytecode</a></li>
	<li><a href="http://www.engineyard.com/blog/2009/5-things-youll-love-about-rubinius/">5 Things You’ll Love About Rubinius</a></li>
	<li><a href="http://www.engineyard.com/blog/2009/rubinius-the-book-tour/">Rubinius: The Book Tour</a></li>
</ul><p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[It is a great time to be a Rubyist. This year we have already seen IronRuby 1.0, JRuby 1.5, with Ruby 1.9 due to be released shortly. Ruby is simply becoming better and faster on every platform. And, wherever Ruby is, Rails is sure to be nearby. Rails 3 looks more awesome each day.

Recently, our very own <a href="http://rubini.us">Rubinius</a> officially joined the ranks with a 1.0 release. We are excited to see folks trying it out. All the feedback and issues reported have been a great help. Many people are reporting that their apps "just work".

With all this great news, the Ruby world looks rosy indeed. However, we can make Ruby even better. To do so, we need your help. You may not realize this, but the quality of the Ruby code you write can have a significant impact on how great we can make Ruby. I'd like to share some tips about how you can improve your Ruby code while helping us make Ruby better too.
<h2>0. Rubinius</h2>
Rubinius is a completely new implementation of Ruby. When <a href="http://blog.fallingsnow.net/">Evan Phoenix</a> started Rubinius, he put some stakes in the sand. Rubinius has a modern, bytecode virtual machine, a cutting-edge garbage collector, a just-in-time (JIT) compiler utilizing the awesome <a href="http://llvm.org">LLVM</a> project, and a Ruby core library and bytecode compiler written in Ruby. We are only just getting started with 1.0. We have a whole list of features coming, including support for Windows and Ruby version 1.9, as well as improvements to the JIT compiler that should make Ruby several times faster, and removal of the global interpreter lock (GIL) so that your threads will execute Ruby code concurrently.

Rubinius does a lot of things differently than MRI under the covers. As Rubinius has grown up, we've definitely seen a wide cross-section of Ruby code while working on features and compatibility. The tips for writing better Ruby code below are based on some of the challenges we have faced.
<h2>1. Sending Messages</h2>
Rubinius is unique among the various Ruby implementations in that it implements the Ruby core library primarily in Ruby. Even the primitive methods, operations implemented in C++ that must access the virtual machine directly, appear to other Ruby code as normal Ruby methods. Importantly, calling these primitive methods from Ruby code is like calling any other Ruby method.

Early on in the Rubinius project, a lot of attention was focused on the idea of <em>Ruby in Ruby</em>. This was a good idea for several reasons, one of which being that Ruby is a more elegant and expressive language than C or Java, and that Ruby programmers tend to understand Ruby code pretty well. This familiarity with Ruby makes Rubinius easier to develop and maintain, and more approachable for many Ruby developers. The validity of these reasons has been demonstrated in the life of the project. However, there are two other very important reasons that don't attract quite as much attention.

The first of these is performance. As Evan often points out, Ruby is the currency of the Rubinius VM. It understands Ruby inside and out. The VM knows how to find a Ruby method, how to look up a constant, and what it means for an object to reference another object. The Rubinius VM operates on a special representation of Ruby code. This representation is often referred to as <em>bytecode</em> and is essentially a stream of instructions for the virtual machine. The JIT compiler, which can significantly improve Ruby performance, also operates on bytecode. What this means is that to the JIT, your program and the Ruby core library look an awful lot alike. So much, in fact, that the JIT compiler can mix them all together, which gives the optimizer much greater opportunity to generate <em>really</em> fast code.

The second reason is the consistency and elegance of an object-oriented language. When the Ruby core library is written in Ruby, you call a Ruby method, well, by calling a Ruby method. That may sound redundant, but I assure you, it is not. In MRI, for example, with the Ruby core library written in C, the code will often call directly to a C function rather than dispatching normally through Ruby method calls. What this means for you is that MRI may invoke "Ruby" functionality without engaging you in the conversation at all. That inconsistency may prevent you from using simple and elegant object-oriented code that extends the functionality of core classes.

In contrast, when functionality is invoked through normal Ruby dispatch, your code can be elegant and participate in the process. However, this is a significant double-edged sword, as we have become painfully aware in Rubinius. When we implement all the complex behavior of the core library in Ruby, it's quite possible to do something crazy, like remove all the Ruby methods we need to make an object work! That is pretty crazy, right? Fortunately, in this coding wild west, there is a very important principle that can lend some law and order.
<h2>2. Liskov Substitution Principle</h2>
You may have heard this term tossed around in discussions. If you haven't, don't worry, we'll delve into this fairly intuitive idea. If you have, I hope to renew your commitment and respect for this principle.

So, what are we talking about here? Barbara Liskov and her collaborators were concerned with how to write reliable object-oriented software. As you know, one of the principal ideas in class-based object-oriented languages is inheritance, or the relationship between a class and its subclasses. What sort of rules should govern this relationship? What should we expect when we use a subtype in place of a supertype in our program? These are the questions that Barbara Liskov and others were pondering.

What they proposed is referred to as the <em>Subtype Requirement</em>, which they defined as:
<blockquote><em>Let q(x) be a property provable about objects x of type T. Then q(y) should be true for objects y of type S where S is a subtype of T.</em></blockquote>
(see <em>Behavioral Subtyping Using Invariants and Constraints</em>, by Barbara H. Liskov and Jeannette M. Wing.)

Let's consider this in terms of some Ruby code. Suppose you have this class in your program:
<pre>  class FancyArray &lt; Array
    def initialize(size)
       # ...
    end
  end</pre>
What is wrong with this picture? Well, in my Ruby code, I can do <code>x = Array.new</code>.  But what happens when I attempt to use the FancyArray class in place of Array? If I do <code>x = FancyArray.new</code>, I will surely get an ArgumentError exception because FancyArray requires that I pass one argument when calling the <em>new</em> method.

Let's phrase this in terms of the <em>Subtype Requirement</em>: Let <em>x</em> be an instance of Array. Then q(x) = <em>the arity of the initialize method is -1</em>. Let <em>y</em> be an instance of FancyArray, which is a subclass of Array. Then q(y) = <em>arity of the initialize method is -1</em> by the <em>Subtype Requirement</em>.

Now let's relate the above to Ruby code and check if the <em>Subtype Requirement</em> holds:
<pre>irb(main):001:0&gt; x = Array.instance_method(:initialize).arity
=&gt; -1
irb(main):002:0&gt; y = FancyArray.instance_method(:initialize).arity
=&gt; 1
irb(main):003:0&gt; x == y
=&gt; false</pre>
It is clear from this that FancyArray does not conform to the <em>Subtype Requirement</em>. Consequently, code that expects to use an Array will not function correctly when a FancyArray is substituted. It's important to also note that the <em>Subtype Requirement</em> applies to any observable property of the object. The example used in the paper is of a Stack and Queue. Both classes may provide <em>push</em> and <em>pop</em> methods, but the semantics of the methods are quite different between the two classes.

Now, you may say, "But, I have a very good reason for requiring an argument to <em>new</em>." Well then, I would venture to say you have an important reason to consider the difference between composition and inheritance for designing your program.
<h2>3. Composition versus Inheritance</h2>
Of the three object-oriented principles—inheritance, encapsulation, and polymorphism—inheritance has been so abused there could be a 12-step program devoted entirely to it. Fortunately, the remedy for inappropriate use of inheritance is quite simple: compose your objects of other objects.

Inheritance models an <em><strong>is a</strong></em> relationship, while composition models a <em><strong>has a</strong></em> relationship. If your object is a String, then it will do all the normal String things <em>just as a String would do them</em>. This is very important. It needs to do <em>String things</em> not just externally, when you call the methods, but internally, when the other String methods call each other. Is your FancyTemplate class really a String? Then, for example, I should always be able to request its length. However, your FancyTemplate instance probably doesn't have a length when it is being built. Therefore, String methods that may be employed during the construction phase could be highly confused. In such case, I suggest your FancyTemplate <strong>has a</strong> String internally, and it can be urged to give you a representation of that String at some point in time. Yet, it is not a String from the perspective of inheritance and conforming to the <em>Liskov Substitution Principle</em>.

Only you can tell whether your model is best represented by inheritance or composition. When designing your classes, be sure to consider the view from inside and out. If you are contorting your methods to act like the class you are inheriting from, perhaps your class only <em>has one</em> of those things, rather than <em>being one</em> of them. Most importantly, remember that you are not the only kid on the playground.
<h2>4. Playing Nicely</h2>
This is more about general advice than specific admonitions. We are lucky to have such a powerful, expressive language in Ruby. Opening a core class to patch a method is tremendously useful and powerful. However, remember that with great power, comes great responsibility.

First and foremost, simply be conscious of what you are asking Ruby to do for you. I used this example earlier, and I'm going to repeat it because in Rubinius we have encountered this more times than we can count. Ruby is an object-oriented language. You cause computation to occur by sending messages to an object. <em><strong>How can the object work if it has no methods?</strong></em> (I say with my best Zoolander impersonation). If your code does:
<pre>  class SomeClass
    instance_methods(false).each { |m| undef_method m }
  end</pre>
<em>you are (most likely) doing it wrong</em>. There are many variations on this theme, but they all share the same problem: the assumption that those methods you are removing are as superfluous as Johnny's appendix. I assure you, we don't randomly add methods to classes in Rubinius. Again, your code may work fine in MRI when you do this because MRI calls C functions on that object behind your back with impunity. But, we do want to have nice things, right? If you ever wonder what consequences your code may have, just drop into the #rubinius channel on freenode. We will happily discuss it with you.

A related problem occurs when code inherits from a core Ruby class and redefines one of the core methods. When the core classes are implemented in Ruby, the methods may depend on one another to perform their tasks. For example, in Hash it would not be entirely unreasonable for <em>each_value</em> to be implemented in terms of <em>each</em>. Well, not unreasonable, that is, until you try to run REXML in the Ruby Standard Library. REXML has an Attributes class that inherits from Hash. The Attributes class then implements an <em>each_attribute</em> method.  For good measure, it overrides <em>each</em> to use <em>each_attribute</em>. And <em>each_attribute</em> calls <em>each_value</em>. <em>Waiter, I believe there's a StackError in my Attributes</em>. The moral of the story: the two edges on this wonderful Ruby sword are sharp. It does take extra work to consider how methods on a particular class interact with one another; to some extent, this is an implementation detail. However, it's something to be aware of when you write code. Of course, you can always browse the Ruby implementation of the core classes in Rubinius.

Playing nicely is more than being conscientious about how you write your own code. It's also important to consider how you use code others have written. Your code should not depend on implementation details of the classes and libraries you use. However, it's often hard to know what those implementation details are. Often the dependency will be subtle and implicit. Your code will appear to work fine in MRI but break in one of the alternative implementations. There is no general solution to this problem, but you can usually avoid it by checking the assumptions your code makes about the other code it uses. One example of this is mutating a collection in the block passed to an iterating method. Consider the following code:
<pre>some_hash.each { |key, value| some_hash.delete(key) if fancy_test(value) }</pre>
Hash is a fairly complex data structure and this bit of code can have very different behavior depending on how Hash is implemented. Thankfully, Matz has explicitly said <a href="http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/23633">this behavior is undefined</a>.
<h2>5. Neighborly C Extensions</h2>
While playing nicely in Ruby code is important, it's also very important when writing C extensions. These are programs typically written in C/C++ that directly access the C functions that MRI uses to implement Ruby.  You probably regularly use one or more gems or libraries that are partially implemented by a C extension. C extensions are often used to access native libraries from Ruby, for example, when writing database adapters.

C extensions are not the only way to access native libraries from Ruby. There are also the FFI and DL libraries. Rubinius was the first implementation to popularize the use of the foreign-function interface (FFI) library for accessing native code. In fact, vital pieces of the core library in Rubinius are implemented via FFI, which is a modern implementation of DL, the dynamic load library that MRI has included for years. There are now quality implementations of FFI available on both JRuby and MRI.

FFI is generally the preferred way to interface with native libraries. The benefits include not needing a C compiler and being able to harness the speed or power of a native library while writing pure Ruby code. However, there are still two core use cases for C extensions: 1) when the data marshaling through the FFI layer imposes too large a performance cost; or 2) when your code already relies on an existing C extension. These use cases are hard to get around. Fortunately, we have put a lot of effort into getting C extensions working quite well on Rubinius. In fact, many C extensions just work.

However, there is one particular problem with some C extensions that limits our ability to support them: some have explicit dependencies on MRI data structures, for example, RHash. Depending on a data structure your code does not control makes your program vulnerable to breaking if the other code changes its implementation. Unfortunately, the C programming language doesn't do much to enforce good practices here. If the C compiler can see a structure or function in a header file, you are free to use it in your program. Yet, just because you can, does not mean you should. Instead, you should always use a function interface (also known as an API) to access the data. Treat data structures that are not your own as opaque.

Of course, that is the ideal world. MRI cannot foretell every use case that a C extension may have. So some of these problems are simply the result of people being more creative than the MRI developers imagined, which is mostly a good thing. In version 1.9, MRI is enforcing the use of API's over raw struct access. For example, rather than using <code>RSTRING(obj)-&gt;ptr</code>, your code should do <code>RSTRING_PTR(obj)</code> instead. Since Rubinius is compatible with MRI version 1.8.7, we still support both forms in this case. However, to make your code robust and portable, you should use the RSTRING_PTR API.

One thing Rubinius does not support is code like <code>RHASH(obj)-&gt;tbl</code> that accesses the RHash struct directly. This is partially because, in Rubinius, Hash is implemented entirely in Ruby. However, most C extension code needs to do something like iterate over the entries rather than just access the structure. In this case, the <em>rb_hash_foreach</em> function is available, so it's quite easy to change a C extension so it will run on Rubinius. In fact, a number of C extensions have already been updated in this way. If you encounter a problem with a C extension, please file an issue for it.

We understand there are valid use cases for writing C extensions. While Rubinius is implemented very differently than MRI, we want your C extensions to be able to run in Rubinius and we have worked hard to ensure that most C extensions do run. If you encounter cases where there is no function API to work with MRI data, let us know. We can collaborate with Matz and the MRI developers to add such APIs. That way, you can help us help you to make Ruby better for everyone. Win!

Ruby is a terrific language and with your help, it can be even better. Do you have any tips for writing better Ruby code? Please, let us know.

If you are new to Rubinius, you may find these previous posts informative:
<ul>
	<li><a href="http://www.engineyard.com/blog/2010/making-ruby-fast-the-rubinius-jit/">Making Ruby Fast: The Rubinius JIT</a></li>
	<li><a href="http://www.engineyard.com/blog/2009/improving-the-rubinius-bytecode-compiler/">Improving the Rubinius Bytecode Compiler</a></li>
	<li><a href="http://www.engineyard.com/blog/2009/the-anatomy-of-a-ruby-jit-compile/">Compiling Ruby: From Text to Bytecode</a></li>
	<li><a href="http://www.engineyard.com/blog/2009/5-things-youll-love-about-rubinius/">5 Things You’ll Love About Rubinius</a></li>
	<li><a href="http://www.engineyard.com/blog/2009/rubinius-the-book-tour/">Rubinius: The Book Tour</a></li>
</ul><p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/RyDUZtXjpnE" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/rubinius-wants-to-help-you-make-ruby-better/feed/</wfw:commentRss>
		<slash:comments>8</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/rubinius-wants-to-help-you-make-ruby-better/</feedburner:origLink></item>
		<item>
		<title>Introducing JRubyConf 2010</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/G3CVFngE0vI/</link>
		<comments>http://www.engineyard.com/blog/2010/introducing-jrubyconf-2010/#comments</comments>
		<pubDate>Wed, 25 Aug 2010 20:02:32 +0000</pubDate>
		<dc:creator>Melissa Sheehan</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=4530</guid>
		<description><![CDATA[We're excited to join forces with our friends at <a href="http://www.edgecase.com">EdgeCase</a> to co-host the second annual <a href="http://jrubyconf.com/">JRubyConf</a>, taking place October 1-3 at Quest Conference Center in Columbus, Ohio. This year, we've expanded the event to include three days of JRuby-filled goodness. JRubyConf will showcase the growing use of Ruby in the enterprise while also highlighting elements of the Java language that Ruby developers can benefit from via <a href="http://jruby.org/">JRuby</a>.

We've got a fantastic speaker lineup including: Tom Enebo, Chad Fowler, Jeremy Hinegardner, Rich Kilmer, Keavy McMinn, Charles Nutter, Joe O’Brien, Nick Sieger, Brian Swan, Glenn Vanderburg, Jim Weirich, and more to be announced soon.  If you're curious about what JRuby can do for you, or if you're someone who has been using it for years - join us! We've got something for everyone.

JRubyConf will begin with Java and Ruby specific talks before progressing to more advanced sessions that demonstrate the possibilities of using both languages with JRuby – all focused on bringing the Ruby and Java communities together in a collaborative environment to share best development practices.

Topics to be covered include:
•	Introduction to JRuby
•	How to use Java in Ruby applications
•	Best practices for introducing Ruby to Java development teams
•	Effectively managing large agile teams that use Ruby
•	Large scale testing with Ruby
•	How to scale Ruby on Rails

Our growing sponsor list includes <a href="http://edgecase.com">EdgeCase</a>, <a href="http://www.8thlight.com/">8th Light</a>, <a href="http://www.elctech.com/">ELC Technologies</a>, <a href="http://www.kineticdata.com/">Kinetic Data</a>, <a href="http://oreilly.com/">O'Reilly</a>, <a href="http://www.terremark.com/default.aspx">Terremark</a>, and <a href="http://www.wyeworks.com/">WyeWorks</a>.

Registration is now open. Take advantage of an early bird discount for registration before September 1. If you would like information on user group discounts, <a href="mailto:jrubyconf@edgecase.com">give us a shout</a>!

To register to attend or to participate as a sponsor, visit the <a href="http://jrubyconf.com/">JRubyConf event site</a>. Follow <a href="http://twitter.com/jrubyconf">@JRubyConf</a> on Twitter to stay on top of announcements.

Hope you can join us in Columbus!<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[We're excited to join forces with our friends at <a href="http://www.edgecase.com">EdgeCase</a> to co-host the second annual <a href="http://jrubyconf.com/">JRubyConf</a>, taking place October 1-3 at Quest Conference Center in Columbus, Ohio. This year, we've expanded the event to include three days of JRuby-filled goodness. JRubyConf will showcase the growing use of Ruby in the enterprise while also highlighting elements of the Java language that Ruby developers can benefit from via <a href="http://jruby.org/">JRuby</a>.

We've got a fantastic speaker lineup including: Tom Enebo, Chad Fowler, Jeremy Hinegardner, Rich Kilmer, Keavy McMinn, Charles Nutter, Joe O’Brien, Nick Sieger, Brian Swan, Glenn Vanderburg, Jim Weirich, and more to be announced soon.  If you're curious about what JRuby can do for you, or if you're someone who has been using it for years - join us! We've got something for everyone.

JRubyConf will begin with Java and Ruby specific talks before progressing to more advanced sessions that demonstrate the possibilities of using both languages with JRuby – all focused on bringing the Ruby and Java communities together in a collaborative environment to share best development practices.

Topics to be covered include:
•	Introduction to JRuby
•	How to use Java in Ruby applications
•	Best practices for introducing Ruby to Java development teams
•	Effectively managing large agile teams that use Ruby
•	Large scale testing with Ruby
•	How to scale Ruby on Rails

Our growing sponsor list includes <a href="http://edgecase.com">EdgeCase</a>, <a href="http://www.8thlight.com/">8th Light</a>, <a href="http://www.elctech.com/">ELC Technologies</a>, <a href="http://www.kineticdata.com/">Kinetic Data</a>, <a href="http://oreilly.com/">O'Reilly</a>, <a href="http://www.terremark.com/default.aspx">Terremark</a>, and <a href="http://www.wyeworks.com/">WyeWorks</a>.

Registration is now open. Take advantage of an early bird discount for registration before September 1. If you would like information on user group discounts, <a href="mailto:jrubyconf@edgecase.com">give us a shout</a>!

To register to attend or to participate as a sponsor, visit the <a href="http://jrubyconf.com/">JRubyConf event site</a>. Follow <a href="http://twitter.com/jrubyconf">@JRubyConf</a> on Twitter to stay on top of announcements.

Hope you can join us in Columbus!<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/G3CVFngE0vI" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/introducing-jrubyconf-2010/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/introducing-jrubyconf-2010/</feedburner:origLink></item>
		<item>
		<title>Pragmatic Polyglot Persistence with Rails</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/ZLDs8zlAMBQ/</link>
		<comments>http://www.engineyard.com/blog/2010/pragmatic-polyglot-persistence-with-rails/#comments</comments>
		<pubDate>Mon, 23 Aug 2010 09:50:38 +0000</pubDate>
		<dc:creator>Kent Fenwick</dc:creator>
				<category><![CDATA[Technology]]></category>
		<category><![CDATA[Rails]]></category>
		<category><![CDATA[Redis]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=4355</guid>
		<description><![CDATA[<div class="note">This post comes from guest community contributor Kent Fenwick. Kent is the tech co-founder of <a href="http://viewpointr.com/">Viewpointr</a>, a personalized Q&amp;A service that aims to provide an easy way to get and give help. When he isn't programming, he spends time with his family and friends in Toronto. Kent writes <a href="http://kent.posterous.com/">here</a> and can be followed on Twitter at <a href="http://twitter.com/kentf">@kentf</a>.</div>
It's getting more and more difficult to pick a persistence layer for your web application. When I started in Rails four years ago, there was really only one option, MySQL. Now, there are many more, each with their own pros and cons. Some are new and some are old, some are tested, and others, not so much.  What's clear is that when you are building a business around data, you want to make good decisions. That being said, often only the future will tell if you've made the right ones. I want to share with you my persistence story about how I ended up getting the best of both worlds.

<h2>The Problem</h2>

There are too many choices and each choice has a loud evangelist of its own. When designing <a href="http://viewpointr.com/">Viewpointr</a> I went back and forth daily between MongoDB, MySQL, PostgreSQL and Cassandra. Viewpointr is essentially Twitter with a focus on helping people. Therefore, we have some common data elements: a user specific time line, a user specific list of people who they are helping, and a user specific list of people helping them. Because I am ambitious, I would find myself asking questions like:

<blockquote>"Hmm... but will MySQL scale to 1,000,000 records?"</blockquote>

Looking back on these internal conversations I find them funny; programmers always tend to think big. However, these are real concerns that developers and teams think about. While planning I would constantly consult the blogosphere for help, and to see what others were doing. <a href="http://www.engineyard.com/blog/author/kirkhaines/">Kirk Haines of Engine Yard wrote a great series of NoSQL posts</a> highlighting and comparing different key-value stores and explaining their pros and cons. Since then, there has been a flurry of articles each week outlining different NoSQL datastores, NoSQL vs. MySQL debates and flamewars etc.

<h2>The Opportunity</h2>

Data is not created equal and this is a good thing. The same way we do not use an array for every "list" type problem when programming, sometimes hashes or linked lists will better suit the needs of the problem. We need to start thinking about data the same way. This was the best decision we made at Viewpointr and it allowed us to move forward at a great pace.

I looked at our application and broke it down into components. Viewpointr has many typical CRUD features similar to all Rails apps. These are very well designed for MySQL and a relational database. Being able to pull a list of answers based on a given question using simple and optimized SQL that I understand is a big win. However, there are some things that it doesn't model well.

Friendships. The simplest way to model friendship using a relational database is to create a relation that refers to the same table with two different names. Let's say you have a users table and you want to model Twitter-like friendship where User:1 can befriend User:2 without User:2's permission. It's easy enough.
<pre lang="ruby">class Friend &lt; ActiveRecord::Base

 belongs_to :user
 belongs_to :contact, :class_name => "User", :foreign_key => "contact_id"

 # Records that +user+ has befriended +contact+. A Friend row is created
 # only when no row for this (user, contact) pair exists yet, so calling
 # this twice for the same pair does not create a duplicate.
 def self.befriend(user,contact)
    # Look up an existing row for this exact pair of ids.
    relationship = find_by_user_id_and_contact_id(user.id,contact.id)
    if relationship.nil?
      transaction do
        Friend.create(:user => user, :contact => contact)
      end
    end
 end

end

class User &lt; ActiveRecord::Base

  has_many :friends, :dependent => :destroy
  has_many :contacts, :through => :friends, :order => "created_at DESC", :dependent => :destroy

end</pre>
However, I have always felt that it's clumsy. What I really want to say is:

"Each user has a list of IDs that represent the people that they are friends with."

Sounds like a de-normalized list right?

<h2>The Solution</h2>

Enter Redis. Redis is a key-value store similar to memcached but more flexible since lists, sets, ordered sets and strings can all be used as values. Thanks to its simple API, the problem I described is essentially an atomic operation in Redis. Redis has a great "set" implementation and allows you to do all of the things you would imagine a set to do: addition, subtraction, unique insertion, deletion, union, intersection, etc.

The operation will ultimately look like this:
<pre lang="ruby">SET = Redis.new
SET.set_add key, value</pre>
However, since we are working inside a Rails app, we need to make sure we have the right plumbing setup.

<ol>
	<li>Create a redis.rb in your initializers folder.</li>
	<li>Create a new Redis database for each of your needs.</li>
</ol>

In our case, we want to have a dataset that keeps track of a User's helpers (other users who are helping them) and a list of a User's friends (other users that the user is helping). Since we are going to be using these Redis objects throughout the codebase, I like to declare them as global variables in the redis.rb initializer file.
<pre lang="ruby">HELPERS = Redis.new(:db => 0)
HELPING = Redis.new(:db => 1)</pre>
Notice that I pass in the :db key so that we make sure HELPERS and HELPING will hold two different Redis objects. You can use <a href="http://github.com/defunkt/redis-namespace">redis-namespace gem</a> if you want, but I find the default syntax from the <a href="http://github.com/ezmobius/redis-rb">redis-rb gem</a> works well enough for my purposes.

Now that we have these global Redis objects at our disposal throughout the application, we can start using it in our Friend.befriend method.
<pre lang="ruby">class Friend &lt; ActiveRecord::Base

 belongs_to :user
 belongs_to :contact, :class_name => "User", :foreign_key => "contact_id"

 # Stores the friendship in the two Redis sets: +user+ is added to the set
 # kept under contact.id in HELPERS, and +contact+ is added to the set kept
 # under user.id in HELPING. If either call raises, the failure is logged
 # through RedisLogger instead of propagating to the caller.
 def self.befriend(user,contact)
   HELPERS.set_add contact.id, user.id
   HELPING.set_add user.id, contact.id
 rescue
   RedisLogger.info "Redis Exception"
 end

end

class User &lt; ActiveRecord::Base

  has_many :friends, :dependent => :destroy
  has_many :contacts, :through => :friends, :order => "created_at DESC", :dependent => :destroy

end</pre>
However, this isn't the best solution right out of the gate. Using a NoSQL datastore has some drawbacks that aren't apparent in development mode but reveal their ugly faces in production. If you are not careful, a simple restart of your Redis server can cause you to lose all your data. Managing your Redis data in production deserves its own post (coming soon), but for now, let's create a safer solution that you can gradually roll out as you become more comfortable with storing, backing up and using Redis datafiles.
<pre lang="ruby">class Friend &lt; ActiveRecord::Base

 belongs_to :user
 belongs_to :contact, :class_name => "User", :foreign_key => "contact_id"

 # Records that +user+ has befriended +contact+. When no Friend row exists
 # for the pair, one is created through ActiveRecord and the friendship is
 # then mirrored into Redis via add_to_denormalized_list. An existing pair
 # is left untouched in both stores.
 def self.befriend(user,contact)
    # Look up an existing row for this exact pair of ids.
    relationship = find_by_user_id_and_contact_id(user.id,contact.id)
    if relationship.nil?
      transaction do
        Friend.create(:user => user, :contact => contact)
      end
      # Mirror only friendships that were newly created above.
      add_to_denormalized_list(user,contact)
    end
 end

  # Mirrors a friendship into the two Redis sets: +user+ is added to the set
  # kept under contact.id in HELPERS, and +contact+ is added to the set kept
  # under user.id in HELPING. A failure in either call is logged through
  # RedisLogger, together with the exception message, and is not re-raised.
  def self.add_to_denormalized_list(user,contact)
    begin
      HELPERS.set_add contact.id, user.id
      HELPING.set_add user.id, contact.id
    rescue => e
      # "rescue => e" binds the raised exception; a bare "rescue e" would
      # instead treat +e+ as the exception class to match against.
      RedisLogger.info "Redis Exception: #{e.message}"
    end
  end

end

class User &lt; ActiveRecord::Base

  has_many :friends, :dependent => :destroy
  has_many :contacts, :through => :friends, :order => "created_at DESC", :dependent => :destroy

end</pre>
The strategy is simple: mirror the MySQL data in Redis. By adding a call to add_to_denormalized_list, we mirror the ActiveRecord call using the simple and elegant Redis set syntax discussed above. As you and your team get more practice and become more comfortable using Redis in production, you can start writing more to the denormalized list, eventually moving this part of your application away from ActiveRecord and MySQL to Redis. You could do this manually or you can use James Golick's recently released gem called <a href="http://github.com/jamesgolick/rollout">Rollout</a> that uses, you guessed it, Redis, to programmatically roll out features to users.

Like anything else you code, testing and benchmarking this process in production is crucial to make sure you are saving time and cycles. It might seem like a waste to duplicate your data in Redis, but you are a pragmatic polyglot persistence developer, right? You want to explore the NoSQL space while making sure that a little mistake or misunderstanding doesn't sink your ship. Give something like this a try; it doesn't get any more pragmatic. When you do try it or come up with something new, let me and everyone else know about it.

Thanks for reading.<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[<div class="note">This post comes from guest community contributor Kent Fenwick. Kent is the tech co-founder of <a href="http://viewpointr.com/">Viewpointr</a>, a personalized Q&amp;A service that aims to provide an easy way to get and give help. When he isn't programming, he spends time with his family and friends in Toronto. Kent writes <a href="http://kent.posterous.com/">here</a> and can be followed on Twitter at <a href="http://twitter.com/kentf">@kentf</a>.</div>
It's getting more and more difficult to pick a persistence layer for your web application. When I started in Rails four years ago, there was really only one option, MySQL. Now, there are many more, each with their own pros and cons. Some are new and some are old, some are tested, and others, not so much.  What's clear is that when you are building a business around data, you want to make good decisions. That being said, often only the future will tell if you've made the right ones. I want to share with you my persistence story about how I ended up getting the best of both worlds.

<h2>The Problem</h2>

There are too many choices and each choice has a loud evangelist of its own. When designing <a href="http://viewpointr.com/">Viewpointr</a> I went back and forth daily between MongoDB, MySQL, PostgreSQL and Cassandra. Viewpointr is essentially Twitter with a focus on helping people. Therefore, we have some common data elements: a user specific time line, a user specific list of people who they are helping, and a user specific list of people helping them. Because I am ambitious, I would find myself asking questions like:

<blockquote>"Hmm... but will MySQL scale to 1,000,000 records?"</blockquote>

Looking back on these internal conversations I find them funny; programmers always tend to think big. However, these are real concerns that developers and teams think about. While planning I would constantly consult the blogosphere for help, and to see what others were doing. <a href="http://www.engineyard.com/blog/author/kirkhaines/">Kirk Haines of Engine Yard wrote a great series of NoSQL posts</a> highlighting and comparing different key-value stores and explaining their pros and cons. Since then, there has been a flurry of articles each week outlining different NoSQL datastores, NoSQL vs. MySQL debates and flamewars etc.

<h2>The Opportunity</h2>

Data is not created equal and this is a good thing. The same way we do not use an array for every "list" type problem when programming, sometimes hashes or linked lists will better suit the needs of the problem. We need to start thinking about data the same way. This was the best decision we made at Viewpointr and it allowed us to move forward at a great pace.

I looked at our application and broke it down into components. Viewpointr has many typical CRUD features similar to all Rails apps. These are very well designed for MySQL and a relational database. Being able to pull a list of answers based on a given question using simple and optimized SQL that I understand is a big win. However, there are some things that it doesn't model well.

Friendships. The simplest way to model friendship using a relational database is to create a relation that refers to the same table with two different names. Let's say you have a users table and you want to model Twitter-like friendship where User:1 can befriend User:2 without User:2's permission. It's easy enough.
<pre lang="ruby">class Friend &lt; ActiveRecord::Base

 belongs_to :user
 belongs_to :contact, :class_name => "User", :foreign_key => "contact_id"

 # Records that +user+ has befriended +contact+. A Friend row is created
 # only when no row for this (user, contact) pair exists yet, so calling
 # this twice for the same pair does not create a duplicate.
 def self.befriend(user,contact)
    # Look up an existing row for this exact pair of ids.
    relationship = find_by_user_id_and_contact_id(user.id,contact.id)
    if relationship.nil?
      transaction do
        Friend.create(:user => user, :contact => contact)
      end
    end
 end

end

class User &lt; ActiveRecord::Base

  has_many :friends, :dependent => :destroy
  has_many :contacts, :through => :friends, :order => "created_at DESC", :dependent => :destroy

end</pre>
However, I have always felt that it's clumsy. What I really want to say is:

"Each user has a list of IDs that represent the people that they are friends with."

Sounds like a de-normalized list right?

h2. The Solution

Enter Redis. Redis is a key-value store similar to memcached but more flexible since lists, sets, ordered sets and strings can all be used as values. Thanks to its simple API, the problem I described is essentially an atomic operation in Redis. Redis has a great "set" implementation and allows you to do all of the things you would imagine a set to do: addition, subtraction, unique insertion, deletion, union, intersection, etc.

The operation will ultimately look like this:
<pre lang="ruby">SET = Redis.new
SET.set_add key, value</pre>
However, since we are working inside a Rails app, we need to make sure we have the right plumbing setup.

# Create a redis.rb in your initializers folder.
# Create a new Redis database for each of your needs.

In our case, we want to have a dataset that keeps track of a User's helpers (other users who are helping them) and a list of a User's friends (other users that the user is helping). Since we are going to be using these Redis objects throughout the codebase, I like to declare them as global variables in the redis.rb initializer file.
<pre lang="ruby">HELPERS = Redis.new(:db => 0)
HELPING = Redis.new(:db => 1)</pre>
Notice that I pass in the :db key so that we make sure HELPERS and HELPING will hold two different Redis objects. You can use <a href="http://github.com/defunkt/redis-namespace">redis-namespace gem</a> if you want, but I find the default syntax from the <a href="http://github.com/ezmobius/redis-rb">redis-rb gem</a> works well enough for my purposes.

Now that we have these global Redis objects at our disposal throughout the application, we can start using it in our Friend.befriend method.
<pre lang="ruby">class Friend &lt; ActiveRecord::Base

 belongs_to :user
 belongs_to :contact, :class_name => "User", :foreign_key => "contact_id"

 # user befriends contact
 def self.befriend(user,contact)
    begin
     HELPERS.set_add contact.id, user.id
     HELPING.set_add user.id, contact.id
    rescue
     RedisLogger.info "Redis Exception"
    end
 end

end

class User &lt; ActiveRecord::Base

  has_many :friends, :dependent => :destroy
  has_many :contacts, :through => :friends, :order => "created_at DESC", :dependent => :destroy

end</pre>
However, this isn't the best solution right out of the gate. Using a NoSQL datastore has some drawbacks that aren't apparent in development mode but reveal their ugly face in production. If you are not careful, a simple restart of your Redis server can cause you to lose all your data. Managing your Redis data in production deserves its own post (coming soon), but for now, let's create a safer solution that you can gradually roll out as you become more comfortable with storing, backing up and using Redis datafiles.
<pre lang="ruby">class Friend &lt; ActiveRecord::Base

 belongs_to :user
 belongs_to :contact, :class_name => "User", :foreign_key => "contact_id"

 # user befriends contact
 def self.befriend(user,contact)
    relationship = find_by_user_id_and_contact_id(user.id,friend.id)
    if relationship.nil?
      transaction do
        Friend.create(:user => user, :contact => contact)
      end
    add_to_denormalized_list(user,contact)
    end
 end

  def self.add_to_denormalized_list(user,contact)
    begin
     HELPERS.set_add contact.id, user.id
     HELPING.set_add user.id, contact.id
    rescue e
      RedisLogger.info "Redis Exception"
    end
  end

end

class User &lt; ActiveRecord::Base

  has_many :friends, :dependent => :destroy
  has_many :contacts, :through => :friends, :order => "created_at DESC", :dependent => :destroy

end</pre>
The strategy is simple: mirror the MySQL data in Redis. By adding a call to add_to_denormalized_list, we mirror the ActiveRecord call using the simple and elegant Redis set syntax discussed above. As you and your team get more practice and become more comfortable using Redis in production, you can start writing more to the denormalized list, eventually moving this part of your application away from ActiveRecord and MySQL to Redis. You could do this manually or you can use James Golick's recently released gem called <a href="http://github.com/jamesgolick/rollout">Rollout</a> that uses, you guessed it, Redis, to programmatically roll out features to users.

Like anything else you code, testing and benchmarking this process in production is crucial to make sure you are saving time and cycles. It might seem like a waste to duplicate your data in Redis, but you are a pragmatic polyglot persistence developer, right? You want to explore the NoSQL space while making sure that a little mistake or misunderstanding doesn't sink your ship. Give something like this a try; it doesn't get any more pragmatic. When you do try it or come up with something new, let me and everyone else know about it.

Thanks for reading.<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/ZLDs8zlAMBQ" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/pragmatic-polyglot-persistence-with-rails/feed/</wfw:commentRss>
		<slash:comments>8</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/pragmatic-polyglot-persistence-with-rails/</feedburner:origLink></item>
		<item>
		<title>Engine Yard CLI Now Open Source</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/6TB9_obxtLE/</link>
		<comments>http://www.engineyard.com/blog/2010/engine-yard-cli-now-open-source/#comments</comments>
		<pubDate>Wed, 18 Aug 2010 16:51:09 +0000</pubDate>
		<dc:creator>Andy Delcambre</dc:creator>
				<category><![CDATA[Cloud]]></category>
		<category><![CDATA[News]]></category>
		<category><![CDATA[Technology]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=4431</guid>
		<description><![CDATA[<p>Engine Yard has <a href="http://www.engineyard.com/open-source">a long history with open source software</a>. We have supported many big name projects over the years including Merb, Ruby 1.8.6, Rubinius, JRuby, and Rails. In addition to these larger projects, we also strive to open source internal technology that benefits the community as a whole. These projects are usually less well known, but we'd like to fix that.</p>

<p>Today we are announcing that <strong>the Engine Yard command line client is fully open sourced</strong>.</p>

<p>First, we have developed a new deployment tool that runs on the instance that is being deployed to. This code runs when you deploy from the command line, and will soon be the default for deploying from the dashboard.  The code is available at <a href="http://github.com/engineyard/engineyard-serverside">engineyard-serverside</a>.</p>

<p>The second component is the engineyard gem itself, a client library for our dashboard API. It is primarily used for managing custom recipes and deployment, but it will continue to expand over time.  This code is available at <a href="http://github.com/engineyard/engineyard">engineyard</a>.</p>

<p>The Engine Yard CLI was announced last month and we have a complete <a href="http://www.engineyard.com/blog/2010/engine-yard-appcloud-cli/">overview on the blog</a>. This new deployment system separates the deployment of your code, and the configuration of your cluster. This allows code to be deployed without any fear of incompatible configuration, and allows configuration changes to your server when the time is right for you. We provide even more flexibility through simple hooks into the deployment process, allowing you to completely override the way deployments happen. You can read about these and other features in greater detail in <a href="http://docs.engineyard.com/appcloud/guides/deployment/home">our recently revamped documentation site</a>.</p>

<p>Please feel free to send pull requests and file any bugs or feature requests using <a href="https://github.com/engineyard/engineyard/issues">Github Issues</a>.</p>
<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[<p>Engine Yard has <a href="http://www.engineyard.com/open-source">a long history with open source software</a>. We have supported many big name projects over the years including Merb, Ruby 1.8.6, Rubinius, JRuby, and Rails. In addition to these larger projects, we also strive to open source internal technology that benefits the community as a whole. These projects are usually less well known, but we'd like to fix that.</p>

<p>Today we are announcing that <strong>the Engine Yard command line client is fully open sourced</strong>.</p>

<p>First, we have developed a new deployment tool that runs on the instance that is being deployed to. This code runs when you deploy from the command line, and will soon be the default for deploying from the dashboard.  The code is available at <a href="http://github.com/engineyard/engineyard-serverside">engineyard-serverside</a>.</p>

<p>The second component is the engineyard gem itself, a client library for our dashboard API. It is primarily used for managing custom recipes and deployment, but it will continue to expand over time.  This code is available at <a href="http://github.com/engineyard/engineyard">engineyard</a>.</p>

<p>The Engine Yard CLI was announced last month and we have a complete <a href="http://www.engineyard.com/blog/2010/engine-yard-appcloud-cli/">overview on the blog</a>. This new deployment system separates the deployment of your code, and the configuration of your cluster. This allows code to be deployed without any fear of incompatible configuration, and allows configuration changes to your server when the time is right for you. We provide even more flexibility through simple hooks into the deployment process, allowing you to completely override the way deployments happen. You can read about these and other features in greater detail in <a href="http://docs.engineyard.com/appcloud/guides/deployment/home">our recently revamped documentation site</a>.</p>

<p>Please feel free to send pull requests and file any bugs or feature requests using <a href="https://github.com/engineyard/engineyard/issues">Github Issues</a>.</p>
<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/6TB9_obxtLE" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/engine-yard-cli-now-open-source/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/engine-yard-cli-now-open-source/</feedburner:origLink></item>
		<item>
		<title>Concurrency, Real and Imagined, in MRI; Threads</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/SYY6sh--WXQ/</link>
		<comments>http://www.engineyard.com/blog/2010/concurrency-real-and-imagined-in-mri-threads/#comments</comments>
		<pubDate>Wed, 11 Aug 2010 09:00:59 +0000</pubDate>
		<dc:creator>Kirk Haines</dc:creator>
				<category><![CDATA[Technology]]></category>
		<category><![CDATA[MRI]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=3649</guid>
		<description><![CDATA[<blockquote>In computer science, concurrency is a property of systems in which several computations  are executing simultaneously, and potentially interacting with each other. The computations may be executing on multiple cores in the same chip, preemptively time-shared threads on the same processor, or executed on physically separated processors.</blockquote>
-- <a href="http://en.wikipedia.org/wiki/Concurrency_%28computer_science%29">Wikipedia Concurrency article</a>

Simply put, concurrency is when you have more than one logical thread of execution occurring simultaneously, or at least appearing to occur simultaneously. When you write software that makes use of concurrency, you want your software to do two or more things at once.

The motivations for using concurrency are varied. Sometimes you may have architectural reasons for using concurrency -- your code makes more sense to you or is easier to write if you conceive it in more than one discretely executing unit. In other cases you may want to employ concurrency in order to make better use of the multiple cores that many modern computers have, enabling you to get better total throughput out of your code than you would have from a non-concurrent implementation.

Whatever the motivation for employing concurrency, the reality is that concurrency is a complex subject. There are many different ways to achieve concurrency in software, and they each have their own set of tradeoffs. Furthermore, if your platform is Ruby, your decisions about what kind of concurrency to employ will be influenced by the specific Ruby implementation you are targeting. Each provides a different set of concurrency options for you to consider.

This is the first installment in a new series of articles focusing on introducing and exploring the variety of concurrency options available in the Ruby ecosystem. Advantages and disadvantages will be discussed for each, and I'll leave you with a few examples of how you can leverage these different options in your code. It should be a fun subject to explore!

Concurrency is all about multitasking -- doing more than one thing at once. The building blocks of multitasking are processes, threads, and fibers. Each of these components is complex in itself, both because of the nuances in how they interact and can be combined, and because different platforms have variations in which capabilities they implement and in how they are implemented. Luckily, their overall description can be summarized in a useful way.

<strong>Processes</strong> are independent units of execution that generally share nothing with other processes, except for resources which are intended to be shared (such as shared memory segments, shared IO resources, or memory mapped files). Processes carry a lot of state information with them and have their own address spaces. Communication between them has to be through an interprocess communication mechanism provided by the platform that the processes are running in. Processes running on the same machine will be scheduled by the kernel, which will typically use some sort of time slicing algorithm to spread CPU usage of all running processes across the available cores.

<strong>Threads</strong> come in several different flavors, including kernel, user space, and green threads. On some platforms there are entities called light-weight processes that bring kernel threads into user space so they look somewhat like processes, but are less expensive. For our purposes, threads are contained within a process, and share the memory space and process state of the process with each other. Green threads differ in that they are not controlled or scheduled by the operating system. Rather, they are provided by the process itself. This has a portability advantage because it means that the threads will be available on every platform that the process can run on, and will work the same on each. The main disadvantage is that green threads, being managed by the process itself, are generally confined to sharing a single core, and are limited to the peculiarities of the process's threading implementation (which may vary substantially from the platform's own threading implementation). Regardless of the type of threading, context switching with threads is generally faster than it is with processes.

<strong>Fibers</strong> are like user space threads, except the operating system doesn't handle scheduling for them. Instead, fibers must be explicitly yielded to allow other fibers to run. This can have performance advantages like the reduction of system scheduling overhead. Since multitasking with fibers is cooperative, the need to use locks on shared resources is reduced or eliminated. Programmers can also leverage fibers to their advantage with IO operations by allowing other things to run while waiting for a slow or blocking IO operation.

Ruby concurrency isn't quite as simple as selecting one of the above and using it, however. In the beginning, there was just <strong>Ruby</strong>, a single implementation that everyone used. This Ruby implementation, now commonly called the Matz Ruby Implementation (MRI), saw a widespread usage explosion with the 1.8.x version. It's pretty old now. This is from the <a href="ftp://ruby-lang.org/pub/ruby/1.8">ftp://ruby-lang.org</a> FTP server:
<pre>carbon:/home/ftp/pub/ruby/1.8$ ls -la | grep ruby-1.8.0
-rw-rw-r--  1 root     ftp   1979070 Aug  4  2003 ruby-1.8.0.tar.gz</pre>
So, it has been around for a while, and offers a good starting point for discussing concurrency in Ruby.

MRI Ruby 1.8.x supports concurrency in a few ways. One of the first things newcomers to Ruby leap for are its threads. Depending on the language these newcomers were familiar with before arriving at Ruby, they may be in for a surprise. MRI Ruby 1.8.x provides a green thread implementation. As mentioned above, green threads do not make use of any threading system native to the platform. Instead, 1.8.x's threads are implemented within the interpreter itself. This leads to threads behaving consistently across any platform the interpreter runs on. Because they are green threads, however, they offer no advantages for CPU bound tasks.

<strong>cpu_bound_threads.rb</strong>
<pre>require 'benchmark'
threads = []
thread_count = ARGV[0].to_i
iterations = ARGV[1].to_i
increment = iterations / thread_count.to_f
sum = 0

Benchmark.bm do |bm|
  bm.report do
    thread_count.times do |counter|
      threads &lt;&lt; Thread.new do
        my_sum = 0
        queue = (1 + (increment * counter).to_i)..(0 + (increment * (counter + 1)).to_i)
        queue.each do |x|
          my_sum += x
        end
        Thread.current[:sum] = my_sum
      end
    end

    threads.each {|thread| thread.join; sum += thread[:sum]}

    puts "The sum of #{iterations} is #{sum}"

  end
end
</pre>
This is a simple program that takes a large range of numbers, divides them into smaller ranges, and hands each smaller range to a thread that calculates the sum of the range it was given. The results from each individual thread are then added together to arrive at a final answer.

All examples ran on an 8 core Linux machine. The numbers below are an average of the results of 100 runs for each set of inputs.
<table>
<caption>Threads</caption>
<thead>
<tr>
<th>Iterations</th>
<th>50000</th>
<th>500000</th>
<th>5000000</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>0.01730298</td>
<td>0.17149276</td>
<td>1.70610744</td>
</tr>
<tr>
<td>2</td>
<td>0.01724724</td>
<td>0.17179465</td>
<td>1.70557474</td>
</tr>
<tr>
<td>4</td>
<td>0.01729293</td>
<td>0.17181384</td>
<td>1.70570264</td>
</tr>
<tr>
<td>8</td>
<td>0.01741591</td>
<td>0.17210276</td>
<td>1.71201153</td>
</tr>
</tbody>
</table>
As demonstrated by the numbers, MRI 1.8 threads are absolutely no help at all for a CPU bound application. In fact, there is a small but measurable cost to the overhead of managing them that is apparent in the numbers. As thread count increased, timing consistently and measurably slowed. If you are an MRI 1.8 user, do not despair; threads are but one concurrency option available to you.

An option that will better serve you for CPU bound tasks is process based concurrency. The idea is simple. In order to leverage multiple cores/CPUs, just create more than one process to handle the work load. Ruby provides a <code>fork()</code> method which, on platforms that support it, uses the underlying <code>fork()</code> call from the C standard library. This will create a new process, with a new process ID, that can be considered an exact copy of the parent process, except that its resource utilization counters will be reset to 0.

Since processes do not share memory spaces, you must utilize another system provided communication mechanism in order to pass work to or from processes; this avoids the potential pitfalls that arise when trying to correctly manage locks on shared resources, but it does force one to think more specifically about exactly how to achieve communication.

<strong>cpu_bound_processes.rb</strong>
<pre>require 'benchmark'
processes = []
process_count = ARGV[0].to_i
iterations = ARGV[1].to_i
increment = iterations / process_count.to_f
sum = 0

def in_subprocess
  from_subprocess, to_parent = IO.pipe

  pid = fork do
    from_subprocess.close
    r = yield
    to_parent.puts [Marshal.dump(r)].pack("m")
    exit!
  end

  to_parent.close
  [pid,from_subprocess]
end

def get_result_from_subprocess(pid, from_subprocess)
  r = from_subprocess.read
  from_subprocess.close
  Process.waitpid(pid)
  Marshal.load(r.unpack("m")[0])
end

Benchmark.bm do |bm|
  bm.report do
    process_count.times do |counter|
      processes &lt;&lt; in_subprocess do
        my_sum = 0
        queue = (1 + (increment * counter).to_i)..(0 + (increment * (counter + 1)).to_i)
        queue.each do |x|
          my_sum += x
        end
        my_sum
      end
    end

   processes.each {|process| sum += get_result_from_subprocess(*process)}

   puts "The sum of #{iterations} is #{sum}"

  end
end
</pre>
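In this example I used IO pipes to send each child's result back to the master process. The work itself is handed to each child through the block that runs inside the forked process, so nothing needs to be sent from the master to the children over the pipe.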

As earlier, testing was done on an 8 core linux machine, with 100 runs of each test. The program is equivalent to the threaded version, and was changed only as necessary to enable it to be used in a multiprocess model instead of a multithread model.
<table>
<caption>Worker Processes</caption>
<thead>
<tr>
<th>Iterations</th>
<th>50000</th>
<th>500000</th>
<th>5000000</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>0.01805432</td>
<td>0.17199047</td>
<td>1.70812685</td>
</tr>
<tr>
<td>2</td>
<td>0.0098329</td>
<td>0.08675517</td>
<td>0.85509328</td>
</tr>
<tr>
<td>4</td>
<td>0.00609409</td>
<td>0.0446612</td>
<td>0.43100698</td>
</tr>
<tr>
<td>8</td>
<td>0.00847991</td>
<td>0.05346145</td>
<td>0.25621009</td>
</tr>
</tbody>
</table>
Take a good look at these numbers. Everything moves in the correct direction until you get to the 8 process row. There, the timings in both the 50000 and 500000 iteration columns are slower than the corresponding timings in the 4 process row. Do you have any theories as to why?

Processes are, in many ways, a great way to handle concurrency. One of their drawbacks, though, is that they are heavy structures. They can take up significant time and resources to create. Linux uses copy-on-write semantics when creating forked processes. This means it doesn't actually duplicate the address space of the forked process until pages in that space start changing. Then, it duplicates what changes. This means that forked processes on Linux can be created fairly quickly. However, MRI 1.8 is not very friendly to copy-on-write semantics.

If you are unfamiliar with the way memory is managed and garbage is collected in MRI 1.8, you should check out <a href="http://www.engineyard.com/blog/2010/mri-memory-allocation-a-primer-for-developers/">my article on MRI Memory Allocation</a>. One key aspect is that objects carry all of their status bits with them. This means that when the garbage collector scans the object space to find objects it can collect, it touches every object in the address space. For a process forked with copy-on-write semantics, this forces the kernel to make copies of all of those pages. This takes time, and largely negates the fast-creation benefit of copy-on-write forked processes.

The times for the lower iterations on the 8 process test reveal a cost to this form of concurrency. The overhead associated with creating the forked processes overwhelms the performance gains from the division of labor when the work to be done is brief enough. This is a reality for any form of concurrency -- there is always a performance tax from some amount of overhead. That tax is just higher when spawning something heavy like a process. Keep this in mind when you explore concurrency options for your task.

These first two examples both represent CPU bound problems. Many real world problems are not CPU bound, though. Rather, they are IO bound issues. Because an IO bound problem has latencies imposed on it by something outside of the program itself, IO bound problems can provide an excellent case for using MRI 1.8's green threads to improve performance.

<strong>io_bound_threads.rb</strong>
<pre>require 'net/http'
require 'thread'
require 'benchmark'

def get_data(url)
  tries = 0
  response = nil
  if /^http/.match(url)
    m = /^http:\/\/([^\/]*)(.*)/.match(url)
    site = m[1]
    path = m[2]
    begin
      http = Net::HTTP.new(site)
      http.open_timeout = 30
      http.start {|h| response = h.get(path)}
    rescue Exception
      tries += 1
      retry if tries &lt; 5
    end
  end
  response.kind_of?(Array) ? response[1] : response.respond_to?(:body) ? response.body : ''
end

mutex = Mutex.new
signal = ConditionVariable.new
thread_count = ARGV[0].to_i
fetches = ARGV[1].to_i
url = ARGV[2]
threads = []
count = 0
active_threads = 0

Benchmark.bm do |bm|
  bm.report do
    while count &lt; fetches
      while count &lt; fetches &amp;&amp; active_threads &lt; thread_count
        mutex.synchronize do
          active_threads += 1
          count += 1
        end
        Thread.new do
          get_data(url)
          mutex.synchronize do
            active_threads -= 1
            threads &lt;&lt; Thread.current
            signal.signal
          end
        end
      end

      mutex.synchronize do
        signal.wait(mutex)
      end
      while th = threads.shift
        th.join
      end
    end
  end
end
</pre>
This script makes many HTTP requests. For simplicity's sake, let's say it just makes the same request over and over again, but it could easily be expanded to take a list of URLs, and to do something useful with the returned data. The script uses threads much like the CPU bound example, except that it is a bit more sophisticated in how it counts the work it has assigned to generated threads, and how it waits for all the threads to be completed.

This table shows timing from it in action. The target URL used was not local to the testing machine. Each run used the indicated number of threads to gather the URL, either a "fast" URL, with an over-the-net response speed of about 35 requests per second, or a "slow" URL with an over-the-net response speed of about 3 requests per second, 400 times. There were 100 runs completed. The numbers below are an average from those runs.
<table>
<caption>Worker Threads</caption>
<thead>
<tr>
<th>Request speed</th>
<th>35/second</th>
<th>3/second</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>6.53462668</td>
<td>61.1016239</td>
</tr>
<tr>
<td>2</td>
<td>3.34861606</td>
<td>30.4514539</td>
</tr>
<tr>
<td>5</td>
<td>1.38942396</td>
<td>12.1620945</td>
</tr>
<tr>
<td>10</td>
<td>0.72804622</td>
<td>6.0968646</td>
</tr>
<tr>
<td>20</td>
<td>0.47964698</td>
<td>3.0411382</td>
</tr>
</tbody>
</table>
Just a glance at these numbers clearly shows that Ruby threads are a big help with an IO bound activity like this. The relationship between number of threads and reduction in time to complete the task is not linear; but even with up to 20 threads there is a significant benefit to additional numbers of threads.  The benefit is more linear, and evident for slower requests because the requests spend more time waiting on IO, and less on CPU bound activities.

There are some caveats to be aware of with regard to Ruby threads.  First, even though they are green threads, as soon as one starts sharing resources between threads, threading becomes something that can be hard to get right. Share as little as possible, thoroughly think through your code, and use tests to support your reasoning, because threading problems can be hard to diagnose and solve.

Second, MRI 1.8 has a limit on the number of threads that it will manage. As a consequence of how the internals are implemented, this means that on most systems (notably excluding win32 systems), total thread count is limited to 1024. Also, because of the way it is implemented, the overhead increases to manage a larger number of threads versus smaller. Each thread consumes a significant amount of memory, so do not go crazy with threads or it will backfire on you.

Third, because of the way that Ruby threading is implemented, it is possible for a C extension to Ruby to take control of the process and prevent Ruby from allowing context switches to other threads. It is possible to write extensions so that they do not do this, but many are not written in this way. Where this bites most people, is with code that interacts with a database. One can reasonably look at a database query as an IO bound activity -- all the Ruby process is really doing is sending a request to the DB and waiting for a response. However, most DB interaction libraries are implemented as C extensions, and some of them do not play well with Ruby threads. One of the most common offenders is Mysql-Ruby. It will block all of Ruby while waiting for the result from a long running query. This means that a long running query will block the whole process until it returns. On the other hand, Ruby-PG, the driver for Postgres, will context switch within <code>pgconn_block()</code>, the function that makes blocking calls to the database, thus permitting other MRI 1.8 threads to run even during a long running query.

Fourth, because MRI 1.8 threads are green threads, they all run inside the context of a single process and a single system thread. Thus, while they give the appearance of concurrency, there is actually only one thread running at once. This is okay, because it is the appearance of concurrency that matters. If you run <code>top</code> on your laptop or VM shell, you will see a large number of processes running on your system. This number will exceed the number of cores that you have by a large margin, but you rarely have to worry about which processes are actually running on one of the cores at any given time. Your kernel takes care of slicing up access to the CPU into fine enough grains that it appears that all the running processes are executing on a core at the same time (even though most of them probably are not actually running at any given time). Concurrency in computing doesn't strictly mean that two or more things are actually running at the same time. Rather, it means that there is an appearance that they are, and that one works with them on the assumption that they are, and lets the underlying scheduler deal with making reality fit that appearance.

An entire book could be written about concurrency in Ruby. I've just scratched the surface with this overview of process and thread based concurrency in Ruby. Hopefully this helped answer a few questions or suggested some techniques to consider. 

Future installments in this series will cover Ruby 1.9.x (which uses system threads as opposed to green threads), JRuby, Rubinius, and using event systems like EventMachine to handle concurrency. So stay tuned! There is a lot more coming soon!<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[<blockquote>In computer science, concurrency is a property of systems in which several computations  are executing simultaneously, and potentially interacting with each other. The computations may be executing on multiple cores in the same chip, preemptively time-shared threads on the same processor, or executed on physically separated processors.</blockquote>
-- <a href="http://en.wikipedia.org/wiki/Concurrency_%28computer_science%29">Wikipedia Concurrency article</a>

Simply put, concurrency is when you have more than one logical thread of execution occurring simultaneously, or at least appearing to occur simultaneously. When you write software that makes use of concurrency, you want your software to do two or more things at once.

The motivations for using concurrency are varied. Sometimes you may have architectural reasons for using concurrency -- your code makes more sense to you or is easier to write if you conceive it in more than one discretely executing unit. In other cases you may want to employ concurrency in order to make better use of the multiple cores that many modern computers have, enabling you to get better total throughput out of your code than you would have from a non-concurrent implementation.

Whatever the motivation for employing concurrency, the reality is that concurrency is a complex subject. There are many different ways to achieve concurrency in software, and they each have their own set of tradeoffs. Furthermore, if your platform is Ruby, your decisions about what kind of concurrency to employ will be influenced by the specific Ruby implementation you are targeting. Each provides a different set of concurrency options for you to consider.

This is the first installment in a new series of articles focusing on introducing and exploring the variety of concurrency options available in the Ruby ecosystem. Advantages and disadvantages will be discussed for each, and I'll leave you with a few examples of how you can leverage these different options in your code. It should be a fun subject to explore!

Concurrency is all about multitasking -- doing more than one thing at once. The building blocks of multitasking are processes, threads, and fibers. Each of these components is complex in itself, both because of the nuances in how they interact and can be combined, and because different platforms have variations in which capabilities they implement and in how they are implemented. Luckily, their overall description can be summarized in a useful way.

<strong>Processes</strong> are independent units of execution that generally share nothing with other processes, except for resources which are intended to be shared (such as shared memory segments, shared IO resources, or memory mapped files). Processes carry a lot of state information with them and have their own address spaces. Communication between them has to be through an interprocess communication mechanism provided by the platform that the processes are running in. Processes running on the same machine will be scheduled by the kernel, which will typically use some sort of time slicing algorithm to spread CPU usage of all running processes across the available cores.

<strong>Threads</strong> come in several different flavors, including kernel, user space, and green threads. On some platforms there are entities called light-weight processes that bring kernel threads into user space so they look somewhat like processes, but are less expensive. For our purposes, threads are contained within a process, and share the memory space and process state of the process with each other. Green threads differ in that they are not controlled or scheduled by the operating system. Rather, they are provided by the process itself. This has a portability advantage because it means that the threads will be available on every platform that the process can run on, and will work the same on each. The main disadvantage is that green threads, being managed by the process itself, are generally confined to sharing a single core, and are limited to the peculiarities of the process's threading implementation (which may vary substantially from the platform's own threading implementation). Regardless of the type of threading, context switching with threads is generally faster than it is with processes.

<strong>Fibers</strong> are like user space threads, except the operating system doesn't handle scheduling for them. Instead, fibers must be explicitly yielded to allow other fibers to run. This can have performance advantages like the reduction of system scheduling overhead. Since multitasking with fibers is cooperative, the need to use locks on shared resources is reduced or eliminated. Programmers can also leverage fibers to their advantage with IO operations by allowing other things to run while waiting for a slow or blocking IO operation.

Ruby concurrency isn't quite as simple as selecting one of the above and using it, however. In the beginning, there was just <strong>Ruby</strong>, a single implementation that everyone used. This Ruby implementation, now commonly called the Matz Ruby Implementation (MRI), saw a widespread usage explosion with the 1.8.x version. It's pretty old now. This is from the <a href="ftp://ruby-lang.org/pub/ruby/1.8">ftp://ruby-lang.org</a> FTP server:
<pre>carbon:/home/ftp/pub/ruby/1.8$ ls -la | grep ruby-1.8.0
-rw-rw-r--  1 root     ftp   1979070 Aug  4  2003 ruby-1.8.0.tar.gz</pre>
So, it has been around for a while, and offers a good starting point for discussing concurrency in Ruby.

MRI Ruby 1.8.x supports concurrency in a few ways. One of the first things newcomers to Ruby leap for are its threads. Depending on the language these newcomers were familiar with before arriving at Ruby, they may be in for a surprise. MRI Ruby 1.8.x provides a green thread implementation. As mentioned above, green threads do not make use of any threading system native to the platform. Instead, 1.8.x's threads are implemented within the interpreter itself. This leads to threads behaving consistently across any platform the interpreter runs on. Because they are green threads, however, they offer no advantages for CPU bound tasks.

<strong>cpu_bound_threads.rb</strong>
<pre>require 'benchmark'
threads = []
thread_count = ARGV[0].to_i
iterations = ARGV[1].to_i
increment = iterations / thread_count.to_f
sum = 0

Benchmark.bm do |bm|
  bm.report do
    thread_count.times do |counter|
      threads &lt;&lt; Thread.new do
        my_sum = 0
        queue = (1 + (increment * counter).to_i)..(0 + (increment * (counter + 1)).to_i)
        queue.each do |x|
          my_sum += x
        end
        Thread.current[:sum] = my_sum
      end
    end

    threads.each {|thread| thread.join; sum += thread[:sum]}

    puts "The sum of #{iterations} is #{sum}"

  end
end
</pre>
This is a simple program that takes a large range of numbers, divides them into smaller ranges, and hands each smaller range to a thread that calculates the sum of the range it was given. The results from each individual thread are then added together to arrive at a final answer.

All examples ran on an 8 core Linux machine. The numbers below are an average of the results of 100 runs for each set of inputs.
<table>
<caption>Threads</caption>
<thead>
<tr>
<th>Iterations</th>
<th>50000</th>
<th>500000</th>
<th>5000000</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>0.01730298</td>
<td>0.17149276</td>
<td>1.70610744</td>
</tr>
<tr>
<td>2</td>
<td>0.01724724</td>
<td>0.17179465</td>
<td>1.70557474</td>
</tr>
<tr>
<td>4</td>
<td>0.01729293</td>
<td>0.17181384</td>
<td>1.70570264</td>
</tr>
<tr>
<td>8</td>
<td>0.01741591</td>
<td>0.17210276</td>
<td>1.71201153</td>
</tr>
</tbody>
</table>
As demonstrated by the numbers, MRI 1.8 threads are absolutely no help at all for a CPU bound application. In fact, there is a small but measurable cost to the overhead of managing them that is apparent in the numbers. As thread count increased, timing consistently and measurably slowed. If you are an MRI 1.8 user, do not despair; threads are but one concurrency option available to you.

An option that will better serve you for CPU bound tasks is process based concurrency. The idea is simple. In order to leverage multiple cores/CPUs, just create more than one process to handle the work load. Ruby provides a <code>fork()</code> method call which, on platforms that support it, uses the underlying <code>fork()</code> call from the C standard library. This will create a new process, with a new process ID, that can be considered an exact copy of the parent process, except that its resource allocations will be reset to 0.

Since processes do not share memory spaces, you must utilize another system provided communication mechanism in order to pass work to or from processes; this avoids the potential pitfalls that arise when trying to correctly manage locks on shared resources, but it does force one to think more specifically about exactly how to achieve communication.

<strong>cpu_bound_processes.rb</strong>
<pre>require 'benchmark'
processes = []
process_count = ARGV[0].to_i
iterations = ARGV[1].to_i
increment = iterations / process_count.to_f
sum = 0

def in_subprocess
  from_subprocess, to_parent = IO.pipe

  pid = fork do
    from_subprocess.close
    r = yield
    to_parent.puts [Marshal.dump(r)].pack("m")
    exit!
  end

  to_parent.close
  [pid,from_subprocess]
end

def get_result_from_subprocess(pid, from_subprocess)
  r = from_subprocess.read
  from_subprocess.close
  Process.waitpid(pid)
  Marshal.load(r.unpack("m")[0])
end

Benchmark.bm do |bm|
  bm.report do
    process_count.times do |counter|
      processes &lt;&lt; in_subprocess do
        my_sum = 0
        queue = (1 + (increment * counter).to_i)..(0 + (increment * (counter + 1)).to_i)
        queue.each do |x|
          my_sum += x
        end
        my_sum
      end
    end

   processes.each {|process| sum += get_result_from_subprocess(*process)}

   puts "The sum of #{iterations} is #{sum}"

  end
end
</pre>
In this example I used IO pipes to send each child's result back to the master process; the children get their inputs simply by inheriting a copy of the master's state when they are forked.

As earlier, testing was done on an 8 core Linux machine, with 100 runs of each test. The program is equivalent to the threaded version, and was changed only as necessary to enable it to be used in a multiprocess model instead of a multithread model.
<table>
<caption>Worker Processes</caption>
<thead>
<tr>
<th>Iterations</th>
<th>50000</th>
<th>500000</th>
<th>5000000</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>0.01805432</td>
<td>0.17199047</td>
<td>1.70812685</td>
</tr>
<tr>
<td>2</td>
<td>0.0098329</td>
<td>0.08675517</td>
<td>0.85509328</td>
</tr>
<tr>
<td>4</td>
<td>0.00609409</td>
<td>0.0446612</td>
<td>0.43100698</td>
</tr>
<tr>
<td>8</td>
<td>0.00847991</td>
<td>0.05346145</td>
<td>0.25621009</td>
</tr>
</tbody>
</table>
Take a good look at these numbers. Everything moves in the correct direction, until you get to the 8 process row. Then, for both the 50000 and 500000 iteration columns, timing is slower than it was in the 4 process row. Do you have any theories as to why?

Processes are, in many ways, a great way to handle concurrency. One of their drawbacks, though, is that they are heavy structures. They can take up significant time and resources to create. Linux uses copy-on-write semantics when creating forked processes. This means it doesn't actually duplicate the address space of the forked process until pages in that space start changing. Then, it duplicates what changes. This means that forked processes on Linux can be created fairly quickly. However, MRI 1.8 is not very friendly to copy-on-write semantics.

If you are unfamiliar with the way memory is managed and garbage is collected in MRI 1.8, you should check out <a href="http://www.engineyard.com/blog/2010/mri-memory-allocation-a-primer-for-developers/">my article on MRI Memory Allocation</a>. One key aspect is that objects carry all of their status bits with them. This means that when the garbage collector scans the object space to find objects it can collect, it touches every object in the address space. For a process forked with copy-on-write semantics, this forces the kernel to make copies of all of those pages. This takes time, and largely negates the fast-creation benefit of copy-on-write forked processes.

The times for the lower iterations on the 8 process test reveal a cost to this form of concurrency. The overhead associated with creating the forked processes overwhelms the performance gains from the division of labor when the work to be done is brief enough. This is a reality for any form of concurrency -- there is always a performance tax from some amount of overhead. That tax is just higher when spawning something heavy like a process. Keep this in mind when you explore concurrency options for your task.

These first two examples both represent CPU bound problems. Many real world problems are not CPU bound, though. Rather, they are IO bound issues. Because an IO bound problem has latencies imposed on it by something outside of the program itself, IO bound problems can provide an excellent case for using MRI 1.8's green threads to improve performance.

<strong>io_bound_threads.rb</strong>
<pre>require 'net/http'
require 'thread'
require 'benchmark'

def get_data(url)
  tries = 0
  response = nil
  if /^http/.match(url)
    m = /^http:\/\/([^\/]*)(.*)/.match(url)
    site = m[1]
    path = m[2]
    begin
      http = Net::HTTP.new(site)
      http.open_timeout = 30
      http.start {|h| response = h.get(path)}
    rescue Exception
      tries += 1
      retry if tries &lt; 5
    end
  end
  response.kind_of?(Array) ? response[1] : response.respond_to?(:body) ? response.body : ''
end

mutex = Mutex.new
signal = ConditionVariable.new
thread_count = ARGV[0].to_i
fetches = ARGV[1].to_i
url = ARGV[2]
threads = []
count = 0
active_threads = 0

Benchmark.bm do |bm|
  bm.report do
    while count &lt; fetches
      while count &lt; fetches &amp;&amp; active_threads &lt; thread_count
        mutex.synchronize do
          active_threads += 1
          count += 1
        end
        Thread.new do
          get_data(url)
          mutex.synchronize do
            active_threads -= 1
            threads &lt;&lt; Thread.current
            signal.signal
          end
        end
      end

      mutex.synchronize do
        signal.wait(mutex)
      end
      while th = threads.shift
        th.join
      end
    end
  end
end
</pre>
This script makes many HTTP requests. For simplicity's sake, let's say it just makes the same request over and over again, but it could easily be expanded to take a list of URLs, and to do something useful with the returned data. The script uses threads much like the CPU bound example, except that it is a bit more sophisticated in how it counts the work it has assigned to generated threads, and how it waits for all the threads to be completed.

This table shows timing from it in action. The target URL used was not local to the testing machine. Each run used the indicated number of threads to gather the URL, either a "fast" URL, with an over-the-net response speed of about 35 requests per second, or a "slow" URL with an over-the-net response speed of about 3 requests per second, 400 times. There were 100 runs completed. The numbers below are an average from those runs.
<table>
<caption>Worker Threads</caption>
<thead>
<tr>
<th>Request speed</th>
<th>35/second</th>
<th>3/second</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>6.53462668</td>
<td>61.1016239</td>
</tr>
<tr>
<td>2</td>
<td>3.34861606</td>
<td>30.4514539</td>
</tr>
<tr>
<td>5</td>
<td>1.38942396</td>
<td>12.1620945</td>
</tr>
<tr>
<td>10</td>
<td>0.72804622</td>
<td>6.0968646</td>
</tr>
<tr>
<td>20</td>
<td>0.47964698</td>
<td>3.0411382</td>
</tr>
</tbody>
</table>
Just a glance at these numbers clearly shows that Ruby threads are a big help with an IO bound activity like this. The relationship between number of threads and reduction in time to complete the task is not linear; but even with up to 20 threads there is a significant benefit to additional numbers of threads.  The benefit is more linear, and evident for slower requests because the requests spend more time waiting on IO, and less on CPU bound activities.

There are some caveats to be aware of with regard to Ruby threads.  First, even though they are green threads, as soon as one starts sharing resources between threads, threading becomes something that can be hard to get right. Share as little as possible, thoroughly think through your code, and use tests to support your reasoning, because threading problems can be hard to diagnose and solve.

Second, MRI 1.8 has a limit on the number of threads that it will manage. As a consequence of how the internals are implemented, this means that on most systems (notably excluding win32 systems), total thread count is limited to 1024. Also, because of the way it is implemented, the overhead increases to manage a larger number of threads versus smaller. Each thread consumes a significant amount of memory, so do not go crazy with threads or it will backfire on you.

Third, because of the way that Ruby threading is implemented, it is possible for a C extension to Ruby to take control of the process and prevent Ruby from allowing context switches to other threads. It is possible to write extensions so that they do not do this, but many are not written in this way. Where this bites most people, is with code that interacts with a database. One can reasonably look at a database query as an IO bound activity -- all the Ruby process is really doing is sending a request to the DB and waiting for a response. However, most DB interaction libraries are implemented as C extensions, and some of them do not play well with Ruby threads. One of the most common offenders is Mysql-Ruby. It will block all of Ruby while waiting for the result from a long running query. This means that a long running query will block the whole process until it returns. On the other hand, Ruby-PG, the driver for Postgres, will context switch within <code>pgconn_block()</code>, the function that makes blocking calls to the database, thus permitting other MRI 1.8 threads to run even during a long running query.

Fourth, because MRI 1.8 threads are green threads, they all run inside the context of a single process and a single system thread. Thus, while they give the appearance of concurrency, there is actually only one thread running at once. This is okay, because it is the appearance of concurrency that matters. If you run <code>top</code> on your laptop or VM shell, you will see a large number of processes running on your system. This number will exceed the number of cores that you have by a large margin, but you rarely have to worry about which processes are actually running on one of the cores at any given time. Your kernel takes care of slicing up access to the CPU into fine enough grains that it appears that all the running processes are executing on a core at the same time (even though most of them probably are not actually running at any given time). Concurrency in computing doesn't strictly mean that two or more things are actually running at the same time. Rather, it means that there is an appearance that they are, and that one works with them on the assumption that they are, and lets the underlying scheduler deal with making reality fit that appearance.

An entire book could be written about concurrency in Ruby. I've just scratched the surface with this overview of process and thread based concurrency in Ruby. Hopefully this helped answer a few questions or suggested some techniques to consider. 

Future installments in this series will cover Ruby 1.9.x (which uses system threads as opposed to green threads), JRuby, Rubinius, and using event systems like EventMachine to handle concurrency. So stay tuned! There is a lot more coming soon!<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/SYY6sh--WXQ" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/concurrency-real-and-imagined-in-mri-threads/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/concurrency-real-and-imagined-in-mri-threads/</feedburner:origLink></item>
		<item>
		<title>The Boys and Girl of Summer</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/14z4gGUftO8/</link>
		<comments>http://www.engineyard.com/blog/2010/the-boys-and-girl-of-summer/#comments</comments>
		<pubDate>Fri, 06 Aug 2010 12:15:14 +0000</pubDate>
		<dc:creator>Tom Mornini</dc:creator>
				<category><![CDATA[News]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=4294</guid>
		<description><![CDATA[Mark Twain once said "The coldest winter I ever saw was the summer I spent in San Francisco." While the micro-climate of SOMA (South of Market), the area where Engine Yard HQ is located, has considerably more sun than the famously foggy western side of town, the winds of change are definitely blowing through here this summer.

Tammer Saleh joined us 3 weeks ago as Director of Application Development. Most recently Tammer operated his own consultancy practice, and he is a well known and respected member of the Ruby community. He's already identified a number of quick wins that will continue the rapid fire development of AppCloud. While the AppCloud team has been absolutely killing it, I have confidence that Tammer's skills and techniques will further press the pace.

As we continue to grow, we felt the time was right to reaffirm our commitment to open source.  We don't want newcomers to the community to think we're a mere commercial entity, as opposed to the open source symbiote that long time members know us to be! Today I'm announcing two hires that will, I believe, make our commitment abundantly clear.

Dr. Nic Williams will be arriving from Australia to take the role of VP of Technology. His primary responsibility will be to organize and guide Engine Yard's open source efforts. He has already <a title="drnicwilliams.com" href="http://drnicwilliams.com/2010/08/04/coming-to-america/">blogged</a> about his pending move; perhaps I should have left off the Mark Twain quote? Thank you for your sacrifice, Mrs. Dr. Nic! Hopefully you and my wife, Elizabeth, will become fast friends! I find San Francisco to be a friendly and wonderful place to live and suspect you will too! :-)

Roger Levy will be joining us later in the month to oversee engineering, support and product management in his role as SVP of Products. Roger, who managed the SUSE Linux business at Novell, certainly has the open source experience and credentials to continue to reinforce Engine Yard's commitment to open source.

Finally, we've also added Sara Gardner as VP of Marketing and Steve Gross as VP of Business Development. There are so many things to inform the community about, and so many great companies to partner with, that Sara and Steve are already busy! I welcome them to Engine Yard and anxiously anticipate their unique contributions.

Startups that grow quickly place a LOT of stress on their founders and early employees. Many founders thrive on this, I know that Lance and I did. Finally, as a company grows, priorities and roles change. I won't argue with those who say that staying small is beautiful, as I agree with much of what has been said on that subject. We, however, chose a different path: go big or go home! :-)

Perhaps, then, it should not be a surprise that Ezra and his family have decided to move to Portland to be closer to family. I wish my good friend a very fond farewell, and a reduction of the stress that he has endured along with the rest of the founders and early employees of Engine Yard.

I cannot express how exciting and fulfilling it has been to steer Engine Yard over the last 4 years, from an early advocate of Ruby on Rails and its community, to the force that it is today. As the last remaining founder, I must admit that I'm very proud of what we have achieved during that time, both at Engine Yard and beyond! Lance Walley is now CEO of <a title="Chargify" href="http://chargify.com">Chargify</a>, an uber-cool recurring billing service. Jayson Vantuyl has created a successful consulting business and is up to <a title="ScatterBit" href="http://scatterbit.com">something sneaky</a> as well! And while only time will tell what Ezra shall choose to pursue next, I'm certain that additional success awaits him.

Finally, I'd like to close with something that I wish I could say every time I open my mouth: Thank you to our nearly 1,500 customers and all of my hard working, talented and dedicated employees. Perhaps the highest praise one person can offer another is "you make my dreams possible" and for making mine possible, I'll forever be grateful to each and every one of you. :-)<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[Mark Twain once said "The coldest winter I ever saw was the summer I spent in San Francisco." While the micro-climate of SOMA (South of Market), the area where Engine Yard HQ is located, has considerably more sun than the famously foggy western side of town, the winds of change are definitely blowing through here this summer.

Tammer Saleh joined us 3 weeks ago as Director of Application Development. Most recently Tammer operated his own consultancy practice, and he is a well known and respected member of the Ruby community. He's already identified a number of quick wins that will continue the rapid fire development of AppCloud. While the AppCloud team has been absolutely killing it, I have confidence that Tammer's skills and techniques will further press the pace.

As we continue to grow, we felt the time was right to reaffirm our commitment to open source.  We don't want newcomers to the community to think we're a mere commercial entity, as opposed to the open source symbiote that long time members know us to be! Today I'm announcing two hires that will, I believe, make our commitment abundantly clear.

Dr. Nic Williams will be arriving from Australia to take the role of VP of Technology. His primary responsibility will be to organize and guide Engine Yard's open source efforts. He has already <a title="drnicwilliams.com" href="http://drnicwilliams.com/2010/08/04/coming-to-america/">blogged</a> about his pending move; perhaps I should have left off the Mark Twain quote? Thank you for your sacrifice, Mrs. Dr. Nic! Hopefully you and my wife, Elizabeth, will become fast friends! I find San Francisco to be a friendly and wonderful place to live and suspect you will too! :-)

Roger Levy will be joining us later in the month to oversee engineering, support and product management in his role as SVP of Products. Roger, who managed the SUSE Linux business at Novell, certainly has the open source experience and credentials to continue to reinforce Engine Yard's commitment to open source.

Finally, we've also added Sara Gardner as VP of Marketing and Steve Gross as VP of Business Development. There are so many things to inform the community about, and so many great companies to partner with, that Sara and Steve are already busy! I welcome them to Engine Yard and anxiously anticipate their unique contributions.

Startups that grow quickly place a LOT of stress on their founders and early employees. Many founders thrive on this, I know that Lance and I did. Finally, as a company grows, priorities and roles change. I won't argue with those who say that staying small is beautiful, as I agree with much of what has been said on that subject. We, however, chose a different path: go big or go home! :-)

Perhaps, then, it should not be a surprise that Ezra and his family have decided to move to Portland to be closer to family. I wish my good friend a very fond farewell, and a reduction of the stress that he has endured along with the rest of the founders and early employees of Engine Yard.

I cannot express how exciting and fulfilling it has been to steer Engine Yard over the last 4 years, from an early advocate of Ruby on Rails and its community, to the force that it is today. As the last remaining founder, I must admit that I'm very proud of what we have achieved during that time, both at Engine Yard and beyond! Lance Walley is now CEO of <a title="Chargify" href="http://chargify.com">Chargify</a>, an uber-cool recurring billing service. Jayson Vantuyl has created a successful consulting business and is up to <a title="ScatterBit" href="http://scatterbit.com">something sneaky</a> as well! And while only time will tell what Ezra shall choose to pursue next, I'm certain that additional success awaits him.

Finally, I'd like to close with something that I wish I could say every time I open my mouth: Thank you to our nearly 1,500 customers and all of my hard working, talented and dedicated employees. Perhaps the highest praise one person can offer another is "you make my dreams possible" and for making mine possible, I'll forever be grateful to each and every one of you. :-)<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/14z4gGUftO8" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/the-boys-and-girl-of-summer/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/the-boys-and-girl-of-summer/</feedburner:origLink></item>
		<item>
		<title>Monitoring Memory with JRuby, Part 1: jhat and VisualVM</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/8X9Y0N6XsCM/</link>
		<comments>http://www.engineyard.com/blog/2010/monitoring-memory-with-jruby-part-1-jhat-and-visualvm/#comments</comments>
		<pubDate>Wed, 04 Aug 2010 11:27:20 +0000</pubDate>
		<dc:creator>Charles Oliver Nutter</dc:creator>
				<category><![CDATA[Technology]]></category>
		<category><![CDATA[JRuby]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=4249</guid>
		<description><![CDATA[There's been a lot of fuss made lately over memory inspection and profiling tools for Ruby implementations. And it's not without reason; inspecting a Ruby application's memory profile, much less diagnosing problems, has traditionally been very difficult. At least, difficult if you don't use JRuby.

Because JRuby runs on the JVM, we benefit from the dozens of tools that have been written for the JVM. Among these tools are numerous memory inspection, profiling, and reporting tools, some built into the JDK itself. Want a heap dump? Check out the jmap (Java memory map) and jhat (Java heap analysis tool) shipped with Hotspot-based JVMs (Sun, OpenJDK). Looking for a bit more? There's the Memory Analysis Tool based on Eclipse, the YourKit memory and CPU profiling app, VisualVM, now also shipped with Hotspot JVMs...and many more. There are literally dozens of these tools, and they provide just about everything you can imagine for investigating memory.

In this post, I'll show how you can use two of these tools: VisualVM, a simple, graphical tool for exploring a <b>running</b> JVM; and the jmap/jhat combination, which allows you to dump the memory heap to disk for inspection offline.

<strong>Getting JRuby Prepared</strong>

All these tools work with any version of JRuby, but as part of JRuby 1.6 development I've been adding some enhancements. Specifically, I've made some modifications that allow Ruby objects to show up side-by-side with Java objects in memory profiles. A little explanation is in order.

In JRuby, all the core classes are represented by "native" Java classes. Object is represented by org.jruby.RubyObject, String is org.jruby.RubyString, and so on. Normally, if you extend one of the core classes, we don't actually create a new "native" class to represent it; instead, all user-created classes that extend Object simply show up as RubyObject in memory. This is still incredibly useful; you can look into RubyObject and see the metaClass field, which indicates the actual Ruby type.

Let's see what that looks like, so we know where we're starting from. We'll run a simple script that creates a custom class, instantiates and saves 10000 instances of it, and then sleeps.

<pre>~/projects/jruby ➔ cat foo_heap_example.rb 
class Foo
end

ary = []
10000.times { ary &lt;&lt; Foo.new }

puts "ready for analysis!"
sleep

~/projects/jruby ➔ jruby foo_heap_example.rb 
ready for analysis!</pre>

So we have our test subject ready to go. To use the jmap tool, we need the pid of this process. Of course we can use the usual shell tricks to get it, but the JDK comes with a nice tool for finding all JVM pids active on the system: jps

<pre>~/projects/jruby ➔ jps -l
52862 sun.tools.jps.Jps
52857 org/jruby/Main
48716 com.sun.enterprise.glassfish.bootstrap.ASMain
</pre>

From this, you can see I have three JVMs running on my system right now: jps itself; our JRuby instance; and a GlassFish server I used for testing earlier today. We're interested in the JRuby instance, pid 52857. Let's see what jmap can do with that.

<pre>~/projects/jruby ➔ jmap
Usage:
    jmap [option] &lt;pid&gt;
        (to connect to running process)
    jmap [option] &lt;executable &lt;core&gt;
        (to connect to a core file)
    jmap [option] [server_id@]&lt;remote server IP or hostname&gt;
        (to connect to remote debug server)

where &lt;option&gt; is one of:
    &lt;none&gt;               to print same info as Solaris pmap
    -heap                to print java heap summary
    -histo[:live]        to print histogram of java object heap; if the "live"
                         suboption is specified, only count live objects
    -permstat            to print permanent generation statistics
    -finalizerinfo       to print information on objects awaiting finalization
    -dump:&lt;dump-options&gt; to dump java heap in hprof binary format
                         dump-options:
                           live         dump only live objects; if not specified,
                                        all objects in the heap are dumped.
                           format=b     binary format
                           file=&lt;file&gt;  dump heap to &lt;file&gt;
                         Example: jmap -dump:live,format=b,file=heap.bin &lt;pid&gt;
    -F                   force. Use with -dump:&lt;dump-options&gt; &lt;pid&gt; or -histo
                         to force a heap dump or histogram when &lt;pid&gt; does not
                         respond. The "live" suboption is not supported
                         in this mode.
    -h | -help           to print this help message
    -J&lt;flag&gt;             to pass &lt;flag&gt; directly to the runtime system

</pre>

The simplest option here is -histo, to print out a histogram of the objects on the heap. Let's run that against our JRuby instance.

<pre>~/projects/jruby ➔ jmap -histo:live 52857

 num     #instances         #bytes  class name
----------------------------------------------
   1:         22677        3192816  &lt;constMethodKlass&gt;
   2:         22677        1816952  &lt;methodKlass&gt;
   3:         35089        1492992  &lt;symbolKlass&gt;
   4:          2860        1389352  &lt;instanceKlassKlass&gt;
   5:          2860        1193536  &lt;constantPoolKlass&gt;
   6:          2798         739264  &lt;constantPoolCacheKlass&gt;
   7:          5861         465408  [B
   8:          5399         298120  [C
   9:          3042         292032  java.lang.Class
  10:          4037         261712  [S
  11:         10002         240048  org.jruby.RubyObject
  12:          3994         179928  [[I
  13:          5474         131376  java.lang.String
  14:          1661          95912  [I
...</pre>

The resulting output is a listing of literally <span style="font-weight:bold">every</span> object in the system...not just Ruby objects even! The value of this should be apparent; not only can you start to investigate the memory overhead of code you've written, you'll also be able to investigate the memory overhead of every library and every piece of code running in the same process, right down to byte arrays (the "[B" above) and "native" Java strings ("java.lang.String" above). And so far we haven't had to do anything special to JRuby. Nice, eh?

So, back to the matter at hand: the Foo class from our example. Where is it?

Well, the answer is that it's right there; 10000 of those 10002 org.jruby.RubyObject instances are our Foo objects; the other two are probably objects constructed for JRuby runtime purposes. But obviously, there's nothing in this output that tells us how to find our Foo instances. This is what I'm remedying in JRuby 1.6.

On JRuby master, there's now a flag you can pass that will stand up a JVM class for every user-created Ruby class. Among the many benefits of doing this, we also get a more useful profile. Let's see how to use the flag (which will either be default or very easy to access by the time we release JRuby 1.6).

<pre>~/projects/jruby ➔ jruby -J-Djruby.reify.classes=true foo_heap_example.rb 
ready for analysis!</pre>

If we run jmap against this new instance, we see a more interesting result.

<pre> num     #instances         #bytes  class name
----------------------------------------------
   1:         22677        3192816  &lt;constMethodKlass&gt;
   2:         22677        1816952  &lt;methodKlass&gt;
   3:         35089        1492992  &lt;symbolKlass&gt;
   4:          2860        1389352  &lt;instanceKlassKlass&gt;
   5:          2860        1193536  &lt;constantPoolKlass&gt;
   6:          2798         739264  &lt;constantPoolCacheKlass&gt;
   7:          5863         465456  [B
   8:          5401         298208  [C
   9:          3042         292032  java.lang.Class
  10:          4037         261712  [S
<b>  11:         10000         240000  ruby.Foo</b>
  12:          3994         179928  [[I
  13:          5476         131424  java.lang.String
  14:          1661          95912  [I</pre>

A-ha! There's our Foo instances! The "reify classes" option generates a JVM class of the same name as the Ruby class, prefixed by "ruby." to separate it from other JVM classes. Now we can start to see the real power of the tools, and we're just at the beginning. Let's see what a simple Rails application looks like.

<pre>~/projects/jruby ➔ jmap -histo:live 52926 | grep " ruby."
  29:         11685         280440  ruby.TZInfo.TimezoneTransitionInfo
  97:           970          23280  ruby.Gem.Version
  98:           914          21936  ruby.Gem.Requirement
 122:           592          14208  ruby.TZInfo.TimezoneOffsetInfo
 138:           382           9168  ruby.Gem.Dependency
 159:           265           6360  ruby.Gem.Specification
 201:           142           3408  ruby.ActiveSupport.TimeZone
 205:           118           2832  ruby.TZInfo.DataTimezoneInfo
 206:           118           2832  ruby.TZInfo.DataTimezone
 273:            41            984  ruby.Gem.Platform
 383:            14            336  ruby.Mime.Type
 403:            13            312  ruby.Set
 467:             8            192  ruby.ActionController.MiddlewareStack.Middleware
 476:             8            192  ruby.ActionView.Template
 487:             7            168  ruby.ActionController.Routing.DividerSegment
 508:             6            144  ruby.TZInfo.LinkedTimezoneInfo
 523:             6            144  ruby.TZInfo.LinkedTimezone
 810:             4             96  ruby.ActionController.Routing.DynamicSegment
2291:             2             48  ruby.ActionController.Routing.Route
2292:             2             48  ruby.I18n.Config
2293:             2             48  ruby.ActiveSupport.Deprecation.DeprecatedConstantProxy
2298:             2             48  ruby.ActionController.Routing.ControllerSegment
...</pre>

This time I've opted to grep out just the "ruby." items in the histogram, and the results are pretty impressive! We can see the baffling fact that there's 970 instances of Gem::Version, using at least 23280 bytes of memory. We can see the even more depressing fact that there's 11685 <span style="font-weight:bold">live</span> instances of TZInfo::TimezoneTransitionInfo, using at least 280440 bytes.

Now that we're getting useful data, let's look at the first of our tools in more detail: jmap and jhat.

<strong>jmap and jhat</strong>

As you might guess, I do a lot of profiling in the process of developing JRuby. I've used probably a dozen different tools at different times. But the first tool I always reach for is the jmap/jhat combination.

You've seen the simple case of using jmap above, generating a histogram of the live heap. Let's take a look at an offline heap dump.

<pre>~/projects/jruby ➔ jmap -dump:live,format=b,file=heap.bin 52926
Dumping heap to /Users/headius/projects/jruby/heap.bin ...
Heap dump file created</pre>

That's how easy it is! The binary dump in heap.bin is supported by several tools: jhat (obviously), VisualVM, the Eclipse Memory Analysis Tool, and others. It's not officially a "standard" format, but it hasn't changed in a long time. Let's have a look at jhat options.

<pre>~/projects/jruby ➔ jhat
ERROR: No arguments supplied
Usage:  jhat [-stack &lt;bool&gt;] [-refs &lt;bool&gt;] [-port &lt;port&gt;] [-baseline &lt;file&gt;] [-debug &lt;int&gt;] [-version] [-h|-help] &lt;file&gt;

 -J&lt;flag&gt;          Pass &lt;flag&gt; directly to the runtime system. For
     example, -J-mx512m to use a maximum heap size of 512MB
 -stack false:     Turn off tracking object allocation call stack.
 -refs false:      Turn off tracking of references to objects
 -port &lt;port&gt;:     Set the port for the HTTP server.  Defaults to 7000
 -exclude &lt;file&gt;:  Specify a file that lists data members that should
     be excluded from the reachableFrom query.
 -baseline &lt;file&gt;: Specify a baseline object dump.  Objects in
     both heap dumps with the same ID and same class will
     be marked as not being "new".
 -debug &lt;int&gt;:     Set debug level.
       0:  No debug output
       1:  Debug hprof file parsing
       2:  Debug hprof file parsing, no server
 -version          Report version number
 -h|-help          Print this help and exit
 &lt;file&gt;            The file to read

For a dump file that contains multiple heap dumps,
you may specify which dump in the file
by appending "#&lt;number&gt;" to the file name, i.e. "foo.hprof#3".

All boolean options default to "true"</pre>

Generally you can just point jhat at a heap dump and away it goes. Occasionally if the heap is large, you may need to use the -J option to increase the maximum heap size of the JVM jhat runs in. Since we're running a Rails app, we'll bump the heap up a little bit.

<pre>~/projects/jruby ➔ jhat -J-Xmx200M heap.bin
Reading from heap.bin...
Dump file created Fri Jul 09 02:07:46 CDT 2010
Snapshot read, resolving...
Resolving 604115 objects...
[much verbose logging elided for brevity]

Chasing references, expect 120 dots........................................................................................................................
Eliminating duplicate references........................................................................................................................
Snapshot resolved.
Started HTTP server on port 7000
Server is ready.</pre>

"Server is ready"? Damn you Java people! Does everything have to be a server with you?

In this case, it's actually an incredibly useful tool. jhat starts up a small web application on port 7000 that allows you to click through the dump file. Let's see what that looks like.

<a href="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbM3T7q-lI/AAAAAAAAADg/RZpaIJpL86k/s1600/Screen+shot+2010-07-09+at+2.15.35+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 335px" src="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbM3T7q-lI/AAAAAAAAADg/RZpaIJpL86k/s400/Screen+shot+2010-07-09+at+2.15.35+AM.png" border="0" /></a>

Here's the front page of the tool. We see a listing of all JVM classes in the system. If you scroll to the bottom, there's a few more general functions.

<a href="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbNTB2gDTI/AAAAAAAAADo/LLJAmpUJiIM/s1600/Screen+shot+2010-07-09+at+2.18.13+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 177px" src="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbNTB2gDTI/AAAAAAAAADo/LLJAmpUJiIM/s400/Screen+shot+2010-07-09+at+2.18.13+AM.png" border="0" /></a>

Let's go with what we know and view the heap histogram again.

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbNz_OCMFI/AAAAAAAAADw/hqej4bZxrzA/s1600/Screen+shot+2010-07-09+at+2.20.09+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 269px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbNz_OCMFI/AAAAAAAAADw/hqej4bZxrzA/s400/Screen+shot+2010-07-09+at+2.20.09+AM.png" border="0" /></a>

Here we can see that there's lots of objects taking up memory, and they're a mix of JVM-native types, JRuby implementation classes, and actual Ruby classes. In fact, here we can see our friend TZInfo::TimezoneTransitionInfo again. Let's click through.

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbOgzZ653I/AAAAAAAAAD4/DUpJF2xtB3Q/s1600/Screen+shot+2010-07-09+at+2.23.16+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 328px;height: 400px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbOgzZ653I/AAAAAAAAAD4/DUpJF2xtB3Q/s400/Screen+shot+2010-07-09+at+2.23.16+AM.png" border="0" /></a>

Pretty mundane stuff so far; basically just information about the class itself. But you see at the bottom of this screenshot that we can go from here to viewing all instances of TimezoneTransitionInfo. Let's try that.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbO_ofMtbI/AAAAAAAAAEA/D0lMZsk1EgQ/s1600/Screen+shot+2010-07-09+at+2.25.20+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 380px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbO_ofMtbI/AAAAAAAAAEA/D0lMZsk1EgQ/s400/Screen+shot+2010-07-09+at+2.25.20+AM.png" border="0" /></a>

Ahh, that's more like it! Now we can see that there's a heck of a lot of these things floating around. Let's investigate a bit more and click through the first instance.

<a href="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbPW_EcOlI/AAAAAAAAAEI/MujaHdUTDrI/s1600/Screen+shot+2010-07-09+at+2.26.53+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 380px" src="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbPW_EcOlI/AAAAAAAAAEI/MujaHdUTDrI/s400/Screen+shot+2010-07-09+at+2.26.53+AM.png" border="0" /></a>

Now this is some cool stuff!

We can see that the JVM class generated for TimezoneTransitionInfo has three fields: metaClass, which points at the Ruby Class object; varTable, which is an array of Object references used for instance variables and other "internal" variables; and a flags field containing runtime flags for the object, like whether it's frozen, tainted, and so on. We can see that this object has no special flags set, and we can dig deeper into those fields if we like. We'll skip that today.

Moving further down, we see a few more amazing links. First, there's a list of all references to this object. Ahh, now we can start to investigate why they're staying in memory, even though we're not using them. We can even have jhat show us the full chains of references keeping these objects alive; a series of objects leading all the way back to one "rooted" by a thread or by global JVM state. And we can explore the other direction as well, walking all objects reachable from this one.

This is only a small part of what you can do with jmap and jhat, and they're so simple to use it feels almost criminal. But what if we want to inspect an application while it's running? Dumping heaps and analyzing them offline can tell you much of the story, but sometimes you just want to see the objects coming and going yourself. Let's move on to VisualVM.

<strong>VisualVM</strong>

VisualVM spawned out of the NetBeans profiling tools. One of the biggest complaints about the JVMs of old was that all the built-in tooling seemed to be designed for JVM engineers alone. Because Sun had the foresight to build and own their own IDE and related modules, it eventually became a natural fit to pull out the profiling tools for use by everyone. And so VisualVM was born.

On most systems with Java 6 installed, you should have a "jvisualvm" command. Let's run it now.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbRjfKhrtI/AAAAAAAAAEQ/_vOOoMds6ec/s1600/Screen+shot+2010-07-09+at+2.36.21+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 263px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbRjfKhrtI/AAAAAAAAAEQ/_vOOoMds6ec/s400/Screen+shot+2010-07-09+at+2.36.21+AM.png" border="0" /></a>

When you start up VisualVM, you're presented with a list of running JVMs, similar to using the 'jps' command. You can also connect to remote machines, browse offline heap and core dump files, and look through memory and CPU profiling snapshots from previous runs. Today, we'll just open up our running Rails app and see what we can see.

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbSJZbUp9I/AAAAAAAAAEY/mGwMpr2mMrA/s1600/Screen+shot+2010-07-09+at+2.38.59+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 229px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbSJZbUp9I/AAAAAAAAAEY/mGwMpr2mMrA/s400/Screen+shot+2010-07-09+at+2.38.59+AM.png" border="0" /></a>

VisualVM connects to the running process and brings up a basic information pane with process information, JVM information, and so on. We're interested in monitoring heap usage, so let's move to the "Monitor" tab.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbSmuGkczI/AAAAAAAAAEg/jQ6lizETlk4/s1600/Screen+shot+2010-07-09+at+2.40.55+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 227px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbSmuGkczI/AAAAAAAAAEg/jQ6lizETlk4/s400/Screen+shot+2010-07-09+at+2.40.55+AM.png" border="0" /></a>

Already we're getting some useful information. This view shows CPU usage (currently zero, since it's an idle Rails app), Heap usage over time, and the number of JVM classes and threads that are active. We can trigger a full GC, if we'd like to tidy things up before we start poking around. But most importantly, we can do the jmap/jhat dance in one step, by clicking the Heap Dump button. Tantalizing, isn't it?

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbTPcZVqoI/AAAAAAAAAEo/w4Bt_TiP7-U/s1600/Screen+shot+2010-07-09+at+2.43.36+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 228px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbTPcZVqoI/AAAAAAAAAEo/w4Bt_TiP7-U/s400/Screen+shot+2010-07-09+at+2.43.36+AM.png" border="0" /></a>

Initially, we see a basic summary of the heap: total size, number of classes and GC roots, and so on. We're looking for our friend TimezoneTransitionInfo, so let's look for it in the "Classes" pane.

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbUOd8sliI/AAAAAAAAAE4/peOO8-U1SVw/s1600/Screen+shot+2010-07-09+at+2.47.52+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 196px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbUOd8sliI/AAAAAAAAAE4/peOO8-U1SVw/s400/Screen+shot+2010-07-09+at+2.47.52+AM.png" border="0" /></a>

Ahh, there it is, just a little ways down the list. The counts are as we expect, so let's double-click and dig a bit deeper.

<a href="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbUcM5s1vI/AAAAAAAAAFA/HMDT88mRIBs/s1600/Screen+shot+2010-07-09+at+2.48.41+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 176px" src="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbUcM5s1vI/AAAAAAAAAFA/HMDT88mRIBs/s400/Screen+shot+2010-07-09+at+2.48.41+AM.png" border="0" /></a>

Here we have a lot of the same information about object instances that we did with jhat, but presented in a much richer format. Almost everything is active; you can jump around the heap and do analysis that would take a lot of manual work very easily. Let's try another tool: the Retained Size calculator.

Because our JVM tools see all objects equally, the reported size for a Ruby object on the heap is only part of the story. There's also the variable table, the object's instance variables, and objects they reference to consider. Let's jump to a different object now, Gem::Version.

We don't want to have to scroll through the list of classes to find ruby.Gem.Version, so let's make use of the Object Query Language console. With the OQL console, you can write SQL-like queries to retrieve listings of objects in the heap. We'll search for all instances of ruby.Gem.Version.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbVuq380RI/AAAAAAAAAFI/aXCwt3BRsWU/s1600/Screen+shot+2010-07-09+at+2.53.44+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 261px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbVuq380RI/AAAAAAAAAFI/aXCwt3BRsWU/s400/Screen+shot+2010-07-09+at+2.53.44+AM.png" border="0" /></a>

The query runs and we get a listing of Gem::Version objects. Let's dig deeper and see how much retained memory each Version object is keeping alive.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbWHvnwVEI/AAAAAAAAAFQ/q4X3F07ZvTc/s1600/Screen+shot+2010-07-09+at+2.55.53+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 162px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbWHvnwVEI/AAAAAAAAAFQ/q4X3F07ZvTc/s400/Screen+shot+2010-07-09+at+2.55.53+AM.png" border="0" /></a>

Clicking on the "Compute Retained Sizes" link in the "Instances" pane prompts us with this dialog. We're tough...we can take it.

<a href="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbWaK5bGOI/AAAAAAAAAFY/AnidY4PVb9M/s1600/Screen+shot+2010-07-09+at+2.57.08+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 348px;height: 106px" src="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbWaK5bGOI/AAAAAAAAAFY/AnidY4PVb9M/s400/Screen+shot+2010-07-09+at+2.57.08+AM.png" border="0" /></a>

Reticulating splines...

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDcBUC7ML_I/AAAAAAAAAFo/MYGEKCqzU2U/s1600/Screen+shot+2010-07-09+at+2.59.20+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 190px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDcBUC7ML_I/AAAAAAAAAFo/MYGEKCqzU2U/s400/Screen+shot+2010-07-09+at+2.59.20+AM.png" border="0" /></a>

So it looks like each of the Version objects takes from 125 to 190 bytes for a total of 19400 bytes, most of which is from the variable table. What's in there?

<a href="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbXNQMb4nI/AAAAAAAAAFg/yWfkrni0snI/s1600/Screen+shot+2010-07-09+at+3.00.31+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 77px" src="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbXNQMb4nI/AAAAAAAAAFg/yWfkrni0snI/s400/Screen+shot+2010-07-09+at+3.00.31+AM.png" border="0" /></a>

Ahh...looks like there's a String and an Array. And of course we can poke around the heap ad infinitum, into and out of "native" JRuby and JVM classes, and truly get a complete picture of what our running applications look like. Now you're playing with power.

<strong>Your Turn</strong>

This is obviously only the tip of the iceberg. Tools like Eclipse Memory Analysis Tool include features for detecting leaks; VisualVM and NetBeans both allow you to turn on allocation tracing, to show <span style="font-weight:bold">where</span> in your code all those objects are being created. There's tools for monitoring live GC behavior, and many of these tools even allow you to dig into a running heap and <span style="font-weight:bold">modify live objects</span>. If you can dream it, there's a tool that can do it. And you get all that for free by using JRuby.

If you'd like to play with this, it all works with JRuby 1.5.1 but you won't get the nice JVM classes for Ruby classes. For that, you can pull and build JRuby master, download a 1.6.0.dev snapshot, or just wait for JRuby 1.6. And if you do play with these or other tools, I hope you'll let us know and blog about your experience!

In the future, I'll try to show some of the other tools plus some of the CPU profiling capabilities they bring to the table. For now, rest assured that if you're using JRuby, you really do have the best tools available to you.

<div class="note">

This article was originally <a href="http://blog.headius.com/2010/07/browsing-memory-jruby-way.html">published</a> on Charles Nutter's blog Headius.

</div><p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[There's been a lot of fuss made lately over memory inspection and profiling tools for Ruby implementations. And it's not without reason; inspecting a Ruby application's memory profile, much less diagnosing problems, has traditionally been very difficult. At least, difficult if you don't use JRuby.

Because JRuby runs on the JVM, we benefit from the dozens of tools that have been written for the JVM. Among these tools are numerous memory inspection, profiling, and reporting tools, some built into the JDK itself. Want a heap dump? Check out the jmap (Java memory map) and jhat (Java heap analysis tool) shipped with Hotspot-based JVMs (Sun, OpenJDK). Looking for a bit more? There's the Memory Analysis Tool based on Eclipse, the YourKit memory and CPU profiling app, VisualVM, now also shipped with Hotspot JVMs...and many more. There's literally dozens of these tools, and they provide just about everything you can imagine for investigating memory.

In this post, I'll show how you can use two of these tools: VisualVM, a simple, graphical tool for exploring a <b>running</b> JVM; and the jmap/jhat combination, which allows you to dump the memory heap to disk for inspection offline.

<strong>Getting JRuby Prepared</strong>

All these tools work with any version of JRuby, but as part of JRuby 1.6 development I've been adding some enhancements. Specifically, I've made some modifications that allow Ruby objects to show up side-by-side with Java objects in memory profiles. A little explanation is in order.

In JRuby, all the core classes are represented by "native" Java classes. Object is represented by org.jruby.RubyObject, String is org.jruby.RubyString, and so on. Normally, if you extend one of the core classes, we don't actually create a new "native" class to represent it; instead, all user-created classes that extend Object simply show up as RubyObject in memory. This is still incredibly useful; you can look into RubyObject and see the metaClass field, which indicates the actual Ruby type.

Let's see what that looks like, so we know where we're starting from. We'll run a simple script that creates a custom class, instantiates and saves 10000 instances of it, and then sleeps.

<pre>~/projects/jruby ➔ cat foo_heap_example.rb 
class Foo
end

ary = []
10000.times { ary &lt;&lt; Foo.new }

puts "ready for analysis!"
sleep

~/projects/jruby ➔ jruby foo_heap_example.rb 
ready for analysis!</pre>

So we have our test subject ready to go. To use the jmap tool, we need the pid of this process. Of course we can use the usual shell tricks to get it, but the JDK comes with a nice tool for finding all JVM pids active on the system: jps

<pre>~/projects/jruby ➔ jps -l
52862 sun.tools.jps.Jps
52857 org/jruby/Main
48716 com.sun.enterprise.glassfish.bootstrap.ASMain
</pre>

From this, you can see I have three JVMs running on my system right now: jps itself; our JRuby instance; and a GlassFish server I used for testing earlier today. We're interested in the JRuby instance, pid 52857. Let's see what jmap can do with that.

<pre>~/projects/jruby ➔ jmap
Usage:
    jmap [option] &lt;pid&gt;
        (to connect to running process)
    jmap [option] &lt;executable &lt;core&gt;
        (to connect to a core file)
    jmap [option] [server_id@]&lt;remote server IP or hostname&gt;
        (to connect to remote debug server)

where &lt;option&gt; is one of:
    &lt;none&gt;               to print same info as Solaris pmap
    -heap                to print java heap summary
    -histo[:live]        to print histogram of java object heap; if the "live"
                         suboption is specified, only count live objects
    -permstat            to print permanent generation statistics
    -finalizerinfo       to print information on objects awaiting finalization
    -dump:&lt;dump-options&gt; to dump java heap in hprof binary format
                         dump-options:
                           live         dump only live objects; if not specified,
                                        all objects in the heap are dumped.
                           format=b     binary format
                           file=&lt;file&gt;  dump heap to &lt;file&gt;
                         Example: jmap -dump:live,format=b,file=heap.bin &lt;pid&gt;
    -F                   force. Use with -dump:&lt;dump-options&gt; &lt;pid&gt; or -histo
                         to force a heap dump or histogram when &lt;pid&gt; does not
                         respond. The "live" suboption is not supported
                         in this mode.
    -h | -help           to print this help message
    -J&lt;flag&gt;             to pass &lt;flag&gt; directly to the runtime system

</pre>

The simplest option here is -histo, to print out a histogram of the objects on the heap. Let's run that against our JRuby instance.

<pre>~/projects/jruby ➔ jmap -histo:live 52857

 num     #instances         #bytes  class name
----------------------------------------------
   1:         22677        3192816  &lt;constMethodKlass&gt;
   2:         22677        1816952  &lt;methodKlass&gt;
   3:         35089        1492992  &lt;symbolKlass&gt;
   4:          2860        1389352  &lt;instanceKlassKlass&gt;
   5:          2860        1193536  &lt;constantPoolKlass&gt;
   6:          2798         739264  &lt;constantPoolCacheKlass&gt;
   7:          5861         465408  [B
   8:          5399         298120  [C
   9:          3042         292032  java.lang.Class
  10:          4037         261712  [S
  11:         10002         240048  org.jruby.RubyObject
  12:          3994         179928  [[I
  13:          5474         131376  java.lang.String
  14:          1661          95912  [I
...</pre>

The resulting output is a listing of literally <span style="font-weight:bold">every</span> object in the system...not just Ruby objects even! The value of this should be apparent; not only can you start to investigate the memory overhead of code you've written, you'll also be able to investigate the memory overhead of every library and every piece of code running in the same process, right down to byte arrays (the "[B" above) and "native" Java strings ("java.lang.String" above). And so far we haven't had to do anything special to JRuby. Nice, eh?

So, back to the matter at hand: the Foo class from our example. Where is it?

Well, the answer is that it's right there; 10000 of those 10002 org.jruby.RubyObject instances are our Foo objects; the other two are probably objects constructed for JRuby runtime purposes. But obviously, there's nothing in this output that tells us how to find our Foo instances. This is what I'm remedying in JRuby 1.6.

On JRuby master, there's now a flag you can pass that will stand up a JVM class for every user-created Ruby class. Among the many benefits of doing this, we also get a more useful profile. Let's see how to use the flag (which will either be default or very easy to access by the time we release JRuby 1.6).

<pre>~/projects/jruby ➔ jruby -J-Djruby.reify.classes=true foo_heap_example.rb 
ready for analysis!</pre>

If we run jmap against this new instance, we see a more interesting result.

<pre> num     #instances         #bytes  class name
----------------------------------------------
   1:         22677        3192816  &lt;constMethodKlass&gt;
   2:         22677        1816952  &lt;methodKlass&gt;
   3:         35089        1492992  &lt;symbolKlass&gt;
   4:          2860        1389352  &lt;instanceKlassKlass&gt;
   5:          2860        1193536  &lt;constantPoolKlass&gt;
   6:          2798         739264  &lt;constantPoolCacheKlass&gt;
   7:          5863         465456  [B
   8:          5401         298208  [C
   9:          3042         292032  java.lang.Class
  10:          4037         261712  [S
<b>  11:         10000         240000  ruby.Foo</b>
  12:          3994         179928  [[I
  13:          5476         131424  java.lang.String
  14:          1661          95912  [I</pre>

A-ha! There's our Foo instances! The "reify classes" option generates a JVM class of the same name as the Ruby class, prefixed by "ruby." to separate it from other JVM classes. Now we can start to see the real power of the tools, and we're just at the beginning. Let's see what a simple Rails application looks like.

<pre>~/projects/jruby ➔ jmap -histo:live 52926 | grep " ruby."
  29:         11685         280440  ruby.TZInfo.TimezoneTransitionInfo
  97:           970          23280  ruby.Gem.Version
  98:           914          21936  ruby.Gem.Requirement
 122:           592          14208  ruby.TZInfo.TimezoneOffsetInfo
 138:           382           9168  ruby.Gem.Dependency
 159:           265           6360  ruby.Gem.Specification
 201:           142           3408  ruby.ActiveSupport.TimeZone
 205:           118           2832  ruby.TZInfo.DataTimezoneInfo
 206:           118           2832  ruby.TZInfo.DataTimezone
 273:            41            984  ruby.Gem.Platform
 383:            14            336  ruby.Mime.Type
 403:            13            312  ruby.Set
 467:             8            192  ruby.ActionController.MiddlewareStack.Middleware
 476:             8            192  ruby.ActionView.Template
 487:             7            168  ruby.ActionController.Routing.DividerSegment
 508:             6            144  ruby.TZInfo.LinkedTimezoneInfo
 523:             6            144  ruby.TZInfo.LinkedTimezone
 810:             4             96  ruby.ActionController.Routing.DynamicSegment
2291:             2             48  ruby.ActionController.Routing.Route
2292:             2             48  ruby.I18n.Config
2293:             2             48  ruby.ActiveSupport.Deprecation.DeprecatedConstantProxy
2298:             2             48  ruby.ActionController.Routing.ControllerSegment
...</pre>

This time I've opted to grep out just the "ruby." items in the histogram, and the results are pretty impressive! We can see the baffling fact that there's 970 instances of Gem::Version, using at least 23280 bytes of memory. We can see the even more depressing fact that there's 11685 <span style="font-weight:bold">live</span> instances of TZInfo::TimezoneTransitionInfo, using at least 280440 bytes.

Now that we're getting useful data, let's look at the first of our tools in more detail: jmap and jhat.

<strong>jmap and jhat</strong>

As you might guess, I do a lot of profiling in the process of developing JRuby. I've used probably a dozen different tools at different times. But the first tool I always reach for is the jmap/jhat combination.

You've seen the simple case of using jmap above, generating a histogram of the live heap. Let's take a look at an offline heap dump.

<pre>~/projects/jruby ➔ jmap -dump:live,format=b,file=heap.bin 52926
Dumping heap to /Users/headius/projects/jruby/heap.bin ...
Heap dump file created</pre>

That's how easy it is! The binary dump in heap.bin is supported by several tools: jhat (obviously), VisualVM, the Eclipse Memory Analysis Tool, and others. It's not officially a "standard" format, but it hasn't changed in a long time. Let's have a look at jhat options.

<pre>~/projects/jruby ➔ jhat
ERROR: No arguments supplied
Usage:  jhat [-stack &lt;bool&gt;] [-refs &lt;bool&gt;] [-port &lt;port&gt;] [-baseline &lt;file&gt;] [-debug &lt;int&gt;] [-version] [-h|-help] &lt;file&gt;

 -J&lt;flag&gt;          Pass &lt;flag&gt; directly to the runtime system. For
     example, -J-mx512m to use a maximum heap size of 512MB
 -stack false:     Turn off tracking object allocation call stack.
 -refs false:      Turn off tracking of references to objects
 -port &lt;port&gt;:     Set the port for the HTTP server.  Defaults to 7000
 -exclude &lt;file&gt;:  Specify a file that lists data members that should
     be excluded from the reachableFrom query.
 -baseline &lt;file&gt;: Specify a baseline object dump.  Objects in
     both heap dumps with the same ID and same class will
     be marked as not being "new".
 -debug &lt;int&gt;:     Set debug level.
       0:  No debug output
       1:  Debug hprof file parsing
       2:  Debug hprof file parsing, no server
 -version          Report version number
 -h|-help          Print this help and exit
 &lt;file&gt;            The file to read

For a dump file that contains multiple heap dumps,
you may specify which dump in the file
by appending "#&lt;number&gt;" to the file name, i.e. "foo.hprof#3".

All boolean options default to "true"</pre>

Generally you can just point jhat at a heap dump and away it goes. Occasionally if the heap is large, you may need to use the -J option to increase the maximum heap size of the JVM jhat runs in. Since we're running a Rails app, we'll bump the heap up a little bit.

<pre>~/projects/jruby ➔ jhat -J-Xmx200M heap.bin
Reading from heap.bin...
Dump file created Fri Jul 09 02:07:46 CDT 2010
Snapshot read, resolving...
Resolving 604115 objects...
[much verbose logging elided for brevity]

Chasing references, expect 120 dots........................................................................................................................
Eliminating duplicate references........................................................................................................................
Snapshot resolved.
Started HTTP server on port 7000
Server is ready.</pre>

"Server is ready"? Damn you Java people! Does everything have to be a server with you?

In this case, it's actually an incredibly useful tool. jhat starts up a small web application on port 7000 that allows you to click through the dump file. Let's see what that looks like.

<a href="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbM3T7q-lI/AAAAAAAAADg/RZpaIJpL86k/s1600/Screen+shot+2010-07-09+at+2.15.35+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 335px" src="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbM3T7q-lI/AAAAAAAAADg/RZpaIJpL86k/s400/Screen+shot+2010-07-09+at+2.15.35+AM.png" border="0" /></a>

Here's the front page of the tool. We see a listing of all JVM classes in the system. If you scroll to the bottom, there's a few more general functions.

<a href="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbNTB2gDTI/AAAAAAAAADo/LLJAmpUJiIM/s1600/Screen+shot+2010-07-09+at+2.18.13+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 177px" src="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbNTB2gDTI/AAAAAAAAADo/LLJAmpUJiIM/s400/Screen+shot+2010-07-09+at+2.18.13+AM.png" border="0" /></a>

Let's go with what we know and view the heap histogram again.

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbNz_OCMFI/AAAAAAAAADw/hqej4bZxrzA/s1600/Screen+shot+2010-07-09+at+2.20.09+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 269px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbNz_OCMFI/AAAAAAAAADw/hqej4bZxrzA/s400/Screen+shot+2010-07-09+at+2.20.09+AM.png" border="0" /></a>

Here we can see that there's lots of objects taking up memory, and they're a mix of JVM-native types, JRuby implementation classes, and actual Ruby classes. In fact, here we can see our friend TZInfo::TimezoneTransitionInfo again. Let's click through.

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbOgzZ653I/AAAAAAAAAD4/DUpJF2xtB3Q/s1600/Screen+shot+2010-07-09+at+2.23.16+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 328px;height: 400px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbOgzZ653I/AAAAAAAAAD4/DUpJF2xtB3Q/s400/Screen+shot+2010-07-09+at+2.23.16+AM.png" border="0" /></a>

Pretty mundane stuff so far; basically just information about the class itself. But you see at the bottom of this screenshot that we can go from here to viewing all instances of TimezoneTransitionInfo. Let's try that.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbO_ofMtbI/AAAAAAAAAEA/D0lMZsk1EgQ/s1600/Screen+shot+2010-07-09+at+2.25.20+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 380px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbO_ofMtbI/AAAAAAAAAEA/D0lMZsk1EgQ/s400/Screen+shot+2010-07-09+at+2.25.20+AM.png" border="0" /></a>

Ahh, that's more like it! Now we can see that there's a heck of a lot of these things floating around. Let's investigate a bit more and click through the first instance.

<a href="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbPW_EcOlI/AAAAAAAAAEI/MujaHdUTDrI/s1600/Screen+shot+2010-07-09+at+2.26.53+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 380px" src="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbPW_EcOlI/AAAAAAAAAEI/MujaHdUTDrI/s400/Screen+shot+2010-07-09+at+2.26.53+AM.png" border="0" /></a>

Now this is some cool stuff!

We can see that the JVM class generated for TimezoneTransitionInfo has three fields: metaClass, which points at the Ruby Class object; varTable, which is an array of Object references used for instance variables and other "internal" variables; and a flags field containing runtime flags for the object, like whether it's frozen, tainted, and so on. We can see that this object has no special flags set, and we can dig deeper into those fields if we like. We'll skip that today.

Moving further down, we see a few more amazing links. First, there's a list of all references to this object. Ahh, now we can start to investigate why they're staying in memory, even though we're not using them. We can even have jhat show us the full chains of references keeping these objects alive; a series of objects leading all the way back to one "rooted" by a thread or by global JVM state. And we can explore the other direction as well, walking all objects reachable from this one.

This is only a small part of what you can do with jmap and jhat, and they're so simple to use it feels almost criminal. But what if we want to inspect an application while it's running? Dumping heaps and analyzing them offline can tell you much of the story, but sometimes you just want to see the objects coming and going yourself. Let's move on to VisualVM.

<strong>VisualVM</strong>

VisualVM spawned out of the NetBeans profiling tools. One of the biggest complaints about the JVMs of old was that all the built-in tooling seemed to be designed for JVM engineers alone. Because Sun had the foresight to build and own their own IDE and related modules, it eventually became a natural fit to pull out the profiling tools for use by everyone. And so VisualVM was born.

On most systems with Java 6 installed, you should have a "jvisualvm" command. Let's run it now.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbRjfKhrtI/AAAAAAAAAEQ/_vOOoMds6ec/s1600/Screen+shot+2010-07-09+at+2.36.21+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 263px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbRjfKhrtI/AAAAAAAAAEQ/_vOOoMds6ec/s400/Screen+shot+2010-07-09+at+2.36.21+AM.png" border="0" /></a>

When you start up VisualVM, you're presented with a list of running JVMs, similar to using the 'jps' command. You can also connect to remote machines, browse offline heap and core dump files, and look through memory and CPU profiling snapshots from previous runs. Today, we'll just open up our running Rails app and see what we can see.

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbSJZbUp9I/AAAAAAAAAEY/mGwMpr2mMrA/s1600/Screen+shot+2010-07-09+at+2.38.59+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 229px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbSJZbUp9I/AAAAAAAAAEY/mGwMpr2mMrA/s400/Screen+shot+2010-07-09+at+2.38.59+AM.png" border="0" /></a>

VisualVM connects to the running process and brings up a basic information pane with process information, JVM information, and so on. We're interested in monitoring heap usage, so let's move to the "Monitor" tab.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbSmuGkczI/AAAAAAAAAEg/jQ6lizETlk4/s1600/Screen+shot+2010-07-09+at+2.40.55+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 227px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbSmuGkczI/AAAAAAAAAEg/jQ6lizETlk4/s400/Screen+shot+2010-07-09+at+2.40.55+AM.png" border="0" /></a>

Already we're getting some useful information. This view shows CPU usage (currently zero, since it's an idle Rails app), Heap usage over time, and the number of JVM classes and threads that are active. We can trigger a full GC, if we'd like to tidy things up before we start poking around. But most importantly, we can do the jmap/jhat dance in one step, by clicking the Heap Dump button. Tantalizing, isn't it?

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbTPcZVqoI/AAAAAAAAAEo/w4Bt_TiP7-U/s1600/Screen+shot+2010-07-09+at+2.43.36+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 228px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbTPcZVqoI/AAAAAAAAAEo/w4Bt_TiP7-U/s400/Screen+shot+2010-07-09+at+2.43.36+AM.png" border="0" /></a>

Initially, we see a basic summary of the heap: total size, number of classes and GC roots, and so on. We're looking for our friend TimezoneTransitionInfo, so let's look for it in the "Classes" pane.

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbUOd8sliI/AAAAAAAAAE4/peOO8-U1SVw/s1600/Screen+shot+2010-07-09+at+2.47.52+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 196px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDbUOd8sliI/AAAAAAAAAE4/peOO8-U1SVw/s400/Screen+shot+2010-07-09+at+2.47.52+AM.png" border="0" /></a>

Ahh, there it is, just a little ways down the list. The counts are as we expect, so let's double-click and dig a bit deeper.

<a href="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbUcM5s1vI/AAAAAAAAAFA/HMDT88mRIBs/s1600/Screen+shot+2010-07-09+at+2.48.41+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 176px" src="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbUcM5s1vI/AAAAAAAAAFA/HMDT88mRIBs/s400/Screen+shot+2010-07-09+at+2.48.41+AM.png" border="0" /></a>

Here we have a lot of the same information about object instances that we did with jhat, but presented in a much richer format. Almost everything is active; you can jump around the heap and do analysis that would take a lot of manual work very easily. Let's try another tool: the Retained Size calculator.

Because our JVM tools see all objects equally, the reported size for a Ruby object on the heap is only part of the story. There's also the variable table, the object's instance variables, and objects they reference to consider. Let's jump to a different object now, Gem::Version.

We don't want to have to scroll through the list of classes to find ruby.Gem.Version, so let's make use of the Object Query Language console. With the OQL console, you can write SQL-like queries to retrieve listings of objects in the heap. We'll search for all instances of ruby.Gem.Version.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbVuq380RI/AAAAAAAAAFI/aXCwt3BRsWU/s1600/Screen+shot+2010-07-09+at+2.53.44+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 261px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbVuq380RI/AAAAAAAAAFI/aXCwt3BRsWU/s400/Screen+shot+2010-07-09+at+2.53.44+AM.png" border="0" /></a>

The query runs and we get a listing of Gem::Version objects. Let's dig deeper and see how much retained memory each Version object is keeping alive.

<a href="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbWHvnwVEI/AAAAAAAAAFQ/q4X3F07ZvTc/s1600/Screen+shot+2010-07-09+at+2.55.53+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 162px" src="http://3.bp.blogspot.com/_HWobMsJuRHc/TDbWHvnwVEI/AAAAAAAAAFQ/q4X3F07ZvTc/s400/Screen+shot+2010-07-09+at+2.55.53+AM.png" border="0" /></a>

Clicking on the "Compute Retained Sizes" link in the "Instances" pane prompts us with this dialog. We're tough...we can take it.

<a href="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbWaK5bGOI/AAAAAAAAAFY/AnidY4PVb9M/s1600/Screen+shot+2010-07-09+at+2.57.08+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 348px;height: 106px" src="http://4.bp.blogspot.com/_HWobMsJuRHc/TDbWaK5bGOI/AAAAAAAAAFY/AnidY4PVb9M/s400/Screen+shot+2010-07-09+at+2.57.08+AM.png" border="0" /></a>

Reticulating splines...

<a href="http://2.bp.blogspot.com/_HWobMsJuRHc/TDcBUC7ML_I/AAAAAAAAAFo/MYGEKCqzU2U/s1600/Screen+shot+2010-07-09+at+2.59.20+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 190px" src="http://2.bp.blogspot.com/_HWobMsJuRHc/TDcBUC7ML_I/AAAAAAAAAFo/MYGEKCqzU2U/s400/Screen+shot+2010-07-09+at+2.59.20+AM.png" border="0" /></a>

So it looks like each of the Version objects takes from 125 to 190 bytes for a total of 19400 bytes, most of which is from the variable table. What's in there?

<a href="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbXNQMb4nI/AAAAAAAAAFg/yWfkrni0snI/s1600/Screen+shot+2010-07-09+at+3.00.31+AM.png"><img style="margin:0 10px 10px 0;cursor:pointer;cursor:hand;width: 400px;height: 77px" src="http://1.bp.blogspot.com/_HWobMsJuRHc/TDbXNQMb4nI/AAAAAAAAAFg/yWfkrni0snI/s400/Screen+shot+2010-07-09+at+3.00.31+AM.png" border="0" /></a>

Ahh...looks like there's a String and an Array. And of course we can poke around the heap ad infinitum, into and out of "native" JRuby and JVM classes, and truly get a complete picture of what our running applications look like. Now you're playing with power.

<strong>Your Turn</strong>

This is obviously only the tip of the iceberg. Tools like Eclipse Memory Analysis Tool include features for detecting leaks; VisualVM and NetBeans both allow you to turn on allocation tracing, to show <span style="font-weight:bold">where</span> in your code all those objects are being created. There's tools for monitoring live GC behavior, and many of these tools even allow you to dig into a running heap and <span style="font-weight:bold">modify live objects</span>. If you can dream it, there's a tool that can do it. And you get all that for free by using JRuby.

If you'd like to play with this, it all works with JRuby 1.5.1 but you won't get the nice JVM classes for Ruby classes. For that, you can pull and build JRuby master, download a 1.6.0.dev snapshot, or just wait for JRuby 1.6. And if you do play with these or other tools, I hope you'll let us know and blog about your experience!

In the future, I'll try to show some of the other tools plus some of the CPU profiling capabilities they bring to the table. For now, rest assured that if you're using JRuby, you really do have the best tools available to you.

<div class="note">

This article was originally <a href="http://blog.headius.com/2010/07/browsing-memory-jruby-way.html">published</a> on Charles Nutter's blog Headius.

</div><p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/8X9Y0N6XsCM" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/monitoring-memory-with-jruby-part-1-jhat-and-visualvm/feed/</wfw:commentRss>
		<slash:comments>7</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/monitoring-memory-with-jruby-part-1-jhat-and-visualvm/</feedburner:origLink></item>
		<item>
		<title>A Gentle Introduction to Isolation Levels</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/k4pRZIC80J4/</link>
		<comments>http://www.engineyard.com/blog/2010/a-gentle-introduction-to-isolation-levels/#comments</comments>
		<pubDate>Wed, 21 Jul 2010 10:16:23 +0000</pubDate>
		<dc:creator>Xavier Shay</dc:creator>
				<category><![CDATA[Events]]></category>
		<category><![CDATA[Partners]]></category>
		<category><![CDATA[Technology]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=4145</guid>
		<description><![CDATA[<div class="note">

Hello all,

Our latest post is from a special guest and Engine Yard partner Xavier Shay. He'll be running a pair of training sessions on "using your database to make your Ruby on Rails applications rock solid" at Engine Yard's San Francisco office on the 24th and 31st of July. Visit <a href="http://www.dbisyourfriend.com">www.dbisyourfriend.com</a> for course and registration details.

</div>
Bob opens a database transaction and selects everything from the books table. Tom comes along and adds a new book, then Bob, in his same transaction, repeats his same query for all the books. Does Bob see the new book that Tom added?

The answer is that you get to choose! It's important to understand what your choices are (and what choice your preferred database makes for you) so that you can ensure your code executes in a way that you intend.

The SQL standard specifies levels for how "isolated" transactions running at the same time are, all the way from being able to see uncommitted changes (not isolated) to effectively running the transactions in serial (full isolation). Academically there are eight levels of isolation, but for most purposes you only need to worry about the four defined by the standard. MySQL implements all four, PostgreSQL only two. You can specify a global isolation level for your database, but also override it for individual transactions.

The easiest to understand are the extreme levels: no isolation and total isolation. The first of these is known as *read uncommitted*, and it allows Bob to read the new book that Tom is adding _even before Tom has committed his changes_. As you can imagine this level is mostly useless, however it can very occasionally be handy in some reporting situations.

At the other end of the spectrum is full isolation, known in the spec as **serializable**. Bob will never see the new book that Tom is adding until he starts a new transaction. The database Bob sees is consistent---within the one transaction, the same query will always return the same result. At first glance this level seems like a great option but there's a lot of overhead involved, it drastically reduces the amount of concurrency you can achieve, and for most purposes the serializable level is overkill.

There are two isolation levels in between read uncommitted and serializable, they are *read committed* and *repeatable read*, and this is where it gets interesting. Read committed is the default isolation level in PostgreSQL and Oracle, and is one step up from read uncommitted. It is the most "common sense" level: Bob will not see any changes made by Tom until Tom commits them.

MySQL defaults to *repeatable read*. In this level, Bob will not see any _updates_ Tom commits, but will see any _inserts_. Say in Bob's first select he sees one book titled "The Odessey". Tom then fixes the spelling mistake to "The Odyssey", and also adds Homer's other epic poem "The Iliad". When Bob selects all books again, he will see "The Odessey" (old title, no spelling fix) and "The Iliad" (the inserted book).

To summarize, the four levels from least isolated to most isolated are: *read uncommitted*, *read committed*, *repeatable read*, and *serializable*. They define what types of Tom's changes Bob will be able to see within a single transaction.

<h2>In Practice</h2>

Say the books we are selecting are ordered based on an arbitrary position column (they're on our bookshelf, for instance). Assume read committed isolation level.
<pre>Title       | Position
----------------------
The Odyssey | 1
The Iliad   | 2
The Nostoi  | 3</pre>
Bob wants to move "The Odyssey" to the bottom position. To do this, he needs to update its position to the bottom of the list (position 4), then subtract 1 from all positions. At the same time, Tom is adding a new book "The Cypria". Working this through:

# Bob checks the bottom position, finds it to be 4
# Tom inserts "The Cypria" in the bottom position of 4
# Bob updates the position of "The Odyssey" to 4
# Bob subtracts 1 from all positions, and since he is using *read committed* he will "see" and update the newly inserted book.
# Both "The Odyssey" and "The Cypria" have a position of 3
<pre>Title       | Position
----------------------
The Iliad   | 1
The Nostoi  | 2
The Odyssey | 3
The Cypria  | 3</pre>
If Bob had used the *serializable* level, the list would have remained consistent for his entire transaction, so his update would not have affected "The Cypria" that Tom inserted, and so would not have updated its position from 4 to 3. (In practice the way databases normally handle this is to actually abort one of the transactions with an error.)

For those using Rails, you may have recognized the above scenario as a typical @acts_as_list@ scenario, and you'd be correct. In a default configuration, the @acts_as_list@ plugin makes the same mistake outlined above, and will leave you with inconsistent data. The quickest fix is to wrap all list operations in a serializable transaction.
<pre>Book.transaction do
  Book.connection.execute("SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE")
  @book = Book.find_by_name("The Odyssey")
  @book.move_to_bottom
end</pre>
(It may have occurred to you that some locking or a unique index on position could avoid the exact scenario above, but that breaks @acts_as_list@ and fails to address some other edge cases left as an exercise for the reader. The main point for the purpose of this article is to understand why it breaks under read committed, but works under serializable.)

As a general rule, read committed is a sensible default. It's easy to reason about, fast, and forces you to be explicit about your locking strategy. Jump up to serializable when needed, usually when dealing with ranges. MySQL's repeatable read default can be confusing and deadlock in unintuitive ways, as such it is not recommended.

This has been a very brief introduction to the four standard SQL isolation levels: read uncommitted, read committed, repeatable read, and serializable. Hopefully it has helped you get your head around them. I'll be going into much more detail with practical hands on exercises in my training days at Engine Yard's San Francisco office on the 24th and 31st of July. Visit <a href="http://www.dbisyourfriend.com/">www.dbisyourfriend.com</a> for course and registration details.<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[<div class="note">

Hello all,

Our latest post is from a special guest and Engine Yard partner Xavier Shay. He'll be running a pair of training sessions on "using your database to make your Ruby on Rails applications rock solid" at Engine Yard's San Francisco office on the 24th and 31st of July. Visit <a href="http://www.dbisyourfriend.com">www.dbisyourfriend.com</a> for course and registration details.

</div>
Bob opens a database transaction and selects everything from the books table. Tom comes along and adds a new book, then Bob, in his same transaction, repeats his same query for all the books. Does Bob see the new book that Tom added?

The answer is that you get to choose! It's important to understand what your choices are (and what choice your preferred database makes for you) so that you can ensure your code executes in a way that you intend.

The SQL standard specifies levels for how "isolated" transactions running at the same time are, all the way from being able to see uncommitted changes (not isolated) to effectively running the transactions in serial (full isolation). Academically there are eight levels of isolation, but for most purposes you only need to worry about the four defined by the standard. MySQL implements all four, PostgreSQL only two. You can specify a global isolation level for your database, but also override it for individual transactions.

The easiest to understand are the extreme levels: no isolation and total isolation. The first of these is known as *read uncommitted*, and it allows Bob to read the new book that Tom is adding _even before Tom has committed his changes_. As you can imagine this level is mostly useless, however it can very occasionally be handy in some reporting situations.

At the other end of the spectrum is full isolation, known in the spec as **serializable**. Bob will never see the new book that Tom is adding until he starts a new transaction. The database Bob sees is consistent---within the one transaction, the same query will always return the same result. At first glance this level seems like a great option but there's a lot of overhead involved, it drastically reduces the amount of concurrency you can achieve, and for most purposes the serializable level is overkill.

There are two isolation levels in between read uncommitted and serializable, they are *read committed* and *repeatable read*, and this is where it gets interesting. Read committed is the default isolation level in PostgreSQL and Oracle, and is one step up from read uncommitted. It is the most "common sense" level: Bob will not see any changes made by Tom until Tom commits them.

MySQL defaults to *repeatable read*. In this level, Bob will not see any _updates_ Tom commits, but will see any _inserts_. Say in Bob's first select he sees one book titled "The Odessey". Tom then fixes the spelling mistake to "The Odyssey", and also adds Homer's other epic poem "The Iliad". When Bob selects all books again, he will see "The Odessey" (old title, no spelling fix) and "The Iliad" (the inserted book).

To summarize, the four levels from least isolated to most isolated are: *read uncommitted*, *read committed*, *repeatable read*, and *serializable*. They define what types of Tom's changes Bob will be able to see within a single transaction.

<h2>In Practice</h2>

Say the books we are selecting are ordered based on an arbitrary position column (they're on our bookshelf, for instance). Assume read committed isolation level.
<pre>Title       | Position
----------------------
The Odyssey | 1
The Iliad   | 2
The Nostoi  | 3</pre>
Bob wants to move "The Odyssey" to the bottom position. To do this, he needs to update its position to the bottom of the list (position 4), then subtract 1 from all positions. At the same time, Tom is adding a new book "The Cypria". Working this through:

# Bob checks the bottom position, finds it to be 4
# Tom inserts "The Cypria" in the bottom position of 4
# Bob updates the position of "The Odyssey" to 4
# Bob subtracts 1 from all positions, and since he is using *read committed* he will "see" and update the newly inserted book.
# Both "The Odyssey" and "The Cypria" have a position of 3
<pre>Title       | Position
----------------------
The Iliad   | 1
The Nostoi  | 2
The Odyssey | 3
The Cypria  | 3</pre>
If Bob had used the *serializable* level, the list would have remained consistent for his entire transaction, so his update would not have affected "The Cypria" that Tom inserted, and so would not have updated its position from 4 to 3. (In practice the way databases normally handle this is to actually abort one of the transactions with an error.)

For those using Rails, you may have recognized the above scenario as a typical @acts_as_list@ scenario, and you'd be correct. In a default configuration, the @acts_as_list@ plugin makes the same mistake outlined above, and will leave you with inconsistent data. The quickest fix is to wrap all list operations in a serializable transaction.
<pre>Book.transaction do
  Book.connection.execute("SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE")
  @book = Book.find_by_name("The Odyssey")
  @book.move_to_bottom
end</pre>
(It may have occurred to you that some locking or a unique index on position could avoid the exact scenario above, but that breaks @acts_as_list@ and fails to address some other edge cases left as an exercise for the reader. The main point for the purpose of this article is to understand why it breaks under read committed, but works under serializable.)

As a general rule, read committed is a sensible default. It's easy to reason about, fast, and forces you to be explicit about your locking strategy. Jump up to serializable when needed, usually when dealing with ranges. MySQL's repeatable read default can be confusing and can deadlock in unintuitive ways; as such, it is not recommended.

This has been a very brief introduction to the four standard SQL isolation levels: read uncommitted, read committed, repeatable read, and serializable. Hopefully it has helped you get your head around them. I'll be going into much more detail with practical hands on exercises in my training days at Engine Yard's San Francisco office on the 24th and 31st of July. Visit <a href="http://www.dbisyourfriend.com/">www.dbisyourfriend.com</a> for course and registration details.<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/k4pRZIC80J4" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/a-gentle-introduction-to-isolation-levels/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/a-gentle-introduction-to-isolation-levels/</feedburner:origLink></item>
		<item>
		<title>Engine Yard AppCloud CLI</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/NikTyNB4nFE/</link>
		<comments>http://www.engineyard.com/blog/2010/engine-yard-appcloud-cli/#comments</comments>
		<pubDate>Tue, 20 Jul 2010 09:00:32 +0000</pubDate>
		<dc:creator>Corey Donohoe</dc:creator>
				<category><![CDATA[Cloud]]></category>
		<category><![CDATA[News]]></category>
		<category><![CDATA[Technology]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=4104</guid>
		<description><![CDATA[At Engine Yard we've been helping developers ship Ruby applications for almost four years. Our approach to deployment has changed a few times but at its core our focus has always been helping people deploy and scale Ruby on Rails applications on virtualized hardware. Almost two years ago, we started experimenting with <a href="http://aws.amazon.com/">Amazon's AWS</a> service and realized that people wanted more of a self service setup. For the first time, we decided to take a stab at providing the same kind of service on other people's hardware instead of our own. This has grown into our <a href="http://www.engineyard.com/products/appcloud">AppCloud</a> offering. Today, we're happy to announce an awesome new addition to AppCloud that enables developers to ship code faster, easier, and straight from the command line.

<h2>A Bit of EY History</h2>

In our early days, we provided our customers with customized <a href="http://www.capify.org/index.php/Capistrano">capistrano</a> recipes to deploy their ruby applications to our clusters. A problem quickly arose because we also needed to help them maintain this recipe as we helped them scale their applications. We learned that keeping our customers' capistrano recipes up to date was a truly painful exercise, so when we built AppCloud we went with a more centralized approach.

<h2>Early AppCloud Direction</h2>

We found keeping most of the deployment-related information in sync so painful that we built a web-based deployment strategy. It wasn't the worst idea ever, but having to leave your shell and go to a web browser isn't really what developers want. In addition, we were so excited about the idempotency that <a href="http://opscode.com/chef/">chef</a> offered at a configuration level that we felt it was imperative to "verify" the state of the system with a chef run each time we shipped code. This made pushing code slower than necessary and occasionally created panic situations if the chef run failed for some strange reason. People could still use capistrano with AppCloud, but it required them to re-download their deployment recipes every time their environment changed. There also wasn't an easy way to maintain customizations if customers kept having to re-download the capistrano recipe. Over and over again, we kept hearing the same complaints from customers. Customers liked the provisioning flexibility on AWS but shipping code on AppCloud was suboptimal. A few months ago, we finally admitted that our intentions were correct but we hadn't been doing the best things for our customers. We started working on a way to help our customers ship code more effectively.

<h2>Customer Feedback is Awesome</h2>

We accepted that idempotency is extremely important when it comes to system configuration but that doesn't mean you need to re-run chef each time you ship application code. We realized that people want to see their code running on their servers ASAP. Finally, we embraced the idea that people want to ship code with a command line tool similar to the way most people use rake to run their test suite. We're happy to introduce a more pleasant way to ship code to AppCloud, the <a href="http://rubygems.org/gems/engineyard">engineyard</a> gem.

<h2>A Better Workflow</h2>

The old way of deploying with chef works, but it forces you to reconfigure your servers every single time you deploy. The workflow looked like this:

* Boot some instances (provision, configure, deploy)
* Ship code (run configuration, deploy code)
* Ship code (run configuration, deploy code)
* Ship code (run configuration, deploy code)
* Tweak system configuration (configure)
* Ship code (configure, deploy)
* Ship code (configure, deploy)
* ...

With the Engine Yard CLI, you can deploy without verifying your system's configuration, so it's quite a bit faster to ship new code.

The new workflow looks like this:

* Boot some instances (provision, configure)
* Ship code (deploy)
* Ship code (deploy)
* Ship code (deploy)
* Tweak system configuration (configure)
* Ship code (deploy)
* Ship code (deploy)
* ...

We really think our customers are going to prefer this approach because, let's face it, we ship code way more often than we reconfigure systems.

<h2>Get Started</h2>

* <code>gem install engineyard</code>
* <code>cd ~/myapp</code>
* <code>ey deploy</code>

One of the things we like most about the new CLI is that it shows you, in real time, what's going on with your deploy. If something goes wrong, you don't have to scroll through a huge log in your browser; the error messages are right there in your terminal. When it succeeds, the process exits, so you know immediately that it's done. No more staring at the dashboard waiting for a spinning dot to turn into a green one. How about <code>ey deploy &amp;&amp; mpg123 woohoo.mp3 || mpg123 sad-trombone.mp3</code>? That's immediate, unmistakable, annoying, audible feedback. You can't get that from a green dot.

<h2>Other Great Features</h2>

* <a href="http://docs.engineyard.com/cli-user-guide/home#gem-bundling">Full Bundler Support</a>
* <a href="http://docs.engineyard.com/cli-user-guide/home#maintenance-pages">Maintenance Pages</a>
* <a href="http://docs.engineyard.com/cli-user-guide/home#deploy-hooks">Deploy Hooks for Extra Configuration</a>
* <a href="http://docs.engineyard.com/cli-user-guide/home#advanced-customization">Advanced Deployment Customization</a>
* <a href="http://docs.engineyard.com/cli-user-guide/home#ey-rebuild">Ensure System Configuration is Current</a>

You can do a lot more than just deploy with the engineyard gem. Check out the <a href="http://docs.engineyard.com/cli-user-guide/home">docs</a> and the <a href="http://docs.engineyard.com/cli-user-guide/faq">FAQ</a>.

Go forth and ship!
<img src="http://img.skitch.com/20100714-d6q52xajfh4cimxr3888yb77ru.jpg" alt="ship it squirrel" style="width:97%;" /><p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[At Engine Yard we've been helping developers ship Ruby applications for almost four years. Our approach to deployment has changed a few times but at its core our focus has always been helping people deploy and scale Ruby on Rails applications on virtualized hardware. Almost two years ago, we started experimenting with <a href="http://aws.amazon.com/">Amazon's AWS</a> service and realized that people wanted more of a self service setup. For the first time, we decided to take a stab at providing the same kind of service on other people's hardware instead of our own. This has grown into our <a href="http://www.engineyard.com/products/appcloud">AppCloud</a> offering. Today, we're happy to announce an awesome new addition to AppCloud that enables developers to ship code faster, easier, and straight from the command line.

<h2>A Bit of EY History</h2>

In our early days, we provided our customers with customized <a href="http://www.capify.org/index.php/Capistrano">capistrano</a> recipes to deploy their ruby applications to our clusters. A problem quickly arose because we also needed to help them maintain this recipe as we helped them scale their applications. We learned that keeping our customers' capistrano recipes up to date was a truly painful exercise, so when we built AppCloud we went with a more centralized approach.

<h2>Early AppCloud Direction</h2>

We found keeping most of the deployment-related information in sync so painful that we built a web-based deployment strategy. It wasn't the worst idea ever, but having to leave your shell and go to a web browser isn't really what developers want. In addition, we were so excited about the idempotency that <a href="http://opscode.com/chef/">chef</a> offered at a configuration level that we felt it was imperative to "verify" the state of the system with a chef run each time we shipped code. This made pushing code slower than necessary and occasionally created panic situations if the chef run failed for some strange reason. People could still use capistrano with AppCloud, but it required them to re-download their deployment recipes every time their environment changed. There also wasn't an easy way to maintain customizations if customers kept having to re-download the capistrano recipe. Over and over again, we kept hearing the same complaints from customers. Customers liked the provisioning flexibility on AWS but shipping code on AppCloud was suboptimal. A few months ago, we finally admitted that our intentions were correct but we hadn't been doing the best things for our customers. We started working on a way to help our customers ship code more effectively.

<h2>Customer Feedback is Awesome</h2>

We accepted that idempotency is extremely important when it comes to system configuration but that doesn't mean you need to re-run chef each time you ship application code. We realized that people want to see their code running on their servers ASAP. Finally, we embraced the idea that people want to ship code with a command line tool similar to the way most people use rake to run their test suite. We're happy to introduce a more pleasant way to ship code to AppCloud, the <a href="http://rubygems.org/gems/engineyard">engineyard</a> gem.

<h2>A Better Workflow</h2>

The old way of deploying with chef works, but it forces you to reconfigure your servers every single time you deploy. The workflow looked like this:

* Boot some instances (provision, configure, deploy)
* Ship code (run configuration, deploy code)
* Ship code (run configuration, deploy code)
* Ship code (run configuration, deploy code)
* Tweak system configuration (configure)
* Ship code (configure, deploy)
* Ship code (configure, deploy)
* ...

With the Engine Yard CLI, you can deploy without verifying your system's configuration, so it's quite a bit faster to ship new code.

The new workflow looks like this:

* Boot some instances (provision, configure)
* Ship code (deploy)
* Ship code (deploy)
* Ship code (deploy)
* Tweak system configuration (configure)
* Ship code (deploy)
* Ship code (deploy)
* ...

We really think our customers are going to prefer this approach because, let's face it, we ship code way more often than we reconfigure systems.

<h2>Get Started</h2>

* <code>gem install engineyard</code>
* <code>cd ~/myapp</code>
* <code>ey deploy</code>

One of the things we like most about the new CLI is that it shows you, in real time, what's going on with your deploy. If something goes wrong, you don't have to scroll through a huge log in your browser; the error messages are right there in your terminal. When it succeeds, the process exits, so you know immediately that it's done. No more staring at the dashboard waiting for a spinning dot to turn into a green one. How about <code>ey deploy &amp;&amp; mpg123 woohoo.mp3 || mpg123 sad-trombone.mp3</code>? That's immediate, unmistakable, annoying, audible feedback. You can't get that from a green dot.

<h2>Other Great Features</h2>

* <a href="http://docs.engineyard.com/cli-user-guide/home#gem-bundling">Full Bundler Support</a>
* <a href="http://docs.engineyard.com/cli-user-guide/home#maintenance-pages">Maintenance Pages</a>
* <a href="http://docs.engineyard.com/cli-user-guide/home#deploy-hooks">Deploy Hooks for Extra Configuration</a>
* <a href="http://docs.engineyard.com/cli-user-guide/home#advanced-customization">Advanced Deployment Customization</a>
* <a href="http://docs.engineyard.com/cli-user-guide/home#ey-rebuild">Ensure System Configuration is Current</a>

You can do a lot more than just deploy with the engineyard gem. Check out the <a href="http://docs.engineyard.com/cli-user-guide/home">docs</a> and the <a href="http://docs.engineyard.com/cli-user-guide/faq">FAQ</a>.

Go forth and ship!
<img src="http://img.skitch.com/20100714-d6q52xajfh4cimxr3888yb77ru.jpg" alt="ship it squirrel" style="width:97%;" /><p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/NikTyNB4nFE" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/engine-yard-appcloud-cli/feed/</wfw:commentRss>
		<slash:comments>6</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/engine-yard-appcloud-cli/</feedburner:origLink></item>
		<item>
		<title>Concurrency and the AASM Gem</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/uKVzvBbkquQ/</link>
		<comments>http://www.engineyard.com/blog/2010/concurrency-and-the-aasm-gem/#comments</comments>
		<pubDate>Mon, 19 Jul 2010 09:13:28 +0000</pubDate>
		<dc:creator>Xavier Shay</dc:creator>
				<category><![CDATA[Events]]></category>
		<category><![CDATA[Partners]]></category>
		<category><![CDATA[Technology]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=4084</guid>
		<description><![CDATA[<div class="note">

Hello all,

The Engine Yard blog is back in action after taking a break following JRuby 1.5, Rubinius 1.0, the introduction of xCloud, RailsConf and (very soon) Rails 3.

Our latest post is from a special guest and Engine Yard partner Xavier Shay. He’ll be running training sessions on ‘using your database to make your Ruby on Rails applications rock solid’ at Engine Yard's San Francisco office on the 24th and 31st of July. Visit <a href="http://www.dbisyourfriend.com">www.dbisyourfriend.com</a> for course and registration details.

</div>
Your Ruby on Rails code is run concurrently, whether you like it or not.

Concurrency is a staple term when talking about hosting infrastructure, but it is too often brushed aside when discussing actual code bases. This attitude is especially prevalent in the Ruby on Rails community: I can't name one popular plugin that gets it right. In this post I will address problems with the typical state machine pattern used by Rails applications, and show you how to address them and make your code bullet-proof.
<h2>The Problem</h2>
Consider the following controller action, backing a big green "ship button" next to a purchase order:
<pre>def ship
  @order = PurchaseOrder.find(params[:id])
  @order.ship!
  redirect_to order_path(@order)
end</pre>
Imagine two users both press the "ship" button at the same time. (Or, as often happens, one user double-clicks the button.) The two requests will hit the load balancer and be distributed out to run on different processes. What happens when the above code---typical of many Rails applications---is run in two different places at the same time?

Both processes will load the order from the database at line 2. At line 3 when the <code>ship!</code> method is run, both processes will check the attributes of the order and see that it is currently unshipped. As a result, both execute shipping code, which may include sending emails, updating caches, and transferring funds. As a result, the customer will receive duplicate emails, or worse, be charged twice. All versions of acts_as_state_machine (AASM) exhibit this behavior.
<h2>The Fix</h2>
Any time you read data from the database with the intention of making changes based on that data ("ship the order if it isn't already shipped") you must obtain an exclusive database lock on the row (or employ some form of optimistic locking strategy when updating, a topic not covered in this post). The database will block any processes trying to access that row until the session that obtained the lock concludes its transaction (COMMIT or ROLLBACK). ActiveRecord allows us to do this using the <code>:lock</code> flag:
<pre>def ship
  PurchaseOrder.transaction do
    @order = PurchaseOrder.find(params[:id], :lock =&gt; true)
    @order.ship!
  end
  redirect_to order_path(@order)
end</pre>
Working through the above example again, the first process to execute the <code>find</code> will issue the following SQL:
<pre>SELECT * FROM purchase_orders WHERE id = 1 FOR UPDATE</pre>
Notice the "FOR UPDATE" on the end; this instructs the database to place an exclusive lock on the row. When the second process executes the <code>find</code> and submits the above SQL to the database, the database will wait for the first transaction to complete (after calling <code>ship!</code> and updating the state of the order) before reading and returning the row. The returned row will now have a state of "shipped", and as such the <code>ship!</code> method will effectively be a noop (no operation). The customer will only receive one email.

It is also possible using ActiveRecord to lock an object that has been already loaded from the database:
<pre>def ship
  @order = PurchaseOrder.find(params[:id])
  PurchaseOrder.transaction do
    @order.lock!
    @order.ship!
  end
  redirect_to order_path(@order)
end</pre>
This is equivalent to a <code>reload</code>, but adds the "FOR UPDATE" suffix necessary for a database lock. It is an extra SQL statement (the order is selected twice), but is an easier pattern to abstract away.
<pre>class Order &lt; ActiveRecord::Base
  # This method is usually provided by AASM
  def ship!
    return if shipped?
  # Important emails and computations
  end
  def ship_with_lock!
    transaction do
      lock!
      ship_without_lock!
    end
  end
  alias_method_chain :ship!, :lock
end</pre>
With <code>alias_method_chain</code>, we can continue to use exactly the same controller code we started with (just a plain call to <code>ship!</code>), and locking is handled for us in the background.

Lost updates or duplicate execution won't be a problem for every website, but if you are starting to worry about the concurrency of your hosting infrastructure, it's worth having a look over your code too.

If you’d like to join me for some hands-on work with this, I’ll be running classes at Engine Yard's San Francisco office on the 24th and 31st of July. Visit <a href="http://www.dbisyourfriend.com">www.dbisyourfriend.com</a> for course and registration details.<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p>]]></description>
			<content:encoded><![CDATA[<div class="note">

Hello all,

The Engine Yard blog is back in action after taking a break following JRuby 1.5, Rubinius 1.0, the introduction of xCloud, RailsConf and (very soon) Rails 3.

Our latest post is from a special guest and Engine Yard partner Xavier Shay. He’ll be running training sessions on ‘using your database to make your Ruby on Rails applications rock solid’ at Engine Yard's San Francisco office on the 24th and 31st of July. Visit <a href="http://www.dbisyourfriend.com">www.dbisyourfriend.com</a> for course and registration details.

</div>
Your Ruby on Rails code is run concurrently, whether you like it or not.

Concurrency is a staple term when talking about hosting infrastructure, but it is too often brushed aside when discussing actual code bases. This attitude is especially prevalent in the Ruby on Rails community: I can't name one popular plugin that gets it right. In this post I will address problems with the typical state machine pattern used by Rails applications, and show you how to address them and make your code bullet-proof.
<h2>The Problem</h2>
Consider the following controller action, backing a big green "ship button" next to a purchase order:
<pre>def ship
  @order = PurchaseOrder.find(params[:id])
  @order.ship!
  redirect_to order_path(@order)
end</pre>
Imagine two users both press the "ship" button at the same time. (Or, as often happens, one user double-clicks the button.) The two requests will hit the load balancer and be distributed out to run on different processes. What happens when the above code---typical of many Rails applications---is run in two different places at the same time?

Both processes will load the order from the database at line 2. At line 3 when the <code>ship!</code> method is run, both processes will check the attributes of the order and see that it is currently unshipped. As a result, both execute shipping code, which may include sending emails, updating caches, and transferring funds. As a result, the customer will receive duplicate emails, or worse, be charged twice. All versions of acts_as_state_machine (AASM) exhibit this behavior.
<h2>The Fix</h2>
Any time you read data from the database with the intention of making changes based on that data ("ship the order if it isn't already shipped") you must obtain an exclusive database lock on the row (or employ some form of optimistic locking strategy when updating, a topic not covered in this post). The database will block any processes trying to access that row until the session that obtained the lock concludes its transaction (COMMIT or ROLLBACK). ActiveRecord allows us to do this using the <code>:lock</code> flag:
<pre>def ship
  PurchaseOrder.transaction do
    @order = PurchaseOrder.find(params[:id], :lock =&gt; true)
    @order.ship!
  end
  redirect_to order_path(@order)
end</pre>
Working through the above example again, the first process to execute the <code>find</code> will issue the following SQL:
<pre>SELECT * FROM purchase_orders WHERE id = 1 FOR UPDATE</pre>
Notice the "FOR UPDATE" on the end; this instructs the database to place an exclusive lock on the row. When the second process executes the <code>find</code> and submits the above SQL to the database, the database will wait for the first transaction to complete (after calling <code>ship!</code> and updating the state of the order) before reading and returning the row. The returned row will now have a state of "shipped", and as such the <code>ship!</code> method will effectively be a noop (no operation). The customer will only receive one email.

It is also possible using ActiveRecord to lock an object that has been already loaded from the database:
<pre>def ship
  @order = PurchaseOrder.find(params[:id])
  PurchaseOrder.transaction do
    @order.lock!
    @order.ship!
  end
  redirect_to order_path(@order)
end</pre>
This is equivalent to a <code>reload</code>, but adds the "FOR UPDATE" suffix necessary for a database lock. It is an extra SQL statement (the order is selected twice), but is an easier pattern to abstract away.
<pre>class Order &lt; ActiveRecord::Base
  # This method is usually provided by AASM
  def ship!
    return if shipped?
  # Important emails and computations
  end
  def ship_with_lock!
    transaction do
      lock!
      ship_without_lock!
    end
  end
  alias_method_chain :ship!, :lock
end</pre>
With <code>alias_method_chain</code>, we can continue to use exactly the same controller code we started with (just a plain call to <code>ship!</code>), and locking is handled for us in the background.

Lost updates or duplicate execution won't be a problem for every website, but if you are starting to worry about the concurrency of your hosting infrastructure, it's worth having a look over your code too.

If you’d like to join me for some hands-on work with this, I’ll be running classes at Engine Yard's San Francisco office on the 24th and 31st of July. Visit <a href="http://www.dbisyourfriend.com">www.dbisyourfriend.com</a> for course and registration details.<p><a href="http://www.engineyard.com/blog"><img height="98" width="61" title="logo-engineyard" alt="" class="attachment-post-thumbnail wp-post-image" src="http://www.engineyard.com/blog/?getfile=4050"/></a></p><img src="http://feeds.feedburner.com/~r/engineyard/~4/uKVzvBbkquQ" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2010/concurrency-and-the-aasm-gem/feed/</wfw:commentRss>
		<slash:comments>19</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2010/concurrency-and-the-aasm-gem/</feedburner:origLink></item>
	</channel>
</rss>
