<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;AkUDQHcyfSp7ImA9WhRUGUQ.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175</id><updated>2012-01-31T03:24:31.995-05:00</updated><category term="logging" /><category term="transition debt" /><category term="super" /><category term="erlang" /><category term="rsync" /><category term="mock objects" /><category term="javadoc" /><category term="development" /><category term="junit" /><category term="evidence-based scheduling" /><category term="cycle time" /><category term="maven" /><category term="hosting" /><category term="TestUtil" /><category term="resolution" /><category term="restful web services" /><category term="service" /><category term="scrum master" /><category term="war" /><category term="pmd" /><category term="user stories" /><category term="hadoop" /><category term="sprint" /><category term="inheritance" /><category term="pre-planning" /><category term="lean engineering" /><category term="backlog" /><category term="easymock" /><category term="setters" /><category term="microservices" /><category term="spring" /><category term="nosql" /><category term="burndown" /><category term="xhtml" /><category term="product owner" /><category term="aws" /><category term="lab week" /><category term="auto-deploy" /><category term="backup" /><category term="MBeanExporter" /><category term="littles law" /><category term="java" /><category term="riak" /><category term="refactoring" /><category term="cobertura" /><category term="confidence" /><category term="tomcat deploy 
script" /><category term="kaizen" /><category term="openssl" /><category term="CRUD" /><category term="release management" /><category term="maven2" /><category term="concurrency" /><category term="subclassing" /><category term="teams" /><category term="joel on software" /><category term="getters" /><category term="archives" /><category term="mvc" /><category term="rest" /><category term="trac" /><category term="controller" /><category term="timeboxing" /><category term="negotiation" /><category term="process improvement" /><category term="scrum of scrums" /><category term="backups for mac" /><category term="innovation" /><category term="planning poker" /><category term="optimization" /><category term="atom" /><category term="waterfall" /><category term="taglib" /><category term="INVEST" /><category term="architecture" /><category term="hbase" /><category term="prototype" /><category term="pig" /><category term="jce" /><category term="rest programming" /><category term="iteration length" /><category term="design patterns" /><category term="jsp method calls" /><category term="PaaS" /><category term="ken schwaber" /><category term="base types" /><category term="sprint goal" /><category term="redis" /><category term="sprint pre planning" /><category term="rsa" /><category term="inheritance hierarchy" /><category term="cross-team dependencies" /><category term="availability" /><category term="MBean" /><category term="web applications" /><category term="HTMLParser" /><category term="tomcat" /><category term="curl" /><category term="http" /><category term="IDE" /><category term="DTO" /><category term="tasks" /><category term="interface" /><category term="download" /><category term="design philosophy" /><category term="flow" /><category term="agile" /><category term="unit test" /><category term="python" /><category term="metrics" /><category term="public key cryptography" /><category term="neo4j" /><category term="voldemort" /><category term="cruisecontrol" /><category 
term="cross-functional" /><category term="kanban" /><category term="tdd" /><category term="simple mac backups" /><category term="monte carlo simulation" /><category term="business case" /><category term="csv" /><category term="jmx" /><category term="story points" /><category term="app engine" /><category term="cascading" /><category term="deploy" /><category term="shell script" /><category term="cassandra" /><category term="emacs" /><category term="commons-logging" /><category term="cloud computing" /><category term="scrum pre-planning" /><category term="ajax" /><category term="architectural debt" /><category term="sashimi" /><category term="scrum story planning" /><category term="esr" /><category term="integration test" /><category term="jsp" /><category term="ssh" /><category term="software design" /><category term="oop" /><category term="abstract data type" /><category term="key-value stores" /><category term="hypermedia" /><category term="scrum planning session" /><category term="tco" /><category term="sprint review" /><category term="mac osx backups" /><category term="nosqleast" /><category term="SOAP" /><category term="technical debt" /><category term="wip" /><category term="REST API" /><category term="cargo" /><category term="blogger" /><category term="scrum" /><category term="domain object method" /><category term="web2.0" /><category term="unix" /><category term="configuration debt" /><category term="throughput" /><category term="sprint planning" /><category term="caching" /><category term="message passing" /><category term="prioritization" /><category term="jconsole" /><category term="cactus" /><category term="queuing theory" /><category term="boolean" /><category term="management" /><category term="estimation" /><title>codeartisan</title><subtitle type="html">The Art of Writing Software</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/posts/default" /><link rel="alternate" 
type="text/html" href="http://codeartisan.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>55</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/codeartisan" /><feedburner:info uri="codeartisan" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry gd:etag="W/&quot;A0MHRHk5fip7ImA9Wx9SEU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-8677414750617399992</id><published>2010-11-27T09:17:00.009-05:00</published><updated>2010-11-30T10:03:55.726-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-11-30T10:03:55.726-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="process improvement" /><category scheme="http://www.blogger.com/atom/ns#" term="queuing theory" /><category scheme="http://www.blogger.com/atom/ns#" term="metrics" /><category scheme="http://www.blogger.com/atom/ns#" term="development" /><category scheme="http://www.blogger.com/atom/ns#" term="management" /><category scheme="http://www.blogger.com/atom/ns#" term="flow" /><title>Managing Software Development Flow</title><content type="html">&lt;p&gt;This is the third post in a series about &lt;i&gt;software development flow&lt;/i&gt;, which I'm describing as the conversion of customer 
requests (both for new features as well as bug reports) into working software. In the &lt;a href="http://codeartisan.blogspot.com/2010/11/how-to-go-faster.html"&gt;first post&lt;/a&gt; on this topic, we talked about how a software development organization can be viewed as a request processing engine, and how &lt;a href="http://en.wikipedia.org/wiki/Queueing_theory"&gt;queuing theory&lt;/a&gt; (and &lt;a href="http://en.wikipedia.org/wiki/Little's_law"&gt;Little's Law&lt;/a&gt; in particular) can be applied to optimize overall throughput and time-to-market (also called "cycle time"). In the &lt;a href="http://codeartisan.blogspot.com/2010/11/intuitions-about-software-development.html"&gt;second article&lt;/a&gt;, we revisited these same concepts with a more intuitive explanation and started to identify the management tradeoffs that come into play. This article will focus mostly on this final area: what metrics are important for management to understand, and what are some mechanisms/levers they can apply to try to optimize throughput?&lt;/p&gt;

&lt;p&gt;I am being intentionally vague about particular processes here, since these principles can be applied regardless of particular process. I will also not talk about particular functional specialities like UX, Development, QA, or Ops; whether you have a staged waterfall process or a fully cross-functional agile team, the underlying theory still applies. Now, the adjustments we talk about here will have the greatest effect when applied to the greatest organizational scope (for example, including everything from customer request intake all the way to delivering working software), but Little's Law says they can also be applied to individual subsystems (for example, perhaps just Dev/QA/Ops taken together, or even just Ops) as well. Of course, the more you focus on individual parts of the system, the more likely you are to locally optimize, perhaps to the detriment of the system as a whole.&lt;/p&gt;

&lt;h2&gt;Managing Queuing Delay&lt;/h2&gt;

&lt;p&gt;As we've seen previously, at least some of the time a customer request is moving through our organization, it isn't being actively worked on; this time is known as &lt;i&gt;queuing delay&lt;/i&gt;. There are different potential causes of queuing delay, including:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;b&gt;batching:&lt;/b&gt; if we group individual, independent features together into batches (like sprints or releases), then some of the time an individual feature will either be waiting for its turn to get worked on or it will be waiting for the other features in the batch to get finished&lt;/li&gt;
&lt;li&gt;&lt;b&gt;multitasking:&lt;/b&gt; if people can have more than one task assigned, they can still only work on one thing at a time, so their other tasks will be in a wait state&lt;/li&gt;
&lt;li&gt;&lt;b&gt;backlogs:&lt;/b&gt; these are explicit queues where features wait their turn for implementation&lt;/li&gt;
&lt;li&gt;etc.&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;The simplest way to observe queuing delay is to measure it directly: &lt;b&gt;what percentage of my in-flight items don't have someone actively working on them?&lt;/b&gt; If your process is visualized, perhaps with a &lt;a href="http://www.infoq.com/articles/agile-kanban-boards"&gt;kanban board&lt;/a&gt;, and you use avatars for your people to show what they are working on, then this is no harder than counting how many in-flight items don't have an avatar on them.&lt;/p&gt;

&lt;p&gt;[ Side note: if you have also measured your overall delivery throughput &lt;i&gt;X&lt;/i&gt;, Little's Law says:&lt;/p&gt;
&lt;blockquote&gt;
&lt;i&gt;N&lt;span style="vertical-align:sub;font-size:smaller"&gt;Q&lt;/span&gt; / N = XR&lt;span style="vertical-align:sub;font-size:smaller"&gt;Q&lt;/span&gt; / XR = R&lt;span style="vertical-align:sub;font-size:smaller"&gt;Q&lt;/span&gt; / R&lt;/i&gt;
&lt;/blockquote&gt;
&lt;p&gt;In other words, your queuing delay &lt;i&gt;R&lt;span style="vertical-align:sub;font-size:smaller"&gt;Q&lt;/span&gt;&lt;/i&gt; is the same percentage of your overall cycle time &lt;i&gt;R&lt;/i&gt; as the number of queued items &lt;i&gt;N&lt;span style="vertical-align:sub;font-size:smaller"&gt;Q&lt;/span&gt;&lt;/i&gt; is of the overall number of in-flight items &lt;i&gt;N&lt;/i&gt;. So you can actually &lt;b&gt;measure&lt;/b&gt; your queuing delay pretty easily this way.]&lt;/p&gt;

&lt;p&gt;The primary mechanism, then, for reducing queuing delay is to reduce the number of in-flight items allowed in the system. One simple mechanism for managing this is to adopt a "one-in-one-out" mechanism that admits new feature requests only when a previous feature has been delivered; this puts a cap on the number of in-flight items &lt;i&gt;N&lt;/i&gt;. We can then periodically (perhaps once a week, or once an iteration) reduce &lt;i&gt;N&lt;/i&gt; by taking a slot out: in essence, when we finish one feature request, we &lt;i&gt;don't&lt;/i&gt; admit a new one, thus reducing the number of overall requests in-flight.&lt;/p&gt;

&lt;p&gt;Undoubtedly there will come a time when a high priority request shows up, and there would be too much opportunity cost to waiting for something else to finish up so it can be inserted. One possibility here is to flag this request as an emergency, perhaps by attaching a red flag to it on a kanban board to note its priority, and temporarily allow that new request in, with the notion that we will &lt;i&gt;not&lt;/i&gt; admit a new feature request once the emergency feature finishes.&lt;/p&gt;

&lt;h2&gt;Managing Failure Demand&lt;/h2&gt;

&lt;p&gt;Recall that failure demand consists of requests we have to deal with because we didn't deliver something quite right the first time--think production outages or user bug reports. Failure demand can be quite expensive: according to &lt;a href="http://www.superwebdeveloper.com/2009/11/25/the-incredible-rate-of-diminishing-returns-of-fixing-software-bugs/"&gt;one estimate&lt;/a&gt;, fixing a bug in production can be more than 15 times as expensive as correcting it during development. In other words, &lt;b&gt;having work show up as failure demand is probably the most expensive possible way to get that work done.&lt;/b&gt; Cost aside, however, any amount of failure demand that shows up detracts from our ability to service value demand--the new features our customers want.&lt;/p&gt;

&lt;p&gt;From a monitoring and metrics perspective, we simply compute the percentage of all in-flight requests that are failure demand. Now the question is how to manage that percentage downward so that we aren't paying as much of a failure demand tax on new development.&lt;/p&gt;

&lt;p&gt;To get rid of existing failure demand, we need to address the root causes for these issues. Ideally, we would want to do a root cause analysis and fix for &lt;i&gt;every&lt;/i&gt; incident (this is the long-term cheapest way to deal with the problems), but for organizations already experiencing high failure demand, this might temporarily drag new feature development to a halt. An alternative is to have a single "root cause fix" token that circulates through the organization: if it is not being used, then the next incident to arrive gets the token assigned. We do a root cause analysis and fix for &lt;i&gt;that issue&lt;/i&gt;, then when we've finished that, the token frees up and we look for the next issue to fix. This approach caps the labor investment in root cause analysis and fixing, and will, probabilistically, end up fixing the most common issues first. Over time, this will gradually wear away at the causes of existing failure demand. It's worth noting that you may not have to go to the uber root cause to have a positive effect--just fixing the issue in a way that makes it less likely to occur again will ultimately reduce failure demand.&lt;/p&gt;

&lt;p&gt;However, we haven't addressed the upstream sources of failure demand yet; if we chip away at existing failure demand but continue to pile more on via new feature development, we'll ultimately lose ground. &lt;b&gt;The primary cause of new failure demand is trying to hit an aggressive deadline with a fixed scope&lt;/b&gt;--something has to give here, and what usually gives is quality. There may well be reasons that this is the right tradeoff to make; perhaps there are marketing campaigns scheduled to start or contractual obligations that must be met (we'll save a discussion of how those dates got planned for another time). At any rate, management needs to understand the tradeoffs that are being made, and needs to be given the readouts to responsibly govern the process. "Percent failure demand" turns out to be a pretty simple and informative metric.&lt;/p&gt;
 
&lt;h2&gt;Managing Cycle Time&lt;/h2&gt;

&lt;p&gt;Draining queuing delay and tackling failure demand are pretty much no-brainers: they are easy to track, and there are easy-to-understand ways to reduce both. However, once we've gotten all the gains we can out of those two prongs of attack, all that is left is trying to further reduce cycle time (and hence raise throughput) via process change. This is much harder--there are no silver bullets here. Although there are any number of folks who will claim to "know" the process changes that are needed here, ranging from Agile consultants, to other managers, to the folks working on the software itself, the reality is that these ideas aren't really guaranteed solutions. They are, however, a really good source of process experiments to run.&lt;/p&gt;

&lt;p&gt;Measuring cycle time is important, because thanks to queuing theory and Little's Law, it directly corresponds to throughput in a system with a fixed set of work in-flight. Furthermore, it is very easy to measure average cycle time; the data can be collected by hand and run through a spreadsheet with little real effort. This makes it an ideal metric for evaluating a process change experiment:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;if cycle time decreases, keep the process change as an improvement&lt;/li&gt;
&lt;li&gt;if cycle time increases, revert to the old process and try something different&lt;/li&gt;
&lt;li&gt;if cycle time is not affected, you might as well keep the change but still look for improvement somewhere else&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Keeping "no-effect" process changes in place sets the stage for a culture of continual process improvement; it encourages experimentation if nothing else (and the cycle time measurements have indicated it hasn't hurt). Now, regardless of the experiment, it's important to set a timebox around the experiment so that we can evaluate it: "let's try it this way for a month and see what happens". New processes take time to sink in, so it's important not to run experiments that are too short--we want to give the new process a chance to shake out and see what it can really do. It's also worth noting here that managers should expect some of the experiments to "fail" with increased cycle time or to have no appreciable effect. This is unfortunately the nature of the scientific method--if we could be prescient we'd just jump straight to the optimized process--but this is a tried and true method for learning.&lt;/p&gt;

&lt;p&gt;Now, process change requires effort to roll out, so a good question to ask here is how to find the time/people to carry this out. There's a related performance tuning concept here known as the &lt;a href="http://en.wikipedia.org/wiki/Theory_of_Constraints"&gt;Theory of Constraints&lt;/a&gt;, which I'll just paraphrase as "there's always a bottleneck somewhere." If we keep reducing work in-flight, and we have the end-to-end process visualized somewhere, we should be able to see where the bottleneck in the process is. The Theory of Constraints also says that you don't need to take on any more work than the bottleneck can process, which means, depending on your process and organizational structure, that we may find that we can apply folks both "upstream" and "downstream" of the bottleneck to a process change experiment &lt;b&gt;without actually decreasing overall throughput&lt;/b&gt;. Furthermore, by identifying the bottleneck, we have a good starting point for &lt;i&gt;selecting&lt;/i&gt; an experiment to run: let's try something that will alleviate the bottleneck (or, as the Theory of Constraints says, just move it elsewhere).&lt;/p&gt;

&lt;h2&gt;Conclusion&lt;/h2&gt;

&lt;p&gt;In this article, we've seen that managers really only need a few easy-to-collect metrics on an end-to-end software delivery flow to enable them to optimize throughput:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;total number of items in-flight&lt;/li&gt;
&lt;li&gt;number of "idle" in-flight items (not actively being worked)&lt;/li&gt;
&lt;li&gt;number of in-flight items that are failure demand&lt;/li&gt;
&lt;li&gt;end-to-end average cycle time&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;We've also identified several mechanisms, ranging from reducing work-in-progress to root cause fixes of failure demand, that can enable managers to perform optimizations on their process &lt;i&gt;at a pace that suits the business&lt;/i&gt;. This is the classic empirical process control ("inspect and adapt") model that has been demonstrated to work effectively time and again in many settings, from the shop floor of Toyota factories to the team rooms of agile development organizations.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-8677414750617399992?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/tK8U0rajEbAd9aIEqidKmTdE2GU/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/tK8U0rajEbAd9aIEqidKmTdE2GU/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/tK8U0rajEbAd9aIEqidKmTdE2GU/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/tK8U0rajEbAd9aIEqidKmTdE2GU/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/8677414750617399992/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=8677414750617399992" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/8677414750617399992?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/8677414750617399992?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/S5SHpRtzCsI/managing-software-development-flow.html" title="Managing Software Development Flow" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/11/managing-software-development-flow.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D08DQ3o-eip7ImA9Wx9TFkQ.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-5062957053625057189</id><published>2010-11-25T06:37:00.011-05:00</published><updated>2010-11-25T09:37:52.452-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-11-25T09:37:52.452-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="cycle time" /><category scheme="http://www.blogger.com/atom/ns#" term="throughput" /><category scheme="http://www.blogger.com/atom/ns#" term="optimization" /><category 
scheme="http://www.blogger.com/atom/ns#" term="flow" /><title>Intuitions about Software Development Flow</title><content type="html">&lt;p&gt;In a previous post, I described the &lt;a href="http://codeartisan.blogspot.com/2010/11/how-to-go-faster.html"&gt;underlying theory behind optimizing the throughput of a software development organization&lt;/a&gt;, which consists  of a three-pronged attack:&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;remove queuing delay by limiting the number of features in-flight&lt;/li&gt;
&lt;li&gt;remove failure demand by building in quality up front and fixing root causes of problems&lt;/li&gt;
&lt;li&gt;reduce average cycle time by experimenting with process improvements&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;In this article, I'd like to provide an alternative visualization to help motivate these changes. Let's start with some idealized flow, where we have sufficient throughput to deal with all of our incoming customer requests. Or, if we prefer, the rate at which our business stakeholders inject requests for new features is matched to the rate at which we can deliver them.&lt;/p&gt;

&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://www.flickr.com/photos/70245350@N00/5205968909/"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 300px; height: 301px;" src="http://farm6.static.flickr.com/5242/5205968909_4764b1fcf8_d.jpg" border="0" alt="" /&gt;&lt;/a&gt;

&lt;h2&gt;Queuing Delay&lt;/h2&gt;

&lt;p&gt;Now let's add some queuing delay, in the form of some extra water sitting in the sink:&lt;/p&gt;

&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://www.flickr.com/photos/70245350@N00/5206618200/"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 300px; height: 301px;" src="http://farm6.static.flickr.com/5047/5206618200_74e195ec6b_d.jpg" border="0" alt="" /&gt;&lt;/a&gt;

&lt;p&gt;If we leave the faucet of customer requests running at the same rate that the development organization can "drain" them out into working software, we can understand that the level of water in the sink will stay constant. Compared to our original diagram, features are still getting shipped at the same rate they were before; the only difference is that now for any particular feature, it takes longer to get out the other side, because it has to spend some time sitting around in the pool of queuing delay.&lt;/p&gt;

&lt;p&gt;Getting rid of queuing delay is as simple as turning the faucet down slightly so that the pool can start draining; once we've drained all the queuing delay out, we can turn the faucet back up again, with no net change other than improved time-to-market (cycle time). There's a management investment tradeoff here; the more we turn the faucet down, the faster the pool drains and the sooner we can turn the faucet back up to full speed at a faster cycle time. On the other hand, that requires (temporarily) slowing down feature development to let currently in-flight items "drain" a bit. Fortunately, this is something that can be done completely flexibly as business situations dictate--simply turn the knob on the faucet as desired, and adjust it as many times as needed.&lt;/p&gt;

&lt;h2&gt;Failure Demand&lt;/h2&gt;

&lt;p&gt;We can model failure demand as a tube that siphons some of the organization's throughput off and runs it back into the sink in the form of bug reports and production incidents:&lt;/p&gt;

&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://www.flickr.com/photos/70245350@N00/5206672202/"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 300px; height: 303px;" src="http://farm5.static.flickr.com/4154/5206672202_c7914e14a6_d.jpg" border="0" alt="" /&gt;&lt;/a&gt;

&lt;p&gt;Our sink intuition tells us that we'll have to turn the faucet down--even if only slightly--if we don't want queuing delay to start backing up in the system (otherwise we're adding new requests plus the bug fixing to the sink at a rate faster than the drain will accommodate). Now, every time we ship new features that have bugs or aren't robust to failure conditions (particularly common when rushing to hit a deadline), it's like making the failure demand siphon wider; ultimately we're stealing from our future throughput. When we fix the root cause of an issue, it's like making the failure demand siphon narrower, and we not only get happier customers, but we reclaim some of our overall throughput.&lt;/p&gt;

&lt;p&gt;Again, there are management tradeoffs to be made here: fixing the root cause of an issue may take longer than just triaging it, but it is ultimately an investment in higher throughput. Similarly, rushing not-quite-solid software out the door is ultimately borrowing against future throughput. However, it's not hard to see that if we &lt;span style="font-style:italic;"&gt;never&lt;/span&gt; invest in paying down the failure demand, eventually it will consume all of our throughput and severely reduce our ability to ship new features. This is why it is important for management to have a clear view of failure demand in comparison to overall throughput so that these tradeoffs can be managed responsibly.&lt;/p&gt;

&lt;h2&gt;Process Change&lt;/h2&gt;

&lt;p&gt;The final thing we can do is to improve our process, which is roughly like taking all the metal of the drain pipe (corresponding loosely to the people in our organization) and reconfiguring it into a shorter, fatter pipe:&lt;/p&gt;

&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://www.flickr.com/photos/70245350@N00/5206709944/"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 300px; height: 340px;" src="http://farm5.static.flickr.com/4128/5206709944_8e8b01cc74_d.jpg" border="0" alt="" /&gt;&lt;/a&gt;

&lt;p&gt;This shows the intuition that if we focus on cycle time (length of the pipe) for our process change experiments, it will essentially free up people (metal) to work on more things (pipe width) at a time, thus improving throughput. There is likewise a management tradeoff to make here: process change takes time and investment, and we'll need to back off feature development for a while to enable that. On the other hand, there's simply no way to improve throughput without changing your process somehow; underinvestment here compared to our competitors means eventually we'll get left in the dust, just as surely as failing to invest cash financially will eventually lead to an erosion of purchasing power due to inflation.&lt;/p&gt;

&lt;h2&gt;Summary&lt;/h2&gt;
&lt;p&gt;Hopefully, we've given some intuitive descriptions of the ways to improve time-to-market and throughput for a software development organization to complement the &lt;a href="http://codeartisan.blogspot.com/2010/11/how-to-go-faster.html"&gt;theory&lt;/a&gt; presented in the first post on this topic. We've also touched on some of the management tradeoffs these changes entail and some of the information management will need to guide things responsibly.&lt;/p&gt;

&lt;hr/&gt;
&lt;p style="font-size:smaller;"&gt;&lt;b&gt;Credits:&lt;/b&gt; Sink diagrams are available under a &lt;a href="http://creativecommons.org/licenses/by-sa/2.0/deed.en"&gt;Creative Commons Attribution-ShareAlike 2.0 Generic license&lt;/a&gt; and were created using photos by &lt;a href="http://www.flickr.com/photos/tudor/268918251/"&gt;tudor&lt;/a&gt; and &lt;a href="http://www.flickr.com/photos/doortoriver/2993206333/"&gt;doortoriver&lt;/a&gt;.&lt;/p&gt;
&lt;hr/&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-5062957053625057189?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/w9Fveh25s2QFRXwWsngvVOxTO3U/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/w9Fveh25s2QFRXwWsngvVOxTO3U/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/w9Fveh25s2QFRXwWsngvVOxTO3U/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/w9Fveh25s2QFRXwWsngvVOxTO3U/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/5062957053625057189/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=5062957053625057189" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/5062957053625057189?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/5062957053625057189?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/mHTdkitg95A/intuitions-about-software-development.html" title="Intuitions about Software Development Flow" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/11/intuitions-about-software-development.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D04BQHs_fCp7ImA9Wx9TFkQ.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-7784093812384051574</id><published>2010-11-19T21:11:00.014-05:00</published><updated>2010-11-25T09:39:11.544-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-11-25T09:39:11.544-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="process improvement" /><category scheme="http://www.blogger.com/atom/ns#" term="kanban" /><category scheme="http://www.blogger.com/atom/ns#" term="cycle time" 
/><category scheme="http://www.blogger.com/atom/ns#" term="littles law" /><category scheme="http://www.blogger.com/atom/ns#" term="throughput" /><category scheme="http://www.blogger.com/atom/ns#" term="flow" /><title>How to Go Faster</title><content type="html">&lt;p&gt;Ok, I'm going to tell you how to make your software development organization go faster. I'm going to tell you how to get more done without adding people while improving your time to market and increasing your quality. &lt;em&gt;And&lt;/em&gt; I'm going to back it all up with &lt;a href="http://en.wikipedia.org/wiki/Queueing_theory"&gt;queuing theory&lt;/a&gt;. [ By actually explaining the relevant concepts of queuing theory, not just by ending sentences with "...which is obvious from queuing theory", which is usually a good bluff in a technical argument being had over beers. Generally a slam dunk in mixed technical/non-technical company. But I digress. ]&lt;/p&gt;

&lt;h3&gt;An Important Perspective&lt;/h3&gt;

&lt;p&gt;It's worth saying that this article assumes you've figured out how to deliver software incrementally &lt;em&gt;somehow&lt;/em&gt;, even if that's just by doing &lt;a href="http://www.gaslightandsteam.com/blog/processes/agile/scrumfall-and-other-methodologies"&gt;Scrumfall&lt;/a&gt;. The point is that you are familiar with breaking your overall feature set down into discretely deliverable &lt;a href="http://www.softwarebynumbers.org/"&gt;minimum marketable features (MMFs)&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/User_story"&gt;user stories&lt;/a&gt;, epics, tasks, and the like. If you have any customers, you are probably also familiar with production incidents and bugs, which are also discrete chunks of work to do. Now, here's the important perspective:&lt;/p&gt;

&lt;blockquote&gt;
&lt;em&gt;Your software development organization is a request processing system.&lt;/em&gt;
&lt;/blockquote&gt;

&lt;p&gt;In this case, the requests come from customers or their proxies (product managers, etc.), and the organization processes the request by delivering the requested change as working software. This could end with a deployment to a live website, publishing an update to an app store, or just plain cutting a release and posting it somewhere for your customers to download and use. At any rate, the requests come into your organization, the software gets delivered, and then the request is essentially forgotten (closed out). Now, looking at your organization this way is important, because it means you can understand your capacity for delivery in terms borrowed from tuning other request processing systems (like websites, for example) for performance and scale. Most importantly, though, this mysterious branch of mathematics called &lt;em&gt;queuing theory&lt;/em&gt; applies to your organization (just as it applies to any request processing system).&lt;/p&gt;

&lt;h3&gt;A Little Light Queuing Theory&lt;/h3&gt;

&lt;p&gt;One of the basic principles in queuing theory is &lt;a href="http://en.wikipedia.org/wiki/Little's_law"&gt;Little's Law&lt;/a&gt;, which says:&lt;/p&gt;
&lt;blockquote&gt;
&lt;i&gt;N=XR&lt;/i&gt;
&lt;/blockquote&gt;
&lt;p&gt;where &lt;i&gt;N&lt;/i&gt; is the average number of requests currently being processed by the system, &lt;i&gt;X&lt;/i&gt; is the transaction rate (requests processed per unit time), and &lt;i&gt;R&lt;/i&gt; is the average response time (how long it takes to process one request). In a software development setting, &lt;i&gt;R&lt;/i&gt; is sometimes called &lt;i&gt;cycle time&lt;/i&gt;.&lt;/p&gt;

&lt;p&gt;To put this in more familiar terms, suppose we have a walk-in bank with a number of tellers on staff. If customers arrive at an average rate of one person per minute (&lt;i&gt;X&lt;/i&gt;) and it takes a teller an average of 2 minutes to serve a customer (&lt;i&gt;R&lt;/i&gt;), then Little's Law says that we'll have &lt;i&gt;XR = 1(2) = 2&lt;/i&gt; tellers busy (&lt;i&gt;N&lt;/i&gt;) on average at any given point in time. We can similarly flip this around: if we have 3 tellers on staff, what's the maximum average customer arrival rate we can handle?&lt;/p&gt;

&lt;blockquote&gt;
&lt;i&gt;X = N/R = 3/2 = 1.5&lt;/i&gt; customers per minute
&lt;/blockquote&gt;

&lt;p&gt;Ok, the last thing we need to talk about is: what happens if we suddenly get a rush of customers coming in? Anyone who has entered a Starbucks or visited Disneyland knows the answer to this: a line forms. (The time a customer spends waiting in line is known as "queuing delay" if you want to get theoretical about it.) Let's go back to our bank. Suppose we just have 5 people suddenly walk in all at once, in addition to our regular arrival of one person per minute. What happens? Well, we get a line that is 5 people long. But if we only have 2 tellers on staff, then people come off the line at exactly the same rate that new people are entering from the back, which means: &lt;b&gt;the line never goes away and always stays 5 people long&lt;/b&gt;.&lt;/p&gt;

&lt;p&gt;What does this look like from the customers' point of view? Well, we know they'll spend 2 minutes with the teller once they get up to the front of the line, and we know that it will take 5 minutes to get to the front of the line, so their average response time is:&lt;/p&gt;

&lt;blockquote&gt;
&lt;i&gt;R = R&lt;span style="vertical-align:sub;text-size:smaller;"&gt;V&lt;/span&gt; + R&lt;span style="vertical-align:sub;text-size:smaller;"&gt;Q&lt;/span&gt; = 2 + 5 = 7&lt;/i&gt;
&lt;/blockquote&gt;

&lt;p&gt;where &lt;i&gt;R&lt;span style="vertical-align:sub;font-size:smaller;"&gt;V&lt;/span&gt;&lt;/i&gt; is the "value added time" where the request (customer) is actually getting worked on/for, and &lt;i&gt;R&lt;span style="vertical-align:sub;font-size:smaller;"&gt;Q&lt;/span&gt;&lt;/i&gt; is the amount of time spent waiting in line (queuing delay). Now we can see that on average, we'll have:&lt;/p&gt;

&lt;blockquote&gt;
&lt;i&gt;N = XR = X(R&lt;span style="vertical-align:sub;font-size:smaller;"&gt;V&lt;/span&gt; + R&lt;span style="vertical-align:sub;font-size:smaller;"&gt;Q&lt;/span&gt;) = 1(2 + 5) = 7&lt;/i&gt;
&lt;/blockquote&gt;

&lt;p&gt;people in the bank: two people at the tellers, and five people waiting in line. We all know how frustrating an experience that is from the customer's point of view. Now, let me summarize this section (if you didn't follow all the math, don't worry; the important thing is that you understand these implications):&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;If you try to put more requests into a system than it can handle, lines start forming somewhere in the system.&lt;/li&gt;
&lt;li&gt;If the request rate never falls below the system's max capacity, the lines never go away.&lt;/li&gt;
&lt;li&gt;Time spent waiting in a line doesn't really serve much useful purpose from the customer's point of view.&lt;/li&gt;
&lt;/ol&gt;

&lt;h3&gt;Software development as customer request processing&lt;/h3&gt;

&lt;p&gt;If your experience is anything like mine, there is an infinite supply of things the business stakeholders would like the software to do, which means the transaction rate &lt;i&gt;X&lt;/i&gt; can be as high as we actually have capacity for. This means one of the primary goals of the organization is figuring out how to get &lt;i&gt;X&lt;/i&gt; as high as possible &lt;b&gt;so we can ship more stuff&lt;/b&gt;. At the same time, we're also concerned with getting &lt;i&gt;R&lt;/i&gt; as low as possible, since this represents our time-to-market and can be a major competitive advantage. If we can ship a feature in a week but it takes our competitors a month to get features through their system, who's more reactive? Every time the competition throws up a compelling feature, we can match them in a week. Every time &lt;i&gt;we&lt;/i&gt; ship a compelling feature, it takes them a month to catch up. Who's going to win that battle?&lt;/p&gt;

&lt;p&gt;Now, one of the tricky things here is that software development is often far more complicated than our example bank with tellers, since we tend to staff folks with different skillsets. If I have a team of one graphic designer, three developers, a tester, and a sysadmin, it's really hard to predict how long it will take that team to ship a feature, because they will have to collaborate. If I want to hire someone to help them, is it better to hire another tester or another designer? Probably I can't tell &lt;i&gt;a priori&lt;/i&gt;, because it depends on the nature of the features being worked on, and it's really hard to measure things like "this user story was 10% design, 25% development, 50% testing, and 15% operations." Nonetheless, we can look at this from another point of view, which is that I have a fixed number of people in the organization, and each person can only be working on one thing at a time (just as a teller can only actively serve one person at a time), and they are probably (hopefully) collaborating on them.&lt;/p&gt;

&lt;blockquote&gt;
&lt;b&gt;This means the maximum number of things you can realistically be actively working on is &lt;i&gt;less than&lt;/i&gt; the number of people in the organization.&lt;/b&gt;
&lt;/blockquote&gt;

&lt;p&gt;If we have more things in flight than that, we know at least some of the time those things are going to be sitting around waiting for someone to work on them (queuing delay). Perhaps they are sitting on a product backlog. Perhaps they are simply marked "Not Started" on a sprint taskboard. Perhaps they are marked "Done" on a sprint taskboard but they have to wait for a release to be rolled at the end of the sprint to move onwards towards production or QA. As we saw above, this queuing delay doesn't increase throughput, it just hurts our time-to-market. Why would we want that?&lt;/p&gt;

&lt;h3&gt;First optimization: get rid of queuing delay&lt;/h3&gt;

&lt;p&gt;Ok, as we saw above, we know that the total response time &lt;i&gt;R&lt;/i&gt; consists of two parts: actual value-adding work (&lt;i&gt;R&lt;span style="vertical-align:sub;font-size:smaller;"&gt;V&lt;/span&gt;&lt;/i&gt;) and queuing delay (&lt;i&gt;R&lt;span style="vertical-align:sub;font-size:smaller;"&gt;Q&lt;/span&gt;&lt;/i&gt;). Typically, it's really hard and time consuming to try to measure these two pieces separately without having lots of annoying people running around with stopwatches and taking furious notes. Fortunately, we don't have to resort to that. It &lt;i&gt;is&lt;/i&gt; really easy to measure &lt;i&gt;R&lt;/i&gt; overall for a feature/story: mark down when the request came in (e.g. got added to a backlog) and then mark down when it shipped. Simple.&lt;/p&gt;

&lt;p&gt;Now, let's think back to our bank example where we had a line of people. Most software development organizations have too much in flight, and they have lines all over the place inside, many of which aren't even readily apparent because that's just "the way we do things around here." Lines are bad. Now, we know the only way to drain these queues is if the incoming feature request rate is less than the rate at which we ship them. Sometimes we can try hiring more "tellers", but in a recession that's not always an option. Instead, for many organizations, the best option is &lt;i&gt;admission control&lt;/i&gt;, which is to say that we don't take on a new request until we've shipped one out the other side. You can think of this as having a certain number of feature delivery "slots" available, and you can't start something new until you've freed up a slot. This at least prevents you from having your lines get any bigger.&lt;/p&gt;

&lt;p&gt;In order to drain the lines out of the system, the easiest thing to do is to periodically &lt;i&gt;retire&lt;/i&gt; a slot after it ships. In other words, don't let something new in just that once. This will reduce the overall number of things in flight, and since presumably everyone is still working hard, what we've just gotten rid of &lt;i&gt;must&lt;/i&gt; be queuing delay. Magic! So we can just keep doing this and draining queuing delay out of the system, improving our time to market all the time, without necessarily having to change anything else about the way we do things. When do we stop? We stop once we have people standing around not doing anything. At that point, all the queuing delay is out of the system (for now), and we know that we're at a level where all of our "tellers" are busy. To summarize:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;We can remove queuing delay from our delivery process simply by limiting and reducing the amount of work in-flight; this improves time-to-market without having to change anything else.&lt;/li&gt;
&lt;li&gt;We can keep doing this until people run out of things to work on; at that point we've squeezed all the queuing delay out.&lt;/li&gt;
&lt;/ol&gt;

&lt;h3&gt;Second optimization: reduce failure demand&lt;/h3&gt;

&lt;p&gt;The next thing to realize is that the &lt;i&gt;N&lt;/i&gt; things we have in flight actually come in two flavors: &lt;i&gt;value demand&lt;/i&gt; and &lt;i&gt;failure demand&lt;/i&gt;. In our case, value demand consists of requests that create value for the customer: i.e. new and enhanced features. Failure demand, on the other hand, consists of requests that come from not doing something right previously. These are primarily things like website outages (production incidents), bug reports from users, or even support calls from users asking if you've fixed the problem they previously reported. If you have someone collecting these, then these are requests that your organization as a whole has to deal with. On the other hand, for each request of failure demand, someone is busy triaging/fixing it when they could be creating new value. In other words:&lt;/p&gt;

&lt;blockquote&gt;
&lt;i&gt;N = N&lt;span style="vertical-align:sub;font-size:smaller;"&gt;V&lt;/span&gt; + N&lt;span style="vertical-align:sub;font-size:smaller;"&gt;F&lt;/span&gt;&lt;/i&gt;
&lt;/blockquote&gt;

&lt;p&gt;where &lt;i&gt;N&lt;span style="vertical-align:sub;font-size:smaller;"&gt;V&lt;/span&gt;&lt;/i&gt; is value demand and &lt;i&gt;N&lt;span style="vertical-align:sub;font-size:smaller;"&gt;F&lt;/span&gt;&lt;/i&gt; is failure demand. Or, if we look at things this way:&lt;/p&gt;

&lt;blockquote&gt;
&lt;i&gt;X = N/R = (N&lt;span style="vertical-align:sub;font-size:smaller;"&gt;V&lt;/span&gt; + N&lt;span style="vertical-align:sub;font-size:smaller;"&gt;F&lt;/span&gt;)/R = N&lt;span style="vertical-align:sub;font-size:smaller;"&gt;V&lt;/span&gt;/R + N&lt;span style="vertical-align:sub;font-size:smaller;"&gt;F&lt;/span&gt;/R&lt;/i&gt; 
&lt;/blockquote&gt;

&lt;p&gt;we can see that the failure demand is stealing a portion (&lt;i&gt;N&lt;span style="vertical-align:sub;font-size:smaller;"&gt;F&lt;/span&gt;/R&lt;/i&gt;) of our organization's throughput! This is, incidentally, why spending extra energy on quality up front results in lower overall costs (as Toyota showed); failure demand essentially requires &lt;i&gt;rework&lt;/i&gt;.&lt;/p&gt;

&lt;p&gt;This means that another way to improve overall throughput of the organization is to reduce failure demand, reclaiming that portion of your throughput that's getting siphoned off. One way to do this involves figuring out how to "build quality in" on new development, but since software development is a creative process (different every time for every feature), it's not possible to actually completely prevent bugs. That said, there are many techniques like test-driven development and user experience testing that can help improve quality. The other way to reduce failure demand involves vigorously fixing root causes of failure as we experience them. In other words, when we fix a problem for a customer, we should fix it in a way that prevents that type of problem from ever occurring again, for any customer. This keeps overall failure demand down by preventing certain classes of it, thereby reserving that precious organizational throughput for delivering new value. To summarize this section:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Improve value delivery capacity by reducing failure demand (production incidents and bug reports).&lt;/li&gt;
&lt;li&gt;The cheapest way to reduce failure demand is by building in quality up-front.&lt;/li&gt;
&lt;li&gt;When serving a failure demand request, we can reduce overall failure demand by also fixing the root cause of the problem.&lt;/li&gt;
&lt;/ol&gt;

&lt;h3&gt;Final optimization: cycle time reduction&lt;/h3&gt;

&lt;p&gt;Ok, now we've gotten to the point where &lt;i&gt;R&lt;span style="vertical-align:sub;font-size:smaller"&gt;Q&lt;/span&gt;&amp;nbsp;=&amp;nbsp;0&lt;/i&gt; (or near zero), so &lt;i&gt;R&amp;nbsp;=&amp;nbsp;R&lt;span style="vertical-align:sub;font-size:smaller"&gt;V&lt;/span&gt;&lt;/i&gt;. Now at this point, let's look back at Little's Law:&lt;/p&gt;

&lt;blockquote&gt;
&lt;i&gt;N = XR&lt;/i&gt;
&lt;/blockquote&gt;

&lt;p&gt;We've already established via draining out our queuing delay in the first phase what our target &lt;i&gt;N&lt;/i&gt; is (number of requests in-flight). But we still want to ship more with the same number of people; we want &lt;i&gt;X&lt;/i&gt; to go up. But recall that:&lt;/p&gt;

&lt;blockquote&gt;
&lt;i&gt;X = N/R&lt;/i&gt;
&lt;/blockquote&gt;

&lt;p&gt;If our &lt;i&gt;N&lt;/i&gt; is fixed due to the number of people we have on staff, then the &lt;b&gt;only&lt;/b&gt; way to increase throughput is to reduce &lt;i&gt;R&lt;/i&gt;. Now is where we start to look at process changes and automation. How do we make it so that it takes people less time to handle a request? Focusing on this improves not only time to market but also overall throughput. And furthermore, if we are measuring &lt;i&gt;R&lt;/i&gt; over time, we have an easy way to do this: change the process in a way you think will help, and then measure if &lt;i&gt;R&lt;/i&gt; went down or not. If it didn't help, try something else. If it made things worse, go back to the old way. Rinse, repeat. The things to try are going to be different for every organization, and one of the best sources of ideas will be the folks actually doing the work. But this doesn't require any kind of high-tech tracking software -- post-it notes on walls with the start and end dates written on them are more than sufficient to measure &lt;i&gt;R&lt;/i&gt; and carry these experiments out.&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;As failure demand and queuing delay are squeezed out of the system, the only way to improve throughput is by reducing response time.&lt;/li&gt;
&lt;li&gt;Response time can only be reduced by process changes.&lt;/li&gt;
&lt;li&gt;By measuring response time, we have a convenient experimental lab to understand if process changes help or not.&lt;/li&gt;
&lt;/ol&gt;

&lt;h3&gt;Say, haven't I heard this all before?&lt;/h3&gt;

&lt;p&gt;Well, yes. You may have heard pieces of this from all sorts of places. The feature "slots" we were talking about before serve as a means to &lt;a href="http://limitedwipsociety.org/"&gt;limit "work-in-progress" (WIP)&lt;/a&gt;, and are often called &lt;a href="http://en.wikipedia.org/wiki/Kanban"&gt;&lt;em&gt;kanban&lt;/em&gt;&lt;/a&gt;. The notion of continually adapting your process to improve it is a tenet of &lt;a href="http://controlchaos.com/"&gt;Scrum&lt;/a&gt;. &lt;a href="http://en.wikipedia.org/wiki/Test-driven_development"&gt;Test-driven development&lt;/a&gt; and &lt;a href="http://en.wikipedia.org/wiki/Pair_programming"&gt;pair programming&lt;/a&gt; are methods from &lt;a href="http://www.extremeprogramming.org/"&gt;Extreme Programming (XP)&lt;/a&gt; of building in quality up front. Failure demand is sometimes called out as a form of technical debt, and the list goes on and on.&lt;/p&gt;

&lt;p&gt;Hopefully what I've done here, though, without putting a name on any kind of methodology, is explain &lt;em&gt;why&lt;/em&gt; all these things are good ideas (or are good ideas to try). Ultimately, practices won't help unless they do one of three things:&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;drive out queuing delay (&lt;i&gt;R&lt;span style="vertical-align:sub;font-size:smaller"&gt;Q&lt;/span&gt;&lt;/i&gt;);&lt;/li&gt;
&lt;li&gt;reduce value-adding response time (&lt;i&gt;R&lt;span style="vertical-align:sub;font-size:smaller"&gt;V&lt;/span&gt;&lt;/i&gt;); OR&lt;/li&gt;
&lt;li&gt;reduce failure demand (&lt;i&gt;N&lt;span style="vertical-align:sub;font-size:smaller"&gt;F&lt;/span&gt;/R&lt;/i&gt;)&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;In general, the easiest way to do these for an organization is:&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;reduce the number of things in-flight&lt;/li&gt;
&lt;li&gt;aggressively beat back failure demand by fixing root causes and building in quality up-front&lt;/li&gt;
&lt;li&gt;measure response (cycle) time and improve via process experimentation&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;Fortunately, all of those things are very, &lt;i&gt;very&lt;/i&gt; easy to measure. If you can mark a request as either value or failure demand, if you can count the number of things in-flight, and if you can measure the time between starting something and shipping it, that's all you need.&lt;/p&gt;
&lt;p&gt;&lt;b&gt;Update:&lt;/b&gt; See the next post on this topic for a &lt;a href="http://codeartisan.blogspot.com/2010/11/intuitions-about-software-development.html"&gt;more intuitive motivation&lt;/a&gt; of the theory presented in this article.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-7784093812384051574?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/ZrsaM1eU0Igg1iDJiMkeGriBlyU/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ZrsaM1eU0Igg1iDJiMkeGriBlyU/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/ZrsaM1eU0Igg1iDJiMkeGriBlyU/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ZrsaM1eU0Igg1iDJiMkeGriBlyU/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/7784093812384051574/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=7784093812384051574" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/7784093812384051574?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/7784093812384051574?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/Rzy5is3_VUo/how-to-go-faster.html" title="How to Go Faster" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/11/how-to-go-faster.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEIAR3w4eip7ImA9Wx5bFE0.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-843557200279799073</id><published>2010-10-29T21:11:00.002-04:00</published><updated>2010-10-29T21:35:46.232-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-10-29T21:35:46.232-04:00</app:edited><title>Tales of Test-Driven Development</title><content type="html">&lt;p&gt;Inspired by a &lt;a href="http://phillyemergingtech.com/sessions/clojure-s-approach-to-state-and-identity"&gt;talk&lt;/a&gt; about &lt;a href="http://clojure.org/"&gt;Clojure&lt;/a&gt; given by &lt;a 
href="http://twitter.com/#!/richhickey"&gt;Rich Hickey&lt;/a&gt; at &lt;a href="http://phillyemergingtech.com/"&gt;Philly Emerging Tech&lt;/a&gt; earlier this year, I've been toying with building a Java library of pure (immutable) data structures, starting with the Map implementation based on Phil Bagwell's &lt;a href="http://lamp.epfl.ch/papers/triesearches.pdf.gz"&gt;Hash Tries&lt;/a&gt;. Yes, I know I could probably just figure out how to use them straight out of Clojure by interoperating, but that would deprive me of an interesting coding exercise.&lt;/p&gt;
&lt;p&gt;At any rate, I hadn't really gotten around to doing this in any great detail yet, and as it turns out, I'm glad I didn't. I was fortunate to be able to attend a &lt;a href="http://www.refactoring.com/"&gt;refactoring&lt;/a&gt; and &lt;a href="http://en.wikipedia.org/wiki/Test-driven_development"&gt;test-driven development (TDD)&lt;/a&gt; class taught by &lt;a href="http://twitter.com/#!/unclebobmartin"&gt;Bob Martin&lt;/a&gt; this week, and one of the code examples we ran through was the "Bowling Game" of writing an algorithm to score ten frames of bowling. Prior to developing this with a TDD approach, we identified a pretty simple object-oriented design including things like Games, Frames, Rolls, TenthFrames, etc. Yet when we actually got down to it, it turned out we just needed a pretty simple algorithm built into the single Game class--ultimately a much simpler design.&lt;/p&gt;
&lt;p&gt;Rewinding to the pure hashmaps: I had previously been thinking about how to decompose the internal datastructure for the hash tries into classes like InternalNodes and LeafNodes that would implement a common TrieNode interface, and then mark all of those things as package private so I could hide all the messy implementation details from the client behind a PureHashMap &lt;a href="http://en.wikipedia.org/wiki/Facade_pattern"&gt;facade&lt;/a&gt;. Hoo boy.&lt;/p&gt;
&lt;p&gt;Instead, over the course of a very few hours today, I took the following approach: first, I used TDD to develop a brain-dead simple implementation of a PureHashMap using real HashMaps as the backing store, but cloning them on modification. Didn't even make an attempt at the Hash Trie implementation. Wouldn't work well &lt;em&gt;at all&lt;/em&gt; on large data sets, but it did allow me to &lt;b&gt;develop a set of unit tests that documented the required functional behavior&lt;/b&gt; of a PureHashMap, and it didn't really take long at all.&lt;/p&gt;
&lt;p&gt;Next, I did something crazy: I completely threw away the simple implementation of PureHashMap, breaking all the tests. Then I started hacking the hash trie implementation in there until I could get all the tests to pass one by one. Now, this was ugly, cut-n-pasted, massively high cyclomatic complexity code--I shudder to think of it. But it really didn't take too long to get that working either, with the tests as a guide. Finally, as any TDD aficionado would know, once I had all my tests working again, I was able to easily but mercilessly refactor until it was all cleaned up.&lt;/p&gt;
&lt;p&gt;The end result was something far better than I could have imagined: a relatively understandable PureHashMap hash trie implementation in a single class file with 100% unit test coverage, built in less than an afternoon. That's powerful stuff. Thanks, Uncle Bob.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-843557200279799073?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/ExSUBXsV8J_eWbYRKOA-t7QkQqY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ExSUBXsV8J_eWbYRKOA-t7QkQqY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/ExSUBXsV8J_eWbYRKOA-t7QkQqY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ExSUBXsV8J_eWbYRKOA-t7QkQqY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/843557200279799073/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=843557200279799073" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/843557200279799073?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/843557200279799073?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/Xz2mFRIva-g/tales-of-test-driven-development.html" title="Tales of Test-Driven Development" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/10/tales-of-test-driven-development.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkUBRXo_eCp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-4715356872960854277</id><published>2010-09-26T07:52:00.005-04:00</published><updated>2010-09-26T14:04:14.440-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:04:14.440-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="process improvement" /><category scheme="http://www.blogger.com/atom/ns#" term="agile" /><category scheme="http://www.blogger.com/atom/ns#" term="kanban" /><category 
scheme="http://www.blogger.com/atom/ns#" term="wip" /><category scheme="http://www.blogger.com/atom/ns#" term="scrum" /><title>The Power of Visualizing Iterative Waterfall</title><content type="html">&lt;p&gt;We're going through a process mapping exercise at work just to try to understand &lt;i&gt;how&lt;/i&gt; we get things done. Now, we are running what I would describe as "scrumfall"; doing &lt;a href="http://en.wikipedia.org/wiki/Scrum_(development)"&gt;Scrum&lt;/a&gt; for development but having that sit inside a traditional waterfall process. The waterfall is run iteratively and pipelined, although the degree of true pipeline isn't what most people think it is, due to developers having to support upstream and downstream activities like backlog grooming and addressing bugs in QA. I thought I would work through the exercise to try to define the value stream that our features actually experience.&lt;/p&gt;
&lt;table style="padding:10px; border: solid black 1px;"&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Recorded&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;user story appears on a backlog&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Defined&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;user story has acceptance criteria and estimate&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Prioritized&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;user story has been assigned a priority/rank&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Committed&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;user story has been pulled into a sprint&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Coded&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;user story has been marked 'complete' in the sprint&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Accepted&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;user story has been shown and accepted in a sprint review&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Released&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;user story has been included in a versioned release&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Tested&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;enclosing release has achieved an acceptable quality level&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Approved&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;enclosing release has been approved for launch (go/no-go)&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td style="background:cyan; border: solid black 1px;" align="center"&gt;Deployed&lt;/td&gt;
&lt;td style="padding-left:5px;"&gt;enclosing release has been deployed to production&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;

&lt;p&gt;Several of Scrum's standard meetings (plus some other common ones) show up here: backlog grooming moves stories from "recorded" to "defined", sprint planning moves stories from "prioritized" to "committed", daily scrum moves stories from "committed" to "coded", and the sprint review moves stories from "coded" to "accepted".&lt;/p&gt;

&lt;p&gt;Just having laid it out and thinking about it, some observations:
&lt;ol&gt;
&lt;li&gt;we ask our product owners and other stakeholders to sign off on a particular user story &lt;em&gt;twice&lt;/em&gt;, once at the sprint review, and once at the go/no-go meeting.&lt;/li&gt;
&lt;li&gt;user stories that may well be production-ready upon reaching "coded" get batched and bound to surrounding stories and thus become &lt;em&gt;deployment dependent&lt;/em&gt; on them afterwards, even though they may not be functionally dependent on them&lt;/li&gt;
&lt;li&gt;interestingly, even though good user stories are supposed to be independent of one another (the "I" in &lt;a href="http://codeartisan.blogspot.com/2008/02/investing-in-user-stories.html"&gt;INVEST&lt;/a&gt;), we nonetheless batch them together into sprints and treat them as a unit&lt;/li&gt;
&lt;li&gt;we don't have a good way to understand what happens to stories that don't get completed in a sprint, or bugs that are deemed non-launch blockers, or production incidents&lt;/li&gt;
&lt;/ol&gt;
&lt;/p&gt;

&lt;p&gt;Another thing as we think about batch size is whether a two week sprint iteration is actually tied to any relevant process capability metrics. For example, interesting metrics to consider here are some things that are per-batch (sprint or release); these are things that take roughly the same amount of time whether they contain one story or one hundred:
&lt;ul&gt;
&lt;li&gt;how long does it take us to produce a release?&lt;/li&gt;
&lt;li&gt;how long does it take us to deploy a release?&lt;/li&gt;
&lt;li&gt;how long does it take us to run a full regression test?&lt;/li&gt;
&lt;li&gt;what is the lead time for scheduling a meeting with all the necessary folks in it?&lt;/li&gt;
&lt;/ul&gt;
And then there are some activities that are dependent on the complexity of a particular story:
&lt;ul&gt;
&lt;li&gt;how long it takes to define acceptance criteria for the story&lt;/li&gt;
&lt;li&gt;how long it takes to code the story&lt;/li&gt;
&lt;li&gt;how long it takes to define and update test cases for the story&lt;/li&gt;
&lt;li&gt;how long it takes to discuss the story and determine if it was acceptably implemented&lt;/li&gt;
&lt;/ul&gt;
&lt;/p&gt;

&lt;p&gt;Batching makes sense if the organization's overall throughput bottleneck is on a batch-size-independent step, in which case, sizing the batch so that it runs in cadence with the cycle time of the bottleneck will maximize throughput. To make that more concrete, let's say we only have certain deployment windows available and can only do a deployment once a week; if this is the slowest part of our process, then we should take batches of work in a way so that upstream steps produce a deployable release once a week. Or, if the slowest part is running a full regression of manual tests over three days, then again, we should take batches that can be finished in three days. Perhaps the product owner is only available once a month to carry out sprint planning or sprint review; then we should batch at a month.&lt;/p&gt;

&lt;p&gt;It might seem weird that the calendar of your product manager might be the bottleneck in your software development process, or that it makes sense to roll a release of completed work every three days, but that's queuing theory for you. Optimizing an overall system's throughput means organizing the work according to the &lt;i&gt;current bottleneck's constraints&lt;/i&gt; (even if that means non-bottleneck parts might not be locally optimized) and/or moving the constraint elsewhere in the system (Theory of Constraints).&lt;/p&gt;

&lt;p&gt;Interestingly, putting the entire workflow up on a &lt;a href="http://leansoftwareengineering.com/"&gt;kanban board&lt;/a&gt; would make a lot of this very obvious, even if all we did was put up &lt;a href="http://limitedwipsociety.org/"&gt;WIP limits&lt;/a&gt; corresponding to obvious limitations (I can only deploy one release at a time, and I can only test as many releases as I have QA environments, etc.). The great thing about kanban-style development is that you don't have to change your process to start using it; you just model your current one, visualize it, and then watch what happens. You probably have all the information needed to track the metrics that matter, although you may have to start writing down times when various emails pass through your system (like whether the release went out, or whether the new build got deployed to QA, etc.).&lt;/p&gt;

&lt;p&gt;However, to me, the most powerful reason to start visualizing the flow is that it shows you exactly what parts of your process you should change, and when. There's nothing like being able to show a product manager that their availability is driving overall throughput to encourage spending more time with the team. Or being able to show a development manager that the amount of time being spent doing bugfixing rework in QA is the bottleneck--encouraging practices like &lt;a href="http://en.wikipedia.org/wiki/Test-driven_development"&gt;TDD&lt;/a&gt;. In other words, being able to make an &lt;span style="font-style:italic;"&gt;empirical&lt;/span&gt; case for the potential use of Agile practices that aren't currently in place, and then &lt;span style="font-style:italic;"&gt;being able to show that they worked&lt;/span&gt;. This is a good way to bring about an Agile evolution grounded in facts &lt;span style="font-style:italic;"&gt;relevant to the current organization&lt;/span&gt; and not just based on opinion or philosophy.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-4715356872960854277?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/T1SmuEHsh1hUpEg5Vw825tOsIOI/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/T1SmuEHsh1hUpEg5Vw825tOsIOI/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/T1SmuEHsh1hUpEg5Vw825tOsIOI/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/T1SmuEHsh1hUpEg5Vw825tOsIOI/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/4715356872960854277/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=4715356872960854277" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/4715356872960854277?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/4715356872960854277?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/1AoV9MjItVc/power-of-visualizing-iterative.html" title="The Power of Visualizing Iterative Waterfall" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/09/power-of-visualizing-iterative.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQDRn0yfyp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-4309079566512406021</id><published>2010-08-31T08:15:00.004-04:00</published><updated>2010-09-26T14:06:17.397-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:06:17.397-04:00</app:edited><title>Testable System Architecture</title><content type="html">&lt;p&gt;At work we were having a discussion about how we wanted to do SSL termination for a particular web service. 
We had narrowed the possibilities down to doing hardware SSL termination in our load balancer or doing software SSL termination in an &lt;a href="http://httpd.apache.org/"&gt;Apache&lt;/a&gt; layer sitting in front of our web apps.&lt;/p&gt;

&lt;p&gt;During the course of the conversation, we talked about factors like performance (would there be a noticeable effect on latency), capacity (were we already CPU bound on the servers that would run the Apaches), maintainability (is it easier to update configs on a single load balancer or to script config changes across a cluster with 40+ servers), cost (how much does the SSL card cost), and scalability (will we be able to expand the solution out to higher traffic levels easily).&lt;/p&gt;

&lt;p&gt;I think this was a pretty typical example of taking a reasoned approach to system design and trying to cover all the potential points of view. However, it ended up that we left a big one off: &lt;span style="font-weight:bold;"&gt;testability&lt;/span&gt;.&lt;/p&gt;

&lt;p&gt;The business rules about which URLs need to be SSL terminated and which ones don't (or shouldn't) need to be encoded somewhere, and we'd already ruled out doing the SSL termination in the application itself for other reasons, so that means they'd be encoded in either a load balancer config or an Apache config. &lt;span style="font-style:italic;"&gt;Which one of these is easier to get under automated test on a developer workstation?&lt;/span&gt; For an agile shop where quality and time-to-market are of primary importance, this is a question we can't forget to ask when designing our system architecture.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-4309079566512406021?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/S9z24Hj4DdakwZ-e4tNdRvxKJfI/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/S9z24Hj4DdakwZ-e4tNdRvxKJfI/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/S9z24Hj4DdakwZ-e4tNdRvxKJfI/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/S9z24Hj4DdakwZ-e4tNdRvxKJfI/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/4309079566512406021/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=4309079566512406021" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/4309079566512406021?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/4309079566512406021?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/piyxyQyTDTo/testable-system-architecture.html" title="Testable System Architecture" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/08/testable-system-architecture.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE4NQ3k-fCp7ImA9Wx5bGUs.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-3983070538120094901</id><published>2010-08-13T22:14:00.007-04:00</published><updated>2010-11-05T09:16:32.754-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-11-05T09:16:32.754-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="http" /><category scheme="http://www.blogger.com/atom/ns#" term="caching" /><category scheme="http://www.blogger.com/atom/ns#" term="REST API" /><category 
scheme="http://www.blogger.com/atom/ns#" term="rest" /><category scheme="http://www.blogger.com/atom/ns#" term="restful web services" /><category scheme="http://www.blogger.com/atom/ns#" term="refactoring" /><title>RESTful Refactor: Combine Resources</title><content type="html">&lt;p&gt;I've been spending a lot of time thinking about &lt;a title="Wikipedia article about RESTful web services" href="http://en.wikipedia.org/wiki/Representational_State_Transfer#RESTful_web_services"&gt;RESTful web services&lt;/a&gt;, particularly &lt;a title="introductory article to hypermedia RESTful APIs" href="http://codeartisan.blogspot.com/2010/08/thoughts-on-hypermedia-apis.html"&gt;hypermedia APIs&lt;/a&gt;, and I've started to discover several design patterns as I've begun to play around with these in code. Today, I want to talk about the &lt;span style="font-style:italic;"&gt;granularity&lt;/span&gt; of resources, which is roughly "how much stuff shows up at a single resource". Generally speaking, RESTful architectures work better with coarser-grained resources, &lt;a title="when to use i.e. in a sentence" href="http://theoatmeal.com/comics/ie"&gt;i.e.&lt;/a&gt;, transferring more stuff in one response, and I'll walk through an example of that in this article.&lt;/p&gt;

&lt;p&gt;Now, in my &lt;a href="http://codeartisan.blogspot.com/2010/08/thoughts-on-hypermedia-apis.html"&gt;previous article&lt;/a&gt;, I suggested taking each domain object (or collection of domain objects) and making it a resource with an assigned URL. While following this path (along with the other guidelines mentioned) does get you to a &lt;a title="definition of the REST architectural style" href="http://www.ics.uci.edu/~fielding/pubs/dissertation/rest_arch_style.htm"&gt;RESTful architecture&lt;/a&gt;, it may not always be an optimal one, and you may want to &lt;a title="Wikipedia article on refactoring" href="http://en.wikipedia.org/wiki/Code_refactoring"&gt;&lt;em&gt;refactor&lt;/em&gt;&lt;/a&gt; your API to improve it.&lt;/p&gt;

&lt;p&gt;Let's take, for example, the canonical and oversimplified "list of favorite things" web service. There are potentially two resource types:
&lt;ul&gt;
&lt;li&gt;a favorite thing (/favorites/{id})&lt;/li&gt;
&lt;li&gt;a list of favorite things (/favorites)&lt;/li&gt;
&lt;/ul&gt;
All well and good, and I can model all sorts of actions here:
&lt;dl&gt;
&lt;dt&gt;adding a new favorite&lt;/dt&gt;
&lt;dd&gt;POST to /favorites&lt;/dd&gt;
&lt;dt&gt;removing a favorite&lt;/dt&gt;
&lt;dd&gt;DELETE to the specific /favorites/{id}&lt;/dd&gt;
&lt;dt&gt;editing a favorite&lt;/dt&gt;
&lt;dd&gt;PUT to the specific /favorites/{id}&lt;/dd&gt;
&lt;dt&gt;getting the full list&lt;/dt&gt;
&lt;dd&gt;GET to /favorites&lt;/dd&gt;
&lt;/dl&gt;
Fully RESTful, great. However, let's think about cache semantics, particularly the cache semantics we should assign to the GET to /favorites. This is probably the most common request we'd have to serve, and in fact it ought to be quite cacheable, as in practice (as with a lot of user-maintained preferences or data) there are going to be lots of read accesses between writes. &lt;/p&gt;

&lt;p&gt;There's a problem here, though: some of the actions that would cause an update to the list don't operate on the list's URL (namely, editing a single entry or deleting an entry). This means an intermediary HTTP cache won't &lt;a title="rules for cache invalidation on writes in HTTP/1.1" href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10"&gt;invalidate&lt;/a&gt; the cache entry for the list when those updates happen. If we want a subsequent fetch of the list by a user to reflect an immediate update, we either have to put '&lt;a title="definition of the max-age Cache-Control directive" href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9.3"&gt;Cache-Control: max-age=0&lt;/a&gt;' on the list and require &lt;a title="definition of cache entry validation in HTTP/1.1" href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3"&gt;validation&lt;/a&gt; on each access, or we need the client to remember to send '&lt;a title="definition of client-requested reload in HTTP/1.1" href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9.4"&gt;Cache-Control: no-cache&lt;/a&gt;' when fetching a list after an update.&lt;/p&gt;

&lt;p&gt;Putting 'Cache-Control: max-age=0' on the list resource really seems a shame; most RESTful APIs are set up to cross &lt;a title="Wikipedia article on wide-area networks (WANs)" href="http://en.wikipedia.org/wiki/Wide_area_network"&gt;WAN&lt;/a&gt; links, and so you may be paying most of the latency of a full fetch that returned a &lt;a title="definition of the HTTP/1.1 200 response code" href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.2.1"&gt;200 OK&lt;/a&gt; even if you are getting a &lt;a title="definition of the HTTP/1.1 304 response code" href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5"&gt;304 Not Modified&lt;/a&gt; response, especially if you have fine-grained resources that don't have a lot of data (and a textual list of 10 or so favorite items isn't a lot of data!).&lt;/p&gt;

&lt;p&gt;Requiring the client to send 'Cache-Control: no-cache' is also problematic: the cache semantics of the resources are really supposed to be the server's concern, yet we are relying on the client to understand something extra about the relationship between various resources and their caching semantics. This is a road that leads to tight coupling between client and server, thus throwing away one of the really useful properties of a REST architecture: allowing the server and client to evolve largely independently.&lt;/p&gt;

&lt;p&gt;Instead, let me offer the following rule of thumb: &lt;b&gt;if a change to one resource should cause a cache invalidation of another resource, maybe they shouldn't be separate resources&lt;/b&gt;. I'll call this a "RESTful refactoring": &lt;em&gt;Combining Resources&lt;/em&gt;.&lt;/p&gt;

&lt;p&gt;In our case, I would suggest that we only need &lt;em&gt;one&lt;/em&gt; resource:
&lt;ul&gt;
&lt;li&gt;the list of favorites&lt;/li&gt;
&lt;/ul&gt;
We can still model all of our actions:
&lt;dl&gt;
&lt;dt&gt;adding a new favorite&lt;/dt&gt;
&lt;dd&gt;PUT to /favorites a list containing the new item&lt;/dd&gt;
&lt;dt&gt;removing a favorite&lt;/dt&gt;
&lt;dd&gt;PUT to /favorites a new list with the offending item removed&lt;/dd&gt;
&lt;dt&gt;editing a favorite&lt;/dt&gt;
&lt;dd&gt;PUT to /favorites a list containing an updated item&lt;/dd&gt;
&lt;dt&gt;getting the full list&lt;/dt&gt;
&lt;dd&gt;GET to /favorites&lt;/dd&gt;
&lt;/dl&gt;
But now, I can put a much longer cache timeout on the /favorites resource, because if a client does something to change its state, it will do a PUT to /favorites, invalidating its own cache (assuming the client has its own non-shared/private cache). If the resource represents a user-specific list, then I can probably set the cache timeout considering:
&lt;ul&gt;
&lt;li&gt;how long am I willing to wait for another user to see the results of this user's updates?&lt;/li&gt;
&lt;li&gt;if the same user accesses the resource from a different computer, how long am I willing to allow those two views to stay out of sync? (bearing in mind that the user can usually, and pretty intuitively, hit refresh on a browser page that looks out of date)&lt;/li&gt;
&lt;/ul&gt;
Probably these values are a lot larger than the zero seconds we were using via 'Cache-Control: max-age=0'. When you can figure out how to assign longer expiration times to your responses, you can get a much bigger win for performance and scale. While revalidating a cached response is probably faster than fetching the resource anew, not having to send a request at all to the origin is &lt;em&gt;waaaaaaay&lt;/em&gt; better.&lt;/p&gt;

&lt;p&gt;The extreme case, here, of course, would be a web service where a user could just get all their "stuff" in one big blob with one request (as we modelled above). There are many domains where this is quite possible, and when you factor in gzip encoding, you can start to contemplate pushing around quite verbose documents, which can be a big win assuming your server can render the response quickly enough.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-3983070538120094901?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/w5oMa7TkBx9dEkt8LkDtI_wzHNI/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/w5oMa7TkBx9dEkt8LkDtI_wzHNI/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/w5oMa7TkBx9dEkt8LkDtI_wzHNI/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/w5oMa7TkBx9dEkt8LkDtI_wzHNI/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/3983070538120094901/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=3983070538120094901" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/3983070538120094901?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/3983070538120094901?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/1326foeMDm0/restful-refactor-combine-resources.html" title="RESTful Refactor: Combine Resources" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/08/restful-refactor-combine-resources.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkAMRH86fip7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-8609677559489166151</id><published>2010-08-11T21:34:00.010-04:00</published><updated>2010-09-26T14:13:05.116-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:13:05.116-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="xhtml" /><category scheme="http://www.blogger.com/atom/ns#" term="REST API" /><category scheme="http://www.blogger.com/atom/ns#" term="rest" /><category 
scheme="http://www.blogger.com/atom/ns#" term="hypermedia" /><category scheme="http://www.blogger.com/atom/ns#" term="architecture" /><category scheme="http://www.blogger.com/atom/ns#" term="restful web services" /><title>Thoughts on Hypermedia APIs</title><content type="html">&lt;p&gt;The &lt;a title="high level description of REST" href="http://en.wikipedia.org/wiki/Representational_State_Transfer"&gt;REST&lt;/a&gt; architectural style is defined in &lt;a title="Roy Fielding's blog" href="http://roy.gbiv.com/untangled/"&gt;Roy Fielding's&lt;/a&gt; &lt;a title="Roy Fielding's Ph.D. thesis" href="http://www.ics.uci.edu/~fielding/pubs/dissertation/top.htm"&gt;thesis&lt;/a&gt;, primarily &lt;a title="chapter 5 of Roy Fielding's thesis, where the Representational State Transfer (REST) architectural style is defined" href="http://www.ics.uci.edu/~fielding/pubs/dissertation/rest_arch_style.htm#sec_5_3"&gt;chapter 5&lt;/a&gt;, where the style is described as a set of architectural constraints. A quick summary of these constraints is:
&lt;dl&gt;
&lt;dt&gt;client-server&lt;/dt&gt;
&lt;dd&gt;The system is divided into client and server portions.&lt;/dd&gt;
&lt;dt&gt;stateless&lt;/dt&gt;
&lt;dd&gt;Each request from client to server must contain all of the information necessary to understand the request.&lt;/dd&gt;
&lt;dt&gt;cache&lt;/dt&gt;
&lt;dd&gt;Response data is implicitly or explicitly marked as cacheable or non-cacheable.&lt;/dd&gt;
&lt;dt&gt;uniform interface&lt;/dt&gt;
&lt;dd&gt;All interactions through the system happen via a standard, common interface. This is achieved by adhering to four sub-constraints:
  &lt;dl&gt;
  &lt;dt&gt;identification of resources&lt;/dt&gt;
  &lt;dd&gt;Domain objects are assigned resource identifiers (e.g. &lt;a title="Wikipedia article on Uniform Resource Identifiers (URIs)" href="http://en.wikipedia.org/wiki/Uniform_Resource_Identifier"&gt;URIs&lt;/a&gt;)&lt;/dd&gt;
  &lt;dt&gt;manipulation via representations&lt;/dt&gt;
  &lt;dd&gt;Actions occur by exchanging representations of current or intended resource state.&lt;/dd&gt;
  &lt;dt&gt;self-descriptive messages&lt;/dt&gt;
  &lt;dd&gt;Messages include control data (e.g. cache-related), resource metadata (e.g. alternates), and representation metadata (e.g. media type) in addition to a representation itself.&lt;/dd&gt;
  &lt;dt&gt;hypermedia as the engine of application state&lt;/dt&gt;
  &lt;dd&gt;Clients move from one state to the next by selecting and following state transitions described in the current set of representations.&lt;/dd&gt;
  &lt;/dl&gt;
&lt;/dd&gt;
&lt;dt&gt;layered system&lt;/dt&gt;
&lt;dd&gt;Components can only "see" the component with which they are directly interacting.&lt;/dd&gt;
&lt;dt&gt;code-on-demand (optional)&lt;/dt&gt;
&lt;dd&gt;Clients can be dynamically extended by downloading and running code.&lt;/dd&gt;
&lt;/dl&gt;
&lt;/p&gt;

&lt;h2&gt;Achieving a RESTful architecture with XHTML&lt;/h2&gt;
&lt;p&gt;&lt;a title="Mike Amundsen's blog" href="http://www.amundsen.com/blog/"&gt;Mike Amundsen&lt;/a&gt; &lt;a title="blog article describing the use of XHTML as a document format for RESTful web services" href="http://www.amundsen.com/blog/archives/1043"&gt;proposed&lt;/a&gt; using &lt;a title="W3C XHTML 1.1 specification" href="http://www.w3.org/TR/xhtml11/"&gt;XHTML&lt;/a&gt; as a media-type of choice for web APIs rather than the ubiquitous &lt;a title="IETF RFC 4287 describing the Atom Syndication Format" href="http://www.ietf.org/rfc/rfc4287.txt"&gt;Atom&lt;/a&gt; (or other application-specific &lt;a title="Wikipedia article about Extensible Markup Language (XML)" href="http://en.wikipedia.org/wiki/XML"&gt;XML&lt;/a&gt;) or &lt;a title="more information about Javascript Object Notation (JSON)" href="http://www.json.org/"&gt;JSON&lt;/a&gt; representations commonly seen. By using &lt;a title="one method for describing an XHTML profile" href="http://gmpg.org/xmdp/description"&gt;XHTML profiles&lt;/a&gt;, we are able to define the semantics of the data contained within a particular document, as well as the semantics of contained link relations and form types.&lt;/p&gt;

&lt;p&gt;Now, let's throw a few simple rules into the system:
&lt;ol&gt;
&lt;li&gt;all domain objects (including collections of domain objects) are resources and get assigned a URL&lt;/li&gt;
&lt;li&gt;beyond an HTTP GET to the API's "home page", a client simply follows standard XHTML semantics from returned documents; namely, doing a GET to follow a link, and constructing a GET or POST request by filling out and submitting a form.&lt;/li&gt;
&lt;li&gt;retrieval (read) of resource state should be accomplished by GET, and modification of resource state should happen with POST (via a form).&lt;/li&gt;
&lt;/ol&gt;
&lt;/p&gt;
&lt;p&gt;Interestingly, this means that in addition to programmatic clients being able to parse XHTML (as a subset of XML) and apply standard XHTML semantics for interactions, it is possible for a human to use a &lt;em&gt;browser&lt;/em&gt; to interact with the resources (or, as my colleague &lt;a title="Karl Martino's blog" href="http://www.paradox1x.org/"&gt;Karl Martino&lt;/a&gt; put it, &amp;quot;you can &lt;em&gt;surf&lt;/em&gt; an API!&amp;quot;).&lt;/p&gt;

&lt;h2&gt;Evaluation&lt;/h2&gt;
&lt;p&gt;So how well does this match up against the REST constraints? By leveraging &lt;a title="HTTP/1.1 specification" href="http://www.w3.org/Protocols/rfc2616/rfc2616.html"&gt;HTTP&lt;/a&gt; directly as an application protocol, we can get a lot of constraints for free, namely: client-server, statelessness, caching, layered system, and self-descriptive messages.&lt;/p&gt;

&lt;p&gt;Now, we also get a uniform interface, because all of our domain objects are modelled as resources with identifiers, reads are accomplished by retrieving XHTML documents as representations, and writes are accomplished by sending form-encoded inputs as representations. Finally, because a client accomplishes its goals by "clicking links and submitting forms", the hypermedia features of XHTML let us model the available state transitions to the client, who can then select what to do next and know how to follow one of the available transitions. Also, because an update to a resource is modelled as a POST to the same URL we would use to GET its state, this plays nicely and naturally with standard HTTP/1.1 cache semantics (&lt;a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10"&gt;invalidation on write-through&lt;/a&gt;).&lt;/p&gt;

&lt;p&gt;Finally, we're not using code-on-demand, in our case, although we &lt;em&gt;could&lt;/em&gt; include Javascript with our XHTML representations to provide additional functionality for that human "surfing" our API, even if a programmatic client would ignore the Javascript. However, code-on-demand is listed as an optional constraint anyway.&lt;/p&gt;

&lt;h2&gt;Coming soon...&lt;/h2&gt;
&lt;p&gt;This is an intentionally high-level post that I'm intending will be the first in a series of posts that go over specific examples and examine some practical considerations and implementation patterns that are useful. Hopefully, we'll also be able to illustrate some of the architectural strengths and weaknesses that the REST architectural style is purported to have. Stay tuned!&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-8609677559489166151?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/jVKsTLixajolpPMGbS0KdDfJdKU/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/jVKsTLixajolpPMGbS0KdDfJdKU/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/jVKsTLixajolpPMGbS0KdDfJdKU/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/jVKsTLixajolpPMGbS0KdDfJdKU/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/8609677559489166151/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=8609677559489166151" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/8609677559489166151?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/8609677559489166151?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/bEKvUUs3UvE/thoughts-on-hypermedia-apis.html" title="Thoughts on Hypermedia APIs" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/08/thoughts-on-hypermedia-apis.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Dk4BQ3o-fSp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-1494789287268298568</id><published>2010-03-26T20:12:00.004-04:00</published><updated>2010-09-26T14:15:52.455-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:15:52.455-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="lab week" /><category scheme="http://www.blogger.com/atom/ns#" term="innovation" /><title>Why Lab Week is So...Awesome</title><content type="html">&lt;p&gt;At work, once a quarter, we have "lab 
week": folks are allowed to form groups and work on self-directed projects. We usually finish up at the end of the week with a "science fair" where folks set up posters and demos of what they've worked on for the week (we even had cookies and lemonade at today's science fair!). I am always amazed at the amount of innovation and progress that comes out of these weeks; in some ways it outshines what our organization manages to do over the rest of the quarter. In this post I'd like to reflect a bit on what makes lab week so awesome.&lt;/p&gt;

&lt;h2&gt;Merit-based Project Ideas&lt;/h2&gt;

&lt;p&gt;Where we work, you have to actually &lt;em&gt;recruit&lt;/em&gt; for lab week. This means you typically have to pitch your idea, if you have one, to enough people to get them to join your team. We actually set up special lunchtime meetings just for "Pitch Day". Ultimately this means that the projects that get worked on are the most innovative--because those are the most exciting ones and the easiest to recruit for. This is wisdom of the crowds at its finest; rather than having a select and small group of management identify a roadmap, it's a free-for-all where anyone can submit an idea and the best ones attract teams and get worked on. [Ed: this is not to say that management isn't needed to guide the execution and selection of ideas, just that they need not be the only source for idea generation]&lt;/p&gt;

&lt;h2&gt;Self-Selecting Teams&lt;/h2&gt;

&lt;p&gt;Again, due to the recruiting nature of lab week, groups are self-forming. As I think about it, it's amazing how well the group dynamics and team composition work out. I just re-read the section of Malcolm Gladwell's &lt;a href="http://www.amazon.com/Tipping-Point-Little-Things-Difference/dp/0316346624"&gt;&lt;em&gt;Tipping Point&lt;/em&gt;&lt;/a&gt; where he talks about &lt;a href="http://en.wikipedia.org/wiki/Dunbar%27s_number"&gt;Dunbar's number&lt;/a&gt; of 150: in a group smaller than that, you know your relationship to everyone as well as everyone's relationship to each other. In practice, this means when you are recruiting for lab week, you are consciously and unconsciously choosing folks that will bring the needed skills and experience to your project in a way that's compatible with the rest of the group.&lt;/p&gt;

&lt;p&gt;When I look at it this way, it's not surprising that lab week teams gel very quickly and immediately start working together well. When you have responsibility for deciding who you work with, you end up &lt;em&gt;wanting&lt;/em&gt; to work with your team. The group dynamics just sort themselves out effortlessly. Apparently &lt;a href="http://www.gore.com/en_xx/"&gt;Gore&lt;/a&gt; (makers of Gore-Tex) organize their high-tech development in this fashion.&lt;/p&gt;

&lt;h2&gt;Ownership and Buy-in&lt;/h2&gt;

&lt;p&gt;With a self-selected team and a self-selected project, folks on a lab week team are implicitly engaged in what they do, because it is &lt;em&gt;their work&lt;/em&gt;. They own it, front to back, and pour their effort into it. You can see the pride in the demos, in the cute homemade posters (it &lt;em&gt;does&lt;/em&gt; bear a striking resemblance to a stereotypical grade school science fair!).&lt;/p&gt;

&lt;p&gt;On the well-known &lt;a href="http://www.gallup.com/consulting/52/employee-engagement.aspx"&gt;Gallup Q12&lt;/a&gt; survey for how engaged your employees are, lab week covers: knowing what's expected of you (set by you!), having the needed raw supplies (often because the work is chosen with an eye to the possible), having an opportunity to do what you do best (self-selected projects), having your opinions count (recruiting, self-organizing teams), having dedicated teammates (self-selecting teams with ownership), having opportunities to learn (the whole point of lab week). That's six of the twelve right there. No wonder people willingly put in overtime on their lab week projects.&lt;/p&gt;

&lt;h2&gt;Timeboxed Exploration&lt;/h2&gt;

&lt;p&gt;Lab week lasts exactly a week. You don't have time to fully productionize what you do, and you have to focus. Ultimately this forces you to separate out all the chaff and focus on the real core of your idea and just deliver &lt;em&gt;that&lt;/em&gt;, because that's all the time you have. This is the &lt;a href="http://www.extremeprogramming.org/"&gt;XP&lt;/a&gt; notion of &lt;a href="http://www.extremeprogramming.org/rules/spike.html"&gt;Design Spikes&lt;/a&gt;, but in reality it's the &lt;a href="http://en.wikipedia.org/wiki/Pareto_principle"&gt;Pareto Principle&lt;/a&gt; in full effect: do the 20% of the work that has the 80% impact.&lt;/p&gt;

&lt;p&gt;Looked at another way, because the work effort is limited to a week, it is a way for the company to do rapid exploration with minimal risk or expense. I'd wager the company gets way more value in terms of idea creation during lab week than they miss out on from not doing "normal" work. It's clear the executives agree, because after each lab week, they agree to have the next one!&lt;/p&gt;

&lt;h2&gt;Thinking Outside the Box&lt;/h2&gt;

&lt;p&gt;Anything goes during lab week; this is a chance for folks to play with new technologies (it seems there is never a shortage of new technologies or of engineers who want to tinker with them) or practices. Our group used &lt;a href="http://en.wikipedia.org/wiki/Class-Responsibility-Collaboration_card"&gt;CRC Design Cards&lt;/a&gt; and did full-on &lt;a href="http://en.wikipedia.org/wiki/Test-driven_development"&gt;TDD&lt;/a&gt; for our project, a first for many of us, and while I think we built a pretty cool project, the main benefit (echoed by group members) was what we learned about development this week. We ran a &lt;a href="http://cobertura.sourceforge.net/"&gt;Cobertura&lt;/a&gt; report at the end of lab week and discovered we had 91% branch coverage in our code and an average &lt;a href="http://en.wikipedia.org/wiki/Cyclomatic_complexity"&gt;cyclomatic complexity&lt;/a&gt; of 3 (without measuring during the week or even shooting for particular measures here). On a lab week project, no less. Wild.&lt;/p&gt;

&lt;p&gt;In many cases, I think the science fair shouldn't be "What we built during lab week" so much as a presentation of "What we learned during lab week", which I suspect is actually the majority of the value offered to the company and the participants.&lt;/p&gt;

&lt;h2&gt;Permission to Experiment&lt;/h2&gt;

&lt;p&gt;Finally, it is clear that lab week is a no-pressure situation. There are no contracts to fulfill, no launch deadlines to meet (other than the science fair, I guess!), and you don't have to get approval from any more people than it takes to form a team. Almost anything goes (I have heard that underwater basketweaving is off limits, but not much else). If it doesn't work out, no problem, you go back to your "day job" next week, until the next lab week rolls around. There is nothing like this kind of no-risk environment (even the name "lab week" is suggestive) to foster creativity.&lt;/p&gt;

&lt;h2&gt;In Summary&lt;/h2&gt;

&lt;p&gt;Someone remarked on how much positive energy permeated the science fair. It was &lt;em&gt;fun&lt;/em&gt;, and there were a &lt;em&gt;lot&lt;/em&gt; of really cool ideas. People get &lt;em&gt;into&lt;/em&gt; lab week where I work, and it makes the experience...awesome.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-1494789287268298568?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/kLykaJJPTUcz_CLWaJ9x123aV4c/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/kLykaJJPTUcz_CLWaJ9x123aV4c/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/kLykaJJPTUcz_CLWaJ9x123aV4c/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/kLykaJJPTUcz_CLWaJ9x123aV4c/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/1494789287268298568/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=1494789287268298568" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/1494789287268298568?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/1494789287268298568?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/PWfQwaJkJTA/why-lab-week-is-soawesome.html" title="Why Lab Week is So...Awesome" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>1</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/03/why-lab-week-is-soawesome.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0cGSHw6eyp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-748826185554684109</id><published>2010-03-19T22:42:00.005-04:00</published><updated>2010-09-26T14:17:09.213-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:17:09.213-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="queuing theory" /><category scheme="http://www.blogger.com/atom/ns#" term="kanban" /><category scheme="http://www.blogger.com/atom/ns#" term="wip" /><title>Agile Architecture Kanban</title><content 
type="html">&lt;p&gt;We've recently spun up a new software architecture group at work, and at least some of what the architects are expected to do is provide "consulting" services: providing feedback on technical designs and approaches, doing technical research, providing technical opinions to product managers, etc. Since many of these are similarly sized, and "cycle time" for getting a response to our clients is an important metric, we opted to manage this work using a &lt;a title="article describing how to use kanban boards to visualize agile development processes" href="http://www.infoq.com/articles/agile-kanban-boards"&gt;kanban&lt;/a&gt; system.&lt;/p&gt;

&lt;p&gt;After a month-long iteration, we stopped to take a look at some of the data we had collected. We were able to produce a &lt;a title="wikipedia article about statistical process control and the control charts used therein" href="http://en.wikipedia.org/wiki/Control_chart"&gt;statistical process control chart&lt;/a&gt;, indicating our cycle time in business days (measuring the time between when a customer asked for something to be added to our consulting backlog and the time when we finished it), something like this one:&lt;/p&gt;

&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://farm5.static.flickr.com/4069/4446340725_ddc660495e_o.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 397px; height: 295px;" src="http://farm5.static.flickr.com/4069/4446340725_ddc660495e_o.png" border="0" alt="" /&gt;&lt;/a&gt;

&lt;p&gt;This shows our average cycle time was around 6 days, and that our process was under statistical control; all samples were less than the upper control limit (red line) at 11 days (3 standard deviations above the average). This means that we had a relatively predictable process. Now, at the same time, we were able to produce a &lt;a title="article describing how to use cumulative flow diagrams to infer characteristics of a development process" href="http://edn.embarcadero.com/article/32410"&gt;cumulative flow diagram&lt;/a&gt;, like this one:&lt;/p&gt;

&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_bgilpjYwX_0/S6RDb8AbZBI/AAAAAAAAATg/tpcyjdBzHxM/s1600-h/cumulative-flow-diagram.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 320px; height: 221px;" src="http://1.bp.blogspot.com/_bgilpjYwX_0/S6RDb8AbZBI/AAAAAAAAATg/tpcyjdBzHxM/s320/cumulative-flow-diagram.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5450555596268594194" /&gt;&lt;/a&gt;

&lt;p&gt;which showed the number of consulting "stories" in each state of the workflow. One of the things we were able to derive is the average arrival rate for the stories, by finding the slope of the line between the starting and ending points on the "ready" line. We were also able to find our average throughput by finding the similar slope between the starting and ending points of the "done" line. What we found (and which you can see on the graph) was that the request rate was higher than our throughput (by about 0.2 stories per day), which resulted in a slowly but persistently growing backlog. Now, we happened to measure our average cycle time about halfway through the month, and found that it was 4.5 instead of 6 back then. In the ten business days between measurements, our average cycle time went up by around the amount our backlog length grew, as predicted by the difference between our customers' request rate and our service rate.&lt;/p&gt;

&lt;p&gt;&lt;em&gt;It would appear even architects are subject to queuing theory.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Going forward, in order to remain responsive to our clients (many of our engineering teams run two-week sprints, so we wanted to shoot for an average cycle time of 3 days), we realized we were going to have to limit the size of our backlog. In other words, we were going to have to essentially issue a &lt;a title="description of the HTTP/1.1 503 response code" href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.4"&gt;503 (Service Unavailable)&lt;/a&gt; response to some of our clients and simply not take their request onto our backlog and ask them to come back later, so as to remain responsive to our other customers. Just like we'd do in a web application server that was overloaded. Perhaps we'll even develop a cute picture of a flying aquatic mammal to try to soften the "not yets" we'll have to start handing out.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-748826185554684109?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/qiyim1AcD_ndQT1CFc6QhXGtDUY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/qiyim1AcD_ndQT1CFc6QhXGtDUY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/qiyim1AcD_ndQT1CFc6QhXGtDUY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/qiyim1AcD_ndQT1CFc6QhXGtDUY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/748826185554684109/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=748826185554684109" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/748826185554684109?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/748826185554684109?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/dRrLuIEL31s/agile-architecture-kanban.html" title="Agile Architecture Kanban" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_bgilpjYwX_0/S6RDb8AbZBI/AAAAAAAAATg/tpcyjdBzHxM/s72-c/cumulative-flow-diagram.png" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2010/03/agile-architecture-kanban.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0MGSH85eCp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-4696919759495493117</id><published>2009-10-31T21:06:00.006-04:00</published><updated>2010-09-26T14:23:49.120-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:23:49.120-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="riak" 
/><category scheme="http://www.blogger.com/atom/ns#" term="nosqleast" /><category scheme="http://www.blogger.com/atom/ns#" term="neo4j" /><category scheme="http://www.blogger.com/atom/ns#" term="key-value stores" /><category scheme="http://www.blogger.com/atom/ns#" term="voldemort" /><category scheme="http://www.blogger.com/atom/ns#" term="cassandra" /><category scheme="http://www.blogger.com/atom/ns#" term="pig" /><category scheme="http://www.blogger.com/atom/ns#" term="nosql" /><category scheme="http://www.blogger.com/atom/ns#" term="hbase" /><category scheme="http://www.blogger.com/atom/ns#" term="redis" /><category scheme="http://www.blogger.com/atom/ns#" term="cascading" /><category scheme="http://www.blogger.com/atom/ns#" term="hadoop" /><title>NoSQL East 2009 Redux</title><content type="html">&lt;p&gt;I just attended &lt;a href="https://nosqleast.com/2009/"&gt;NoSQL East&lt;/a&gt; down in Atlanta over the last two days. This was a fantastic conference, well-organized, with not only great content and speakers, but also a very well-educated audience. There was a telling moment in the first non-keynote talk where the speaker asked the audience "How many people have read the &lt;a href="http://s3.amazonaws.com/AllThingsDistributed/sosp/amazon-dynamo-sosp2007.pdf"&gt;Dynamo paper&lt;/a&gt;?" and easily 95% of the audience put their hands up.&lt;/p&gt;

&lt;p&gt;I'd divide the major focus areas into the following groups:
&lt;ul&gt;
&lt;li&gt;key-value stores (&lt;a href="http://riak.basho.com/"&gt;Riak&lt;/a&gt;, &lt;a href="http://project-voldemort.com/"&gt;Voldemort&lt;/a&gt;, &lt;a href="http://incubator.apache.org/cassandra/"&gt;Cassandra&lt;/a&gt;)&lt;/li&gt;
&lt;li&gt;massively parallel data mining (&lt;a href="http://hadoop.apache.org/pig/"&gt;Pig&lt;/a&gt;, &lt;a href="http://www.cascading.org/"&gt;Cascading&lt;/a&gt;)&lt;/li&gt;
&lt;li&gt;column-oriented datastores (Cassandra, &lt;a href="http://hadoop.apache.org/hbase/"&gt;HBase&lt;/a&gt;)&lt;/li&gt;
&lt;li&gt;document collection databases (&lt;a href="http://couchdb.apache.org/"&gt;CouchDB&lt;/a&gt;, &lt;a href="http://www.mongodb.org/display/DOCS/Home"&gt;MongoDB&lt;/a&gt;)&lt;/li&gt;
&lt;li&gt;stuff I couldn't make up my mind about (&lt;a href="http://neo4j.org/"&gt;Neo4J&lt;/a&gt;, &lt;a href="http://code.google.com/p/redis/"&gt;Redis&lt;/a&gt;, &lt;a href="http://www.slideshare.net/timanglade/tin"&gt;Tin&lt;/a&gt;)&lt;/li&gt;
&lt;/ul&gt;
&lt;/p&gt;

&lt;p&gt;As one of the speakers put it, which one you need depends on the shape of your data; these are all isomorphic in one way or another to each other, just as Turing-complete languages are all essentially equivalent. But you still want to choose the right tool for the job. One nice touch was that a number of the systems were introduced not by one of the project committers, but rather by people who were using them to get stuff done. It was a very practical focus, much appreciated.&lt;/p&gt;

&lt;p&gt;There were a couple of interesting tidbits from the conference, including a rollicking, confusing talk by &lt;a href="http://en.wikipedia.org/wiki/John_Day_%28computer_scientist%29"&gt;John "My first network address was '12'" Day&lt;/a&gt; about network architecture and how TCP/IP had really gotten it all wrong. This guy was about 3 planes of existence above what I could follow at 9:15am--I had a distinct feeling he knew exactly what he was talking about, and was probably right, but there was no way he was making sense even to a very technical audience. Then there was the poor guy from Microsoft Research who had done some interesting distributed IDE work...for the .NET framework. In a sea of Mac and Linux laptops across the audience, he had a bunch of people who really couldn't make practical use of what he'd done.&lt;/p&gt;

&lt;h2&gt;Key-Value Stores&lt;/h2&gt;

&lt;p&gt;I've got to give a pretty big nod to Riak here for a straight-up key-value store; &lt;a href="http://twitter.com/justinsheehy"&gt;Justin Sheehy&lt;/a&gt; basically gave a talk about decentralized, scalable, lights-out system design that might well have been a good keynote, and it was clear that Riak was designed with high priority on those aspects. Major advantages over Voldemort in my mind are: ability to add/subtract nodes from running clusters, tunable (per-bucket, per-request!) quorum parameters (N,R,W), plus support for pluggable consistent hashing modules, partition distribution modules, and conflict resolution modules. For most of my use cases, with N=3, I'd probably do R=1,W=3 to optimize for read latency, as most of our writes would either be a user clicking "save" or would be the result of an asynchronous background process. On the other hand, if I wanted to build something like &lt;a href="http://aws.amazon.com/sqs/"&gt;SQS&lt;/a&gt; on top of this (which I conjecture is possible), I'd probably do W=1,R=3 to optimize for the "post a message" latency.&lt;/p&gt;

&lt;p&gt;While I was there, I had a great talk with them where we went over how they were going to expose the conflict resolution up to the client via their HTTP/JSON interface. There's no out-of-the-box support for multiple datacenter awareness, although it seemed possible to add it via the pluggable partition distribution module, although their inter-node communication leverages &lt;a href="http://www.erlang.org/"&gt;Erlang&lt;/a&gt;'s native networking, meaning some kind of VPN/IPSec tunnel would be the main way to make this work across sites. I'm pretty sure no matter what you would want to use in this space you'll probably have to end up using a VPN over WAN to give the appearance of a seamless LAN cluster (although the Cassandra guys piped up in the conference IRC channel that Cassandra has multi-site support already).&lt;/p&gt;

&lt;p&gt;I can't tell whether being written in Erlang is a plus or a minus. On the one hand, it won't plug in well to our JMX-based monitoring frameworks, and I'm pretty sure very few folks within our organization have ever built or deployed an Erlang system. On the other hand, Erlang is the perfect choice for highly concurrent, fault-tolerant programming, and would probably be right up the alley of several of our programmers (I'm proud to say that while we are primarily a Java shop, we have a lot of other language proficiencies in-house spanning Ruby, Python, Clojure, and Scheme. Based on my brief escapade with it last night, I'm confident that (1) we have a number of folks who could pick it up easily and (2) who would jump at the chance). This is probably a wash.&lt;/p&gt;

&lt;p&gt;Very interesting in my mind is that some of the key-value stores have brought over some features from other design spaces; Riak allows some basic link structure in their values, including a version of targeted &lt;a href="http://en.wikipedia.org/wiki/MapReduce"&gt;MapReduce&lt;/a&gt; that can follow links, which starts to make it feel like a graph-oriented database like Neo4j. Similarly, Cassandra has support for column-oriented indices like HBase does. It's clear there's probably a project out there to scratch your particular itch.&lt;/p&gt;

&lt;h2&gt;Massive-scale data mining&lt;/h2&gt;

&lt;p&gt;We saw a couple of talks from folks who were using &lt;a href="http://hadoop.apache.org/"&gt;Hadoop&lt;/a&gt; for analyzing very large data sets. But furthermore, they were using frameworks like Pig and Cascading on top of Hadoop to do a lot of ad-hoc queries. &lt;a href="http://twitter.com/"&gt;Twitter&lt;/a&gt; in fact uses Pig to do all of their interesting web analytics, and they've taken all their analytics BAs who are used to doing ad-hoc SQL queries and trained them up with little problem in Pig. This is probably somewhere on our horizon, although there are larger cats to skin at the moment.&lt;/p&gt;

&lt;h2&gt;Column-oriented datastores&lt;/h2&gt;

&lt;p&gt;Cassandra and HBase were the major players here. &lt;a href="http://digg.com/"&gt;Digg&lt;/a&gt; is moving all of their backend storage over to Cassandra after some successful pilots, and we saw a great talk by &lt;a href="http://twitter.com/markgunnels"&gt;Mark Gunnels&lt;/a&gt; about how he is using HBase because it makes it "simple to get sh*t done". Apparently recent releases of HBase have made strides in fault tolerance (there is now an election algorithm for some of the special name nodes) and latency (apparently performance optimization was a major focus of the 0.20 release). There's an interesting article that describes some wide-area replication schemes that are available with HBase that sound intriguing (although once you are beyond two datacenters, I am convinced that you are better off with the VPN LAN solution if you want to have any hope of achieving eventual consistency).&lt;/p&gt;

&lt;h2&gt;Document-oriented databases&lt;/h2&gt;

&lt;p&gt;While the column-oriented datastores are good at organizing semi-structured data, the document-oriented guys are really all about organizing largely unstructured documents, and focus on doing some wild ad-hoc MapReduce queries. I still need to be convinced about the scaling, replication, and geographic distribution capabilities in this space, so it may be a while before we dip our toes in here.&lt;/p&gt;

&lt;h2&gt;Other&lt;/h2&gt;

&lt;p&gt;There was a very interesting talk about Tin ("the database so tiny they had to shorten its name"). This basically seemed to be a very clever approach for delivering stock data that leveraged a basic filesystem with range queries and rewriting rules via &lt;a href="http://www.sinatrarb.com/"&gt;Sinatra&lt;/a&gt;. Turns out web servers serving static files go pretty fast, if that's a suitable representation for your stuff (seems like his primary use case is for delivering read-only data out to clients, where the data gets updated asynchronously in the background by the system). We actually have some datasets that are like that, so this is intriguing!&lt;/p&gt;

&lt;p&gt;There was a talk from Neo4j, a graph-oriented database, which I just can't wrap my head around yet. I think I need to read up on some of the background research papers in this area. Certainly, the notion of a datastore based around the notion of link relations would likely be easy to expose as a REST HTTP interface, which is attractive. Our particular domain model can actually be nicely normalized, however (we are currently running off a traditional RDBMS after all), so I'm not sure we need the full semantic flexibility this offers.&lt;/p&gt;

&lt;p&gt;There was also a talk from Redis; this is primarily an in-memory storage system with blazing-fast speeds, and the ability to write out to disk is really an afterthought. During the talk he showed a screen snapshot of a "top" running on a cloud-hosted virtual node with 60GB of memory. I cannot make up my mind whether this is ultimately just a badass memcached, or if he's on the cusp of something: if you have enough memory to hold your whole dataset, and you are sufficiently fault-tolerant, why bother writing to disk at all? Especially if you can easily get a &lt;em&gt;periodic&lt;/em&gt; snapshot out to disk in the background that can be properly backed up for disaster recovery.&lt;/p&gt;

&lt;h2&gt;Conclusion&lt;/h2&gt;

&lt;p&gt;As folks pointed out, "NoSQL" might better be written "NOSQL" to mean "Not Only SQL". I didn't sense a lot of MySQL hatred in here; quite the contrary, many people were very complimentary that there were certain things it does *really* well, and that the MySQL hammer was something that was staying firmly in their toolboxes. However, it is clear that there is a maturing community of very practically-minded folks that are looking for a new set of tools to drive their particular screws. Although to be sure (and this is something that I think some of the conference tweets corroborated), this also implies we all have a screw loose or two....&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-4696919759495493117?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/1fJiB3kldCVXliV9CjptURo667U/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/1fJiB3kldCVXliV9CjptURo667U/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/1fJiB3kldCVXliV9CjptURo667U/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/1fJiB3kldCVXliV9CjptURo667U/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/4696919759495493117/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=4696919759495493117" title="10 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/4696919759495493117?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/4696919759495493117?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/fh8mTSXTrMY/nosql-east-2009-redux.html" title="NoSQL East 2009 Redux" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>10</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2009/10/nosql-east-2009-redux.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0IHRn07eSp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-2728493035425647082</id><published>2009-10-30T11:53:00.005-04:00</published><updated>2010-09-26T14:25:37.301-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:25:37.301-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="erlang" /><category scheme="http://www.blogger.com/atom/ns#" term="message passing" /><category scheme="http://www.blogger.com/atom/ns#" term="oop" /><category scheme="http://www.blogger.com/atom/ns#" 
term="concurrency" /><title>Object Calls == Message Passing in Erlang</title><content type="html">&lt;p&gt;I started playing around with &lt;a title="Official site for the Erlang programming language" href="http://www.erlang.org/"&gt;Erlang&lt;/a&gt; last night as a result of learning about &lt;a title="home page for Basho Technologies, Inc." href="http://www.basho.com/"&gt;Basho&lt;/a&gt;'s key-value store &lt;a title="home page for the Riak distributed key-value store" href="http://riak.basho.com/"&gt;Riak&lt;/a&gt; at &lt;a title="2009 conference about non-relational data storage" href="https://nosqleast.com/2009/"&gt;NoSQL East&lt;/a&gt; yesterday (more specifically, it was due to &lt;a title="Justin Sheehy's twitter profile" href="http://twitter.com/justinsheehy"&gt;Justin Sheehy&lt;/a&gt;'s talk, and two realizations: (1) this guy *gets* a lot of important *operational* design choices in this space, and (2) he decided to build his system in Erlang).&lt;/p&gt;

&lt;p&gt;So I decided to read the &lt;a href="http://www.erlang.org/course/course.html"&gt;online Erlang "course"&lt;/a&gt; and started working on the &lt;a title="concurrency exercises for the online Erlang course" href="http://www.erlang.org/course/exercises.html#conc"&gt;exercises&lt;/a&gt;. One of them was:
&lt;blockquote&gt;
Write a function which starts N processes in a ring, and sends a message M times around all the processes in the ring. After the messages have been sent the processes should terminate gracefully.
&lt;img alt="picture of a unidirectional ring of nodes" src="http://www.erlang.org/course/ex2.gif"/&gt;
&lt;/blockquote&gt;
&lt;/p&gt;

&lt;p&gt;And so, summoning vaguely-remembered lectures by &lt;a title="Bob Harper's home page at Carnegie Mellon University" href="http://www.cs.cmu.edu/~rwh/"&gt;Bob Harper&lt;/a&gt; in "Fundamentals of Computer Science II" at CMU on doing object-oriented programming in &lt;a href="http://en.wikipedia.org/wiki/Scheme_%28programming_language%29"&gt;Scheme&lt;/a&gt;, and remembering that the original &lt;a href="http://www.smalltalk.org/smalltalk/whatissmalltalk.html"&gt;Smalltalk&lt;/a&gt; OOP guys always said "send an object a message" rather than "invoke a method on an object", I set to work. [Editor's note: please feel free to post comments showing me better ways, I have known Erlang for all of about 12 hours at this point!]&lt;/p&gt;

&lt;p&gt;Let's get the declarations out of the way. I need to define the entry point function which creates the ring of N nodes and sends the message around it M times, and I know I'm going to need a function representing a node in the ring, since I'm going to have to spawn processes for them.
&lt;pre&gt;
-module(ring).
-export([ring_msg/3, ring_node/1]).
&lt;/pre&gt;
&lt;/p&gt;

&lt;p&gt;Ok, what job does a node in the ring have? Well, most of the time, when it receives a message, it just needs to pass it on to the next guy. So my node process is going to need to know about its next neighbor. Now in Erlang, what I would normally think of as an object can be modelled as a recursive function that passes its current state back into itself as an argument, and processes "method calls" by receiving messages. Interestingly, not all method calls actually have to send something back to the caller!
&lt;pre&gt;
ring_node(Next) -&gt;
  receive
    { pass, M, Msg } -&gt;
      % Note that we got this message. 
      io:format("Node ~w~n",[Msg]),
      % Pass the message on around the ring.
      Next ! { pass, M, Msg },
      % If the count was down to zero, I can
      % exit, otherwise, I loop and wait for
      % the next incoming message.
      if M == 0 -&gt; ok;
         true -&gt; ring_node(Next)
      end
  end.
&lt;/pre&gt;
Ok, seems pretty straightforward. But if I had a ring of these set up, a message would just keep running around the ring. At least one node needs to be special, so that it can decrement the count M as the message comes through. It's pretty similar to the ring_node above, but is a little different.
&lt;pre&gt;
init_node(Next) -&gt; receive
    % message has been all the way around
    % the last time, so I can quit
    { pass, 0, _ } -&gt; ok;
    % otherwise, log the message and pass
    % it on, decrementing the count
    { pass, M, Msg } -&gt;
      io:format("Node ~w~n",[Msg]),
      Next ! { pass, M-1, Msg },
      init_node(Next)
  end.
&lt;/pre&gt;
Now an interesting thing here is that the init_node and the ring_node can both handle the "pass" message, and that when they send the message on, they don't actually care &lt;span style="font-style:italic;"&gt;what&lt;/span&gt; their "Next" process is. It's like both of these "objects" implement the following interface:
&lt;pre&gt;
public interface MessagePasser {
  void pass(int count, Object msg);
}
&lt;/pre&gt;
Ok, so now if we can create a ring with 1 init_node and (N-1) ring_nodes, we're all set if we inject the initial Msg into the init_node. So let's think about constructing a ring of nodes; if we have a node handy, we can pass that in as the initial argument (think "constructor") to a ring_node process to use as its Next node, then we just count down:
&lt;pre&gt;
ring(Last, 0) -&gt; Last;
ring(Last, N) -&gt; 
  RN = spawn(ring, ring_node, [Last]),
  ring(RN, N-1).
&lt;/pre&gt;
Hmm, that's close, but that's a linked-list of nodes, not a ring. But we can't pass a node in as a constructor argument to the first node we create, because we don't have any yet! So it seems like we'll need to construct a linked-list of nodes, and then "close the loop" by stitching the front and the back together. Our init_node is already a special node, so maybe we can extend it this way:
&lt;pre&gt;
init_node(Next) -&gt; receive
    % acknowledge the request, update state
    { setNext, N, From } -&gt; From ! ok, init_node(N);
...
&lt;/pre&gt;
In other words, the init_node can get a special message telling it to "update" its Next state. In some sense, we've just done this:
&lt;pre&gt;
public interface InitNode extends MessagePasser {
  void setNext(MessagePasser N);
}
&lt;/pre&gt;
We want to acknowledge the request so our ring construction knows when that message has been processed -- we don't want to hand the ring back until it's all stitched together, and we can't guarantee ordering of message delivery unless we specifically wait for a response. So here's the full ring construction:
&lt;pre&gt;
ring(N) when is_integer(N) -&gt;
  % just pass in a placeholder for Next
  RN0 = spawn(ring, init_node, [nil]),
  ring(RN0, RN0, N-1);
% finished stitching, can return our
% init node to the caller
ring(Init) -&gt; receive ok -&gt; Init end.
ring(Init, Last, 0) -&gt; Init ! { setNext, Last, self()}, ring(Init);
ring(Init, Last, N) -&gt;
  RN = spawn(ring, ring_node, [Last]),
  ring(Init, RN, N-1).
&lt;/pre&gt;
Finally, the thing we're trying to do (including optimizing the degenerate cases):
&lt;pre&gt;
ring_msg(0, _, _) -&gt; ok;
ring_msg(_, 0, _) -&gt; ok;
ring_msg(N, M, Msg) -&gt;
  Init = ring(N), Init ! { pass, M, Msg }, ok.
&lt;/pre&gt;
Actually runs, too! It's pretty neat to see polymorphism via being able to accept the same message, and I've always loved the pattern matching in &lt;a href="http://en.wikipedia.org/wiki/ML_%28programming_language%29"&gt;ML&lt;/a&gt; (both &lt;a href="http://www.smlnj.org/"&gt;SML&lt;/a&gt; and &lt;a href="http://caml.inria.fr/ocaml/"&gt;OCaml&lt;/a&gt; variants!). Some pretty serious systems programs are getting written in this language; it's clear that the process spawning methodology lends itself well to a &lt;a href="http://www.eecs.harvard.edu/~mdw/proj/seda/"&gt;SEDA&lt;/a&gt;-style approach which is great for graceful degradation of service, and the fully-functional style (no mutation) means that you have &lt;span style="font-style:italic;"&gt;no locks&lt;/span&gt;, &lt;span style="font-style:italic;"&gt;no shared state&lt;/span&gt;, and hence safe concurrency (as long as you can model what you're doing properly).&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-2728493035425647082?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/iAfc9Gj1JTyfzmVdGRJherZlr08/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/iAfc9Gj1JTyfzmVdGRJherZlr08/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/iAfc9Gj1JTyfzmVdGRJherZlr08/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/iAfc9Gj1JTyfzmVdGRJherZlr08/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/2728493035425647082/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=2728493035425647082" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/2728493035425647082?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/2728493035425647082?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/nazHZT-wEBE/object-calls-message-passing-in-erlang.html" title="Object Calls == Message Passing in Erlang" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2009/10/object-calls-message-passing-in-erlang.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0EBQ3w6cCp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-5871308384921062660</id><published>2009-08-26T21:54:00.005-04:00</published><updated>2010-09-26T14:27:32.218-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:27:32.218-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="agile" /><category scheme="http://www.blogger.com/atom/ns#" term="architecture" /><title>Agile Architecture Anti-Patterns</title><content type="html">&lt;p&gt;I 
have been the solution architect for an enterprise-spanning project which is now getting geared up for its next phase. This project touches more than 10 different development teams across at least three company divisions and involves at least one external ISV. So, you know, a pretty small project.&lt;/p&gt;

&lt;p&gt;As I reflect on the process of hashing out all the technical details, I realized I got stretched pretty thin. I really didn't have time to write much down in any formal fashion this time around, due to the time pressure on the project and the number of other things I was also trying to juggle. I have instead been spending a lot of time drawing stuff on whiteboards for various technical audiences. The things I did have time to write up were only the most general concepts and patterns - i.e. the really high-level architecture.&lt;/p&gt;

&lt;p&gt;At least part of the reason I didn't have time to write anything else down was that I was spending all my time in technical design discussions with all the various groups, hashing out nitty-gritty integration details and then bringing full integration designs back to the development groups to run with. The last time through, this worked out well, because the developers in my home organization are used to agile development and being responsible for their own technical design, so they were able to take my whiteboard sketches, continue to consult with me, and produce solid, working code. It also worked out ok because the last phase was actually the first phase of this project, and I had enough lead time to work out all the details ahead of time.&lt;/p&gt;

&lt;p&gt;This time around, the time pressure is pretty high, plus the work-ahead time I would have had in a traditional "up-front" architecture process was actually consumed by helping to get the first phase out the door. So I find I actually don't have enough time to do the same detailed design that I did the first time around before the development teams are scheduled to start. This would seem to be a major conundrum.&lt;/p&gt;

&lt;p&gt;&lt;span style="font-weight:bold;"&gt;Antipattern 1:&lt;/span&gt; waterfall, up-front architecture doesn't pipeline well if the architecture phase extends concurrently all the way through the development phase.&lt;/p&gt;

&lt;p&gt;&lt;span style="font-weight:bold;"&gt;Corollary:&lt;/span&gt; trying to waterfall development and QA doesn't pipeline well either for exactly the same reason (because developers have to keep working their code to fix bugs).&lt;/p&gt;

&lt;p&gt;The other anti-pattern that I was running into was with development teams in some of the other divisions; I would work with the enterprise architects to hash out a pretty detailed design, but when we brought that to the development teams, there was a ton of convincing, resistance, and redesign that ended up happening. Now, I'm not sure exactly which anti-pattern we were running into, but it was one of the following:&lt;/p&gt;

&lt;p&gt;&lt;span style="font-weight:bold;"&gt;Antipattern 2:&lt;/span&gt; up-front architecture doesn't work if the architects aren't familiar enough with the details of the systems/teams they are designing for, because the designs won't "fit right".&lt;/p&gt;

&lt;p&gt;&lt;span style="font-weight:bold;"&gt;Antipattern 3:&lt;/span&gt; up-front architecture doesn't work if the architects haven't been able to build up sufficient technical cred to sell their designs to the developers.&lt;/p&gt;

&lt;p&gt;For sure both of those applied to me with respect to the developers and technical leads of the groups in the other divisions.&lt;/p&gt;

&lt;p&gt;I've been thinking a lot recently about the right way to approach this, and really enjoyed this &lt;a href="http://www.agiledata.org/essays/enterpriseArchitecture.html" title="read more about an approach to agile enterprise architecture"&gt;essay on Agile Enterprise Architecture&lt;/a&gt; by Scott W. Ambler. He proposes a much more lightweight, hands-on approach to architecture which can best be described as guiding the agile development teams to develop the right architecture themselves.&lt;/p&gt;

&lt;p&gt;Fred Brooks writes in &lt;span style="font-style:italic;"&gt;&lt;a href="http://www.amazon.com/Mythical-Man-Month-Software-Engineering-Anniversary/dp/0201835959" title="link to an Amazon listing for the Mythical Man-Month book about software engineering"&gt;The Mythical Man-Month&lt;/a&gt;&lt;/span&gt; that a system's architecture must proceed from one or a small number of minds in order to be coherent (I can't find a copy right now, so that's paraphrased). At the same time, agile development techniques like &lt;a href="http://www.extremeprogramming.org/" title="introduction to the Extreme Programming software development methodology"&gt;Extreme Programming (XP)&lt;/a&gt; or &lt;a href="http://www.controlchaos.com/" title="home page of the Scrum software development framework"&gt;Scrum&lt;/a&gt; suggest that the development teams ought to be responsible for evolving their architecture organically. I'm starting to think that both of these are true, and that they are not mutually exclusive, and it is related to another realization I've had (but that I have a hard time remembering):&lt;/p&gt;

&lt;blockquote&gt;
A solution architect is responsible for seeing that the end-to-end solution hangs together technically. It is not necessary for the architect to produce the whole end-to-end design himself to achieve this.
&lt;/blockquote&gt;

&lt;p&gt;Going forward, I think I'm going to take the following approach: figure out broadly which development teams are going to need to interact and what their coarse responsibilities are, then immediately get them involved on working out the solution. In true agile fashion, let them work out the details, and just play Product Owner to set high-level (technical) requirements, being on-hand to participate in the design discussions. I think this leverages best the available technical design talent in the development teams, while making sure that someone is tracking it all and fitting it into a coherent mental model to make Mr. Brooks happy.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-5871308384921062660?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/vJh45gPaGkF8nM8IHqttunAmJks/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/vJh45gPaGkF8nM8IHqttunAmJks/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/vJh45gPaGkF8nM8IHqttunAmJks/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/vJh45gPaGkF8nM8IHqttunAmJks/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/5871308384921062660/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=5871308384921062660" title="5 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/5871308384921062660?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/5871308384921062660?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/ylaLmMxI8so/agile-architecture-anti-patterns.html" title="Agile Architecture Anti-Patterns" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>5</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2009/08/agile-architecture-anti-patterns.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0ADRH4zfSp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-1783857594132741222</id><published>2009-05-15T16:18:00.007-04:00</published><updated>2010-09-26T14:29:35.085-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:29:35.085-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="java" /><category scheme="http://www.blogger.com/atom/ns#" term="rsa" /><category scheme="http://www.blogger.com/atom/ns#" term="jce" /><category 
scheme="http://www.blogger.com/atom/ns#" term="openssl" /><category scheme="http://www.blogger.com/atom/ns#" term="public key cryptography" /><title>RSA Public Key Cryptography in Java</title><content type="html">&lt;p&gt;&lt;a href="http://en.wikipedia.org/wiki/Public-key_cryptography" title="wikipedia article about public key cryptography"&gt;Public key cryptography&lt;/a&gt; is a well-known concept, but for some reason the &lt;a href="http://java.sun.com/javase/technologies/security/" title="home page for Sun's Java standard edition security products"&gt;JCE (Java Cryptography Extensions)&lt;/a&gt; &lt;a href="http://java.sun.com/j2se/1.4.2/docs/guide/security/jce/JCERefGuide.html" title="reference guide for Sun's Java Cryptography Extensions"&gt;documentation&lt;/a&gt; doesn't at all make it clear how to interoperate with common public key formats such as those produced by &lt;a href="http://www.openssl.org/" title="openssl home page"&gt;&lt;tt&gt;openssl&lt;/tt&gt;&lt;/a&gt;. If you try to do a search on the web for how to make &lt;a href="http://en.wikipedia.org/wiki/RSA" title="wikipedia article about RSA cryptography schemes"&gt;RSA&lt;/a&gt; public key cryptography work in Java, you quickly find a lot of people asking questions and not a lot of people answering them. In this post, I'm going to try to lay out very clearly how I got this working.&lt;/p&gt;

&lt;p&gt;Just to set expectations, this is not a tutorial about how to &lt;em&gt;use&lt;/em&gt; the cryptography APIs themselves in &lt;a href="http://java.sun.com/j2se/1.5.0/docs/api/javax/crypto/package-frame.html" title="javadocs for the Java cryptography API"&gt;&lt;tt&gt;javax.crypto&lt;/tt&gt;&lt;/a&gt; (look at the JCE tutorials from Sun for this); nor is this a primer about how public key cryptography works. This article is really about how to manage the keys with off-the-shelf utilities available to your friendly, neighborhood sysadmin and still make use of them from Java programs. Really, this boils down to "how do I get these darn keys loaded into a Java program where they can be used?" This is the article I wish I had when I started trying to muck around with this stuff....&lt;/p&gt;

&lt;h2 style="text-align: left;"&gt;Managing the keys&lt;/h2&gt;

&lt;p&gt;&lt;b&gt;Openssl.&lt;/b&gt; This is the de-facto tool sysadmins use for managing public/private keys, &lt;a href="http://en.wikipedia.org/wiki/X.509" title="wikipedia article about the X.509 certificate standard"&gt;X.509 certificates&lt;/a&gt;, etc. This is what we want to create/manage our keys with, so that they can be stored in formats that are common across most Un*x systems and utilities (like, say, C programs using the &lt;tt&gt;openssl&lt;/tt&gt; library...). Java has this notion of its own keystore, and Sun will give you the &lt;a href="http://java.sun.com/j2se/1.3/docs/tooldocs/win32/keytool.html" title="man page for the keytool utility"&gt;keytool command&lt;/a&gt; with Java, but that doesn't do you much good outside of Java world.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Creating the keypair.&lt;/b&gt; We are going to create a keypair, saving it in openssl's preferred PEM format. PEM formats are ASCII and hence easy to email around as needed. However, we will need to save the keys in the binary DER format so Java can read them. Without further ado, here is the magical incantation for creating the keys we'll use:

&lt;pre&gt;
# generate a 2048-bit RSA private key
$ openssl genrsa -out private_key.pem 2048

# convert private Key to PKCS#8 format (so Java can read it)
$ openssl pkcs8 -topk8 -inform PEM -outform DER -in private_key.pem \
    -out private_key.der -nocrypt

# output public key portion in DER format (so Java can read it)
$ openssl rsa -in private_key.pem -pubout -outform DER -out public_key.der
&lt;/pre&gt;
&lt;/p&gt;

&lt;p&gt;You keep &lt;tt&gt;private_key.pem&lt;/tt&gt; around for reference, but you hand the DER versions to your Java programs.&lt;/p&gt;

&lt;h2 style="text-align: left;"&gt;Loading the keys into Java&lt;/h2&gt;

&lt;p&gt;Really, this boils down to knowing what type of KeySpec to use when reading in the keys. To read in the private key:&lt;/p&gt;

&lt;pre&gt;
import java.io.*;
import java.security.*;
import java.security.spec.*;

public class PrivateKeyReader {

  public static PrivateKey get(String filename)
    throws Exception {
    
    File f = new File(filename);
    FileInputStream fis = new FileInputStream(f);
    DataInputStream dis = new DataInputStream(fis);
    byte[] keyBytes = new byte[(int)f.length()];
    dis.readFully(keyBytes);
    dis.close();

    PKCS8EncodedKeySpec spec =
      new PKCS8EncodedKeySpec(keyBytes);
    KeyFactory kf = KeyFactory.getInstance("RSA");
    return kf.generatePrivate(spec);
  }
}
&lt;/pre&gt;

&lt;p&gt;And now, to read in the public key:&lt;/p&gt;

&lt;pre&gt;
import java.io.*;
import java.security.*;
import java.security.spec.*;

public class PublicKeyReader {

  public static PublicKey get(String filename)
    throws Exception {
    
    File f = new File(filename);
    FileInputStream fis = new FileInputStream(f);
    DataInputStream dis = new DataInputStream(fis);
    byte[] keyBytes = new byte[(int)f.length()];
    dis.readFully(keyBytes);
    dis.close();

    X509EncodedKeySpec spec =
      new X509EncodedKeySpec(keyBytes);
    KeyFactory kf = KeyFactory.getInstance("RSA");
    return kf.generatePublic(spec);
  }
}
&lt;/pre&gt;

&lt;p&gt;That's about it. The hard part was figuring out a compatible set of:
&lt;ol&gt;
&lt;li&gt;openssl DER output options (particularly the &lt;a href="http://en.wikipedia.org/wiki/PKCS" title="wikipedia article about the PKCS public key cryptography standards"&gt;PKCS#8 encoding&lt;/a&gt;)&lt;/li&gt;
&lt;li&gt;which type of KeySpec Java needed to use (strangely enough, the public key needs the "X509" keyspec, even though you would normally handle X.509 certificates with
the &lt;a href="http://www.openssl.org/docs/apps/x509.html" title="Unix man page for the openssl x509 command"&gt;&lt;tt&gt;openssl x509&lt;/tt&gt;&lt;/a&gt; command, not the &lt;a href="http://www.openssl.org/docs/apps/rsa.html" title="Unix man page for the openssl rsa command"&gt;&lt;tt&gt;openssl rsa&lt;/tt&gt;&lt;/a&gt; command. Real intuitive.)&lt;/li&gt;
&lt;/ol&gt;
&lt;/p&gt;

&lt;p&gt;From here, signing and verifying work as described in the JCE documentation; the only other thing you need to know is that you can use the "SHA1withRSA" algorithm when you get your &lt;a href="http://java.sun.com/j2se/1.5.0/docs/api/java/security/Signature.html" title="javadocs for the Signature class"&gt;&lt;tt&gt;java.security.Signature&lt;/tt&gt;&lt;/a&gt; instance for signing/verifying, and that you want the "RSA" algorithm when you get your &lt;a href="http://java.sun.com/j2se/1.5.0/docs/api/javax/crypto/Cipher.html" title="javadocs for the Cipher class"&gt;&lt;tt&gt;javax.crypto.Cipher&lt;/tt&gt;&lt;/a&gt; instance for encrypting/decrypting.&lt;/p&gt;

&lt;p&gt;Many happy security returns to you.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-1783857594132741222?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/KUTNl7qmkdisyq8Z0sUDO6kc_qY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KUTNl7qmkdisyq8Z0sUDO6kc_qY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/KUTNl7qmkdisyq8Z0sUDO6kc_qY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KUTNl7qmkdisyq8Z0sUDO6kc_qY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/1783857594132741222/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=1783857594132741222" title="16 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/1783857594132741222?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/1783857594132741222?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/FMBjix_zRWA/public-key-cryptography-in-java.html" title="RSA Public Key Cryptography in Java" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>16</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2009/05/public-key-cryptography-in-java.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0YGQHw4eSp7ImA9WxVXE00.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-8142653959254891303</id><published>2009-02-10T18:26:00.005-05:00</published><updated>2009-02-10T18:38:41.231-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2009-02-10T18:38:41.231-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="csv" /><category scheme="http://www.blogger.com/atom/ns#" term="atom" /><category scheme="http://www.blogger.com/atom/ns#" term="blogger" /><category 
scheme="http://www.blogger.com/atom/ns#" term="archives" /><category scheme="http://www.blogger.com/atom/ns#" term="download" /><category scheme="http://www.blogger.com/atom/ns#" term="backup" /><category scheme="http://www.blogger.com/atom/ns#" term="python" /><title>Downloading your Blogger archives</title><content type="html">A &lt;a href="http://chewyourgrouse.blogspot.com/"&gt;friend&lt;/a&gt; was looking for a way to grab an archive of his Blogger posts into a CSV file he could do text mining on (and presumably, for a low-fi backup mechanism).

I wrote this Python script for him, enjoy.

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
#!/usr/bin/env python
#
# Copyright (C) 2009 by Jon Moore
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see &amp;lt;http://www.gnu.org/licenses/&gt;.

import csv
import urllib2
import unicodedata
import xml.etree.ElementTree as etree

blog_feed = 'http://codeartisan.blogspot.com/feeds/posts/default'
output = 'posts.csv'

ATOM_NS = 'http://www.w3.org/2005/Atom'

def norm(s):
    """Return s encoded as ASCII, or None when s is empty or missing."""
    if s:
        # characters with no ASCII form are dropped rather than raising
        return s.encode('ascii', 'ignore')
    return None

def main():
    f = open(output, 'wb')
    csv_wr = csv.writer(f)
    url = blog_feed + '?max-results=100'
    csv_wr.writerow(['id','published','updated','permalink','title','content'])
    while url:
        print "fetching", url
        feed = etree.fromstring(urllib2.urlopen(url).read())
        for entry in feed.findall("{%s}entry" % ATOM_NS):
            id = entry.find("{%s}id" % ATOM_NS).text
            published = entry.find("{%s}published" % ATOM_NS).text
            updated = entry.find("{%s}updated" % ATOM_NS).text
            title = norm(entry.find("{%s}title" % ATOM_NS).text)
            content = norm(entry.find("{%s}content" % ATOM_NS).text)
            perm_url = ''
            for link in entry.findall("{%s}link" % ATOM_NS):
                if (link.get('rel') == 'alternate'
                    and link.get('type') == 'text/html'):
                    perm_url = link.get('href')
                    break
            csv_wr.writerow([id,published,updated,perm_url,title,content])
            print "wrote",id
        url = None
        for link in feed.findall("{%s}link" % ATOM_NS):
            if link.get('rel') == 'next':
                url = link.get('href')
                break
    f.close()

if __name__ == "__main__":
    main()
&lt;/pre&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-8142653959254891303?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/a8yvIpmV1Rui1LXZgVrnQaUPMds/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/a8yvIpmV1Rui1LXZgVrnQaUPMds/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/a8yvIpmV1Rui1LXZgVrnQaUPMds/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/a8yvIpmV1Rui1LXZgVrnQaUPMds/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/8142653959254891303/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=8142653959254891303" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/8142653959254891303?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/8142653959254891303?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/tm8juVttW2A/downloading-your-blogger-archives.html" title="Downloading your Blogger archives" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2009/02/downloading-your-blogger-archives.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D04GRns9eSp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-357228904166483911</id><published>2009-01-24T13:52:00.006-05:00</published><updated>2010-09-26T14:32:07.561-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:32:07.561-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="tco" /><category scheme="http://www.blogger.com/atom/ns#" term="app engine" /><category scheme="http://www.blogger.com/atom/ns#" term="PaaS" /><category 
scheme="http://www.blogger.com/atom/ns#" term="business case" /><category scheme="http://www.blogger.com/atom/ns#" term="cloud computing" /><category scheme="http://www.blogger.com/atom/ns#" term="aws" /><title>Business Cases and Cloud Computing</title><content type="html">&lt;p&gt;I just read a very interesting &lt;a href="http://seekingalpha.com/article/100592-cloud-computing-what-are-the-barriers-to-entry-and-it-diseconomies"&gt;article&lt;/a&gt; by Gregory Ness on seekingalpha.com that talks about some of the technology trends behind cloud computing. One key quote:&lt;/p&gt;

&lt;blockquote&gt; 
Automation and control has been both a key driver and a barrier for the adoption of new technology as well as an enterprise’s ability to monetize past investments.  Increasingly complex networks are requiring escalating rates of manual intervention.  This dynamic will have more impact on IT spending over the next five years than the global recession, because automation is often the best answer to the productivity and expense challenge.
&lt;/blockquote&gt;
 
&lt;p&gt;One other cited link is to an &lt;a href="http://download.microsoft.com/download/1/9/2/192e73a4-7abb-4bad-b469-34632d54a8a6/IDC%20Whitepaper%20Demonstrating%20Business%20Value.pdf"&gt;IDC study&lt;/a&gt; that includes the following graph:&lt;/p&gt;

&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://farm4.static.flickr.com/3298/3223405278_a279aec350_o_d.gif"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 564px; height: 384px;" src="http://farm4.static.flickr.com/3298/3223405278_a279aec350_o_d.gif" border="0" alt="Graph showing that 60% of the total cost of ownership (TCO) for a server over a 3 year lifetime comes from staffing costs." /&gt;&lt;/a&gt;

&lt;p&gt;Note that staffing accounts for 60% of the cost of maintaining a server over its lifetime. Cloud infrastructure services like &lt;a title="Click here to learn more about Amazon Web Services" href="http://aws.amazon.com/"&gt;Amazon&lt;/a&gt; &lt;a title="Click here to learn more about Amazon's Elastic Compute Cloud (EC2) service" href="http://aws.amazon.com/ec2/"&gt;EC2&lt;/a&gt; would really only save an enterprise data center on hardware setup / software install costs, which are probably, in terms of staffing, a small amount of staff time for a given server. Actually administering the server once it is running is really the bulk of the cost, and that won't go away on EC2 -- you'll still need operations staff to provision/image cloud infrastructure. EC2 makes sense if the economies of scale of AWS are such that they can achieve a lower operational cost for that other 40% than you can, or if there is a business / time-to-market value proposition that makes sense in being able to provision hardware on EC2 more rapidly than you can acquire and install hardware yourself.&lt;/p&gt;

&lt;p&gt;Given the huge economy of scale that the large cloud providers have--tens of thousands of servers, it is going to be hard to get your costs for that 40% lower than what they can achieve with their existing infrastructure automation and ability to purchase hardware in bulk, especially for a startup company whose hardware needs are initially modest. Let's guess that there's a 33% markup on cost for EC2, so when you are getting charged $0.10 per CPU hour, it's really only costing them $0.075. Let's assume a 75% &lt;a title="description of an experience curve or learning curve in business terms" href="http://en.wikipedia.org/wiki/Experience_curve_effects"&gt;experience curve&lt;/a&gt; on infrastructure (meaning, once you have doubled the number of servers you have deployed, the last server costs only 75% of what the halfway point was).&lt;/p&gt;

&lt;p&gt;By &lt;a title="article with three different estimates of the number of servers Amazon has" href="http://markmcgranaghan.com/posts/165"&gt;one estimate&lt;/a&gt;, Amazon has 30,000 servers. Now let's work backward (1/0.75 = 1.33): at 15,000 servers, their cost was $0.075 * 1.33 = $0.09975. At 7500 servers, their marginal cost was $0.09975 * 1.33 = $0.13. In other words, you'd have to be planning to deploy 15,000 servers in order to have a hope of getting your marginal cost under what they'll charge you retail.  &lt;/p&gt;

&lt;p&gt;(I think this is actually a conservative estimate: the experience/learning curve for infrastructure deployment is probably steeper than 75% due to existing hierarchical deployment patterns and a product (provisioned servers) that lends itself well to automation. Also, due to the high barrier to entry for cloud computing in terms of number of servers you need to be competitive, they can probably get away with charging an even higher markup).&lt;/p&gt;

&lt;p&gt;One corollary of this is that if you are currently running a data center with far fewer servers (i.e. the hardware is a sunk cost), &lt;span style="font-style:italic;"&gt;you might actually be better off turning your data center off and leasing from Amazon&lt;/span&gt;. Now of course, there are some things (customer credit card data, extremely sensitive business information) that you just wouldn't be willing to host somewhere outside your own data center. But that's probably a very specific set of data--host that stuff and lease the rest in the cloud, particularly if you can get adequate SLAs from your cloud vendor.&lt;/p&gt;

&lt;p&gt;So that deals with the 40% of the TCO for a server that isn't staffing. How do you cut costs on the other 60%?&lt;/p&gt;

&lt;p&gt;You won't really be able to make a dent in that 60% until you get not just to fully automated infrastructure provisioning, but until you get to fully automated software deployment and provisioning. This is not possible until you get to standardized computing platforms with specific functionality that are scale-on-demand, like &lt;a title="Click here to learn about Akamai network services" href="http://www.akamai.com/"&gt;Akamai&lt;/a&gt; &lt;a href="http://www.akamai.com/html/technology/products/netstorage.html"&gt;NetStorage&lt;/a&gt;, &lt;a href="http://aws.amazon.com/"&gt;Amazon&lt;/a&gt; &lt;a href="http://aws.amazon.com/s3"&gt;S3&lt;/a&gt;/&lt;a href="http://aws.amazon.com/ebs"&gt;EBS&lt;/a&gt;/&lt;a href="http://aws.amazon.com/sqs"&gt;SQS&lt;/a&gt;/&lt;a href="http://aws.amazon.com/simpledb/"&gt;SimpleDB&lt;/a&gt;, and &lt;a href="http://www.google.com/"&gt;Google&lt;/a&gt; &lt;a href="http://code.google.com/appengine/"&gt;AppEngine&lt;/a&gt;. These are known as "Platform-as-a-Service" (PaaS) offerings. &lt;/p&gt;
 
&lt;p&gt;There's a similar experience curve argument here: you could spend internal development time here to set up some kind of application deployment framework, but you'd essentially have to be willing to build and deploy within orders of magnitude the number of different apps as the Google App Engine team in order to get your costs under what Google will charge you. Unless you are in the business of directly competing with them in the PaaS market, you might as well buy from them and focus your energy on providing your unique business value, not software or hardware infrastructure. [Editor's note: this was something &lt;a href="http://blog.objectstrategy.com/"&gt;Matt Stevens&lt;/a&gt; said to me a while ago, and it wasn't until I went through the mental exercise of writing this article that I actually got it].&lt;/p&gt;

&lt;p&gt;Yesterday I implemented (not prototyped) a service in Google App Engine in about 6 hours that would cost around $400 per month (according to their recent &lt;a href="http://googleappengine.blogspot.com/2008/05/announcing-open-signups-expected.html"&gt;pricing announcements&lt;/a&gt;) if projected usage were more than double what it is now. I estimate this would require at least 10 database servers just to host the &lt;span style="font-style:italic;"&gt;data&lt;/span&gt; in a scalable, performant fashion, nevermind the REST data interface (webnodes) sitting in front of it. On Amazon EC2, that'd be $720 per month on your small instances (assuming those were even beefy enough), and per the experience curve argument above, it's probably way more than that in our data center. And that's not counting any of the reliability/load balancing infrastructure.&lt;/p&gt;

&lt;p&gt;So my open question is: how, as a software developer, can you justify &lt;span style="font-style:italic;"&gt;not&lt;/span&gt; building your app in one of these cloud frameworks?&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-357228904166483911?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/snnyu2iA2IaJvcPp3K1s1osG8F4/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/snnyu2iA2IaJvcPp3K1s1osG8F4/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/snnyu2iA2IaJvcPp3K1s1osG8F4/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/snnyu2iA2IaJvcPp3K1s1osG8F4/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/357228904166483911/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=357228904166483911" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/357228904166483911?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/357228904166483911?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/Xqg0KU1m3Bg/business-cases-and-cloud-computing.html" title="Business Cases and Cloud Computing" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2009/01/business-cases-and-cloud-computing.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEQERXs7fSp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-528358299708941850</id><published>2009-01-12T18:53:00.037-05:00</published><updated>2010-09-26T14:38:24.505-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:38:24.505-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="REST API" /><category scheme="http://www.blogger.com/atom/ns#" term="rest" /><category scheme="http://www.blogger.com/atom/ns#" term="restful web services" /><category 
scheme="http://www.blogger.com/atom/ns#" term="web applications" /><title>Websites are also RESTFul Web Services</title><content type="html">&lt;p&gt;I have been reading the Richardson and Ruby book &lt;span style="font-style: italic;"&gt;&lt;a title="Click here to read about this book on O'Reilly" href="http://oreilly.com/catalog/9780596529260/"&gt;RESTful Web Services&lt;/a&gt;&lt;/span&gt; and recently had an epiphany: if you design a RESTful web &lt;em&gt;site&lt;/em&gt; it &lt;b&gt;is also a RESTful web API&lt;/b&gt;. In this post I'll show exactly how that works and how you can use this to rapidly build a prototype of a modern web application.&lt;/p&gt;

&lt;p&gt;First of all, let's start with a very simple application concept and build it from the ground up. Let's consider a simple application that lets you keep a list of favorite items. The resources we'll want to model are:
&lt;ul&gt;
&lt;li&gt;a favorite item&lt;/li&gt;
&lt;li&gt;a list of a user's favorite items&lt;/li&gt;&lt;/ul&gt;
&lt;/p&gt;
&lt;p&gt;We'll assign the following URLs:
&lt;ul&gt;
&lt;li&gt;&lt;tt&gt;/favorites/1234&lt;/tt&gt; for favorite item with primary key 1234&lt;/li&gt;
&lt;li&gt;&lt;tt&gt;/favorites&lt;/tt&gt; is the list of everyone's favorite items&lt;/li&gt;
&lt;li&gt;&lt;tt&gt;/favorites/-/{owner}joe@example.com&lt;/tt&gt; (or its URL-encoded equivalent) is the list of items belonging to our friend Joe (this is a URL format inspired by &lt;a title="Google home page" href="http://www.google.com/"&gt;Google's&lt;/a&gt; &lt;a href="http://www.blogger.com/Protocol%20specification%20for%20GData"&gt;GData protocol&lt;/a&gt;).&lt;/li&gt;&lt;/ul&gt;
&lt;/p&gt;
&lt;p&gt;Now, we'll support the following &lt;a title="definition of HTTP methods" href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec9.html#sec9"&gt;HTTP methods&lt;/a&gt;:
&lt;ul&gt;
&lt;li&gt;You can GET and DELETE an individual item (we could allow PUT if we wanted to allow editing, but we'll keep the example simple)&lt;/li&gt;
&lt;li&gt;You can create a new favorite item with a POST to /favorites.&lt;/li&gt;
&lt;li&gt;You can GET the list of a user's favorites.&lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;Ok. Now we want to rapidly prototype this so we know if we have the resources modelled correctly. Fire up your favorite web application framework (&lt;a title="Ruby on Rails home page" href="http://rubyonrails.org/"&gt;Ruby on Rails&lt;/a&gt;, &lt;a title="home page for the Django Python web application framework" href="http://www.djangoproject.com/"&gt;Django&lt;/a&gt;, &lt;a href="http://www.blogger.com/Click%20here%20for%20information%20on%20the%20Spring%20web%20application%20framework%20for%20Java"&gt;Spring&lt;/a&gt;, etc.) and map those URLs to controllers. Now most of these frameworks let you fill in implementations for the various HTTP methods. We'll make a minor simplification and allow "overloaded POST" where we allow passing a URL parameter to POST to specify PUT and DELETE (e.g. "_method=DELETE"). We can implement the proper HTTP method but we'll allow you to use POST to do it too; browsers and some Javascript HTTP implementations can only do GET and POST.&lt;/p&gt;

&lt;p&gt;Ok, now a funny thing happens: if you render an HTML response for everything, you can start playing with your API in your browser! In particular, when we render the list of items, we will naturally put the text of those items on the page, but we can also throw the following HTML snippet at the top of the page:&lt;/p&gt;

&lt;div style="background-color: #bbbbbb; margin-left: 20px; padding-left: 10px;  width: 600px;"&gt;
&lt;pre&gt;
&amp;lt;p&gt;Add a new favorite:&amp;lt;/p&gt;
&amp;lt;form action="/favorites" method="post"&gt;
  &amp;lt;input type="text" name="itemname"/&gt;
  &amp;lt;input type="submit" value="Add"/&gt;
&amp;lt;/form&gt;
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;We can also add the following form after each item's text:&lt;/p&gt;

&lt;div style="background-color: #bbbbbb; margin-left: 20px; padding-left: 10px;  width: 600px;"&gt;
&lt;pre&gt;
&amp;lt;!-- use a specific item's URL for the action --&gt;
&amp;lt;form action="/favorites/1234" method="post"&gt;
 &amp;lt;input type="hidden" name="_method" value="DELETE"/&gt;
 &amp;lt;input type="submit" value="Delete"/&gt;
&amp;lt;/form&gt;
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;which gives us this ugly beastie:&lt;/p&gt;

&lt;div style="margin-left: 20px; padding-left: 10px; width: 450px; border: thin solid black;"&gt;
&lt;b&gt;A Few of My Favorite Things:&lt;/b&gt;&lt;br/&gt;
&lt;form action="http://www.example.com/" method="post"&gt;
Add a new favorite: &lt;input type="text" name="itemname"/&gt;&lt;input type="submit" value="Add"/&gt;
&lt;/form&gt;
&lt;form action="http://www.example.com/" method="post"&gt;
&lt;ul&gt;
&lt;li&gt;raindrops on roses &lt;input type="submit" value="Delete"/&gt;&lt;/li&gt;
&lt;li&gt;whiskers on kittens &lt;input type="submit" value="Delete"/&gt;&lt;/li&gt;
&lt;li&gt;bright copper kettles &lt;input type="submit" value="Delete"/&gt;&lt;/li&gt;
&lt;li&gt;warm woolen mittens &lt;input type="submit" value="Delete"/&gt;&lt;/li&gt;
&lt;li&gt;wild geese that fly with the moon on their wings &lt;input type="submit" value="Delete"/&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/form&gt;
&lt;/div&gt;

&lt;p&gt;Now, one key item is that when you render the result page for adding an item, you can send a 201 (Created) response and say something like "item added", throwing in a link back to the list page. The whole HTML response might be nothing more than:&lt;/p&gt;

&lt;div style="background-color: #bbbbbb; margin-left: 20px; padding-left: 10px;  width: 600px;"&gt;
&lt;pre&gt;
&amp;lt;html&gt;
  &amp;lt;head&gt;&amp;lt;/head&gt;
  &amp;lt;body&gt;
  &amp;lt;p&gt;I created your item &amp;lt;a href="/favorites/2345"&gt;here&amp;lt;/a&gt;.&amp;lt;/p&gt;
  &amp;lt;p&gt;All your items are &amp;lt;a href="/favorites/-/{owner}joe@example.com"&gt;here&amp;lt;/a&gt;.&amp;lt;/p&gt;
  &amp;lt;/body&gt;
&amp;lt;/html&gt;
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;We similarly want to render a confirmation page after a DELETE. This makes for an awkward user experience "add, ok, add, ok,..." but you'll notice that the back and forward buttons on your browser actually work without having to rePOST any exchanges. &lt;/p&gt;

&lt;p&gt;[Side note: you could, instead of returning a success page, return a 302 that lands you back on the list page, which maybe gets you closer to what you wanted from a user experience, but this is precisely what will break your browser's back button and make you rePOST.]&lt;/p&gt;

&lt;p&gt;Now you also have the interesting property that all the &lt;em&gt;links&lt;/em&gt; on your site are safe (without side effects) GETs, and all the &lt;em&gt;buttons&lt;/em&gt; are potentially destructive (write operations of one sort or another). I say only "potentially" because you might have a search form with &lt;tt&gt;method="get"&lt;/tt&gt; to do a query, and not all of your POST/PUT/DELETEs will actually change anything.&lt;/p&gt;

&lt;p&gt;At any rate, at this point, you have a functionally working website that someone could use, if somewhat awkwardly. Plus, if you have my frontend aesthetic design sensibilities, your users will have the pleasure of suppressing a gag reflex while using your site.&lt;/p&gt;

&lt;p&gt;So let's spruce this up a little bit. Now, on the HTML page for the list of favorites, we can apply some Javascript. At a first blush, we can hide the delete buttons until a mouseover, which cleans things up somewhat. But the real magic happens when we attach an AJAX event to the delete buttons. Now the script can actually do &lt;em&gt;the very same POST&lt;/em&gt; that the form would have done, and then check the &lt;a title="description of HTTP status codes" href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10"&gt;HTTP status code&lt;/a&gt;, removing the item text from the DOM on success. &lt;/p&gt;

&lt;p&gt;Suddenly, the user never leaves that list page, and we haven't had to change any of the rest of the API -- just the HTML representation of that list page. The AJAX call doesn't care if it gets HTML back (in this case), it just cares about the response code. Now we have the nice AJAXy experience we would expect, but oddly enough you still have a website that will work for people with Javascript disabled.&lt;/p&gt;

&lt;p&gt;The last step towards finishing out your API is probably simply to make structured versions of your representations available (e.g. &lt;a title="description of Javascript Object Notation (JSON)" href="http://www.json.org/"&gt;JSON&lt;/a&gt; or &lt;a title="description of the eXtensible Markup Language (XML)" href="http://en.wikipedia.org/wiki/XML"&gt;XML&lt;/a&gt; formats like &lt;a title="description of the Atom syndication format" href="http://www.atomenabled.org/developers/syndication/atom-format-spec.php"&gt;Atom&lt;/a&gt;) with an optional parameter like "?format=json". Now all of your client-side functions can call URLs with the appropriate format on them and get well-structured data, and everyone else gets HTML.&lt;/p&gt;

&lt;p&gt;Well, I guess that's the second to last step. You probably actually want to apply some graphic design and CSS to your site too...&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-528358299708941850?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/Z631bTPsIcYb56VCzZTnF_uZoSw/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Z631bTPsIcYb56VCzZTnF_uZoSw/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/Z631bTPsIcYb56VCzZTnF_uZoSw/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Z631bTPsIcYb56VCzZTnF_uZoSw/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/528358299708941850/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=528358299708941850" title="6 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/528358299708941850?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/528358299708941850?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/LZ_6-0-roIs/websites-are-also-restful-web-services.html" title="Websites are also RESTFul Web Services" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>6</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2009/01/websites-are-also-restful-web-services.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEIAR3g7eSp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-1292130154490868674</id><published>2008-09-19T22:24:00.014-04:00</published><updated>2010-09-26T14:42:26.601-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:42:26.601-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="simple mac backups" /><category scheme="http://www.blogger.com/atom/ns#" term="mac osx backups" /><category scheme="http://www.blogger.com/atom/ns#" term="backups for 
mac" /><category scheme="http://www.blogger.com/atom/ns#" term="ssh" /><category scheme="http://www.blogger.com/atom/ns#" term="rsync" /><category scheme="http://www.blogger.com/atom/ns#" term="hosting" /><title>Simple Backups for your Mac</title><content type="html">&lt;p&gt;You are probably well aware of the need for offsite backups; as a technology professional this is one of the first arrangements I look into for any permanent storage of business information. When I started two years ago for an internal "startup" for a large company, one of the first things we did was set up an SVN repository and then work out an arrangement with an offsite data storage provider. However, the cobbler's children have no shoes: I've never set up a proper backup scheme for my own data at home, and it's about time to take care of business.&lt;/p&gt;

&lt;p&gt;Fortunately, now that we've moved to using Macs at home and with the advent of cheap UN*X hosting providers, it's about time I stopped putting this off. The scheme here is pretty simple: get a hosted Linux server from someone like 1and1.com, dreamhost.com, or rackspace.com where the storage is backed up and they take care of security updates for the OS. Then set up a pretty simple combination of the UN*X utilities rsync, ssh, cron, and bash scripts to get secure nightly backups going. Just to make it more fun, I'm going to challenge myself to have this all working in under an hour! I'll keep notes as I'm doing it as to how long it takes, not counting the writeup before or afterward.&lt;/p&gt;

&lt;p&gt;I decided to register a domain name with a hosting provider, since it was included. My basic requirements were:
&lt;ul&gt;
&lt;li&gt;SSH access&lt;/li&gt;
&lt;li&gt;rsync installed&lt;/li&gt;
&lt;li&gt;enough storage for my data&lt;/li&gt;
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;Be sure to acquire the following information from your hosting provider:
&lt;ul&gt;
&lt;li&gt;username/password with SSH access (preferably root, if you want to use the server for other purposes, but this is not necessary)&lt;/li&gt;
&lt;li&gt;IP address&lt;/li&gt;
&lt;li&gt;SSH host key of the server&lt;/li&gt;
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;I ended up registering a new domain with dreamhost.com at $9.95/month. As it happened, DreamHost was running a promotion with unlimited disk space and bandwidth for the lifetime of my account. Score! I did have to email tech support to get the ssh host key. If you find yourself in a similar position, you can ask for the output of:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
$ ssh-keygen -l -f ssh_host_rsa_key.pub
2048 0e:c2:f6:f4:d9:86:9d:4b:c4:3d:77:e7:a4:bb:59:14 ssh_host_rsa_key.pub
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Ok, great! Now you have a destination for your offsite storage. Next step is to make sure we can securely log in over the network (we'll use ssh for this). On the Mac you want to back up, open up a Terminal window, and ssh into your server using your username and the server's hostname, as in the following. &lt;b&gt;N.B. Do not finish connecting if the ssh server host key you got from your hosting provider does not match the key you see when you try this!&lt;/b&gt;&lt;/p&gt;


&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ jonm$ ssh jonm@backup.dreamhost.com
The authenticity of host 'backup.dreamhost.com (67.205.39.2)' can't be established.
RSA key fingerprint is 0e:c2:f6:f4:d9:86:9d:4b:c4:3d:77:e7:a4:bb:59:14.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'backup.dreamhost.com,67.205.39.2' (RSA) to the list of known hosts.
jonm@backup.dreamhost.com's password:
[backup]$
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Ok, so far so good. Now we need to make sure we can do it without needing a password; this is where user ssh keys come into play. First, let's create an ssh key to use for backups. We'll want to do this as the root user on our Mac, so that when we run the backup script out of cron, we won't run into permissions problems. You can use the "sudo" command to become root on your Mac:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ jonm$ sudo su -

WARNING: Improper use of the sudo command could lead to data loss
or the deletion of important system files. Please double-check your
typing when using sudo. Type "man sudo" for more information.

To proceed, enter your password, or type Ctrl-C to abort.

Password: &amp;lt;enter jonm's password on my mac&amp;gt;
macbook:~ root#
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Now we need to create an SSH public/private key pair; this is a similar concept to PGP email encryption/signing; you can read a really interesting description of the chronology behind public key cryptography in the book &lt;a href="http://www.amazon.com/Crypto/dp/B000OIZV9I/ref=sr_1_11?ie=UTF8&amp;s=books&amp;qid=1221879031&amp;sr=8-11"&gt;&lt;i&gt;Crypto&lt;/i&gt;&lt;/a&gt; by Steven Levy. We'll keep the private key locally on our Mac, and take a copy of the public key and copy it securely up to our backup server; then ssh will use the private key when we connect, allowing the backup server to verify using the public key that we are who we say we are, without having to send a password. Nice.&lt;/p&gt;

&lt;p&gt;Specifically, we will want to do the following (still as root):&lt;/p&gt;


&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# mkdir .ssh
macbook:~ root# chmod 700 .ssh
macbook:~ root# ls -ld .ssh
drwx------  2 root  wheel  68 Sep 19 22:01 .ssh
macbook:~ root# ssh-keygen -t dsa
Generating public/private dsa key pair.
Enter file in which to save the key (/var/root/.ssh/id_dsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /var/root/.ssh/id_dsa.
Your public key has been saved in /var/root/.ssh/id_dsa.pub.
The key fingerprint is:
fd:47:1d:a6:ac:d0:7d:fb:a5:17:cf:e2:8a:93:a5:30 root@jon-moores-macbook.local
macbook:~ root#
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Use an empty passphrase (i.e. just hit return when prompted for the passphrase), as this will allow the ssh program to load the key without interaction from you. Also note, however, that anyone who gets root access to your Mac will be able to ssh into your backup server at will. Given that our backup server contains a copy of what this
would-be hacker would be able to see on the actual Mac anyway, I don't really see this being a big risk....&lt;/p&gt;

&lt;p&gt;Now, we need to copy the public key over to the backup server:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# scp .ssh/id_dsa.pub jonm@backup.dreamhost.com:
jonm@backup.dreamhost.com's password:
id_dsa.pub                             100%  619     0.6KB/s   00:00
macbook:~ root#
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;You'll have to verify the server SSH key one more time, because now you are connecting from root rather than from your normal user account. Now we'll tell the backup host to accept a login from this key pair:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
[backup]$ mkdir -p .ssh
[backup]$ chmod 700 .ssh
[backup]$ cat id_dsa.pub &gt; ~/.ssh/authorized_keys
[backup]$ chmod 600 ~/.ssh/authorized_keys
[backup]$ exit
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Now, we should be able to log in without a password from our Mac:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# ssh jonm@backup.dreamhost.com
[backup]$
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Sweet. Now we create a directory where our mirrored filesystems will live:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
[backup]$ mkdir mac-backups
[backup]$ chmod 700 mac-backups
[backup]$ exit
macbook:~ root#
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;The utility we'll use to do the mirroring is the rsync utility, which can be invoked to run securely over ssh. This actually makes a nice backup utility for regular use, as the rsync protocol is actually pretty smart about being able to find just the small subsets of data that changed since the last sync; after the first big sync, for most personal file use, there won't be much work to do every night.&lt;/p&gt;

&lt;p&gt;For now, let's set up a test directory on our local Mac.&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# mkdir /tmp/back-me-up
macbook:~ root# echo "data" &gt; /tmp/back-me-up/afile.txt
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Now, to make the magic happen, we do this:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# rsync -avz -e ssh /tmp/back-me-up jonm@backup.dreamhost.com:mac-backups
building file list ... done
back-me-up/
back-me-up/afile.txt

sent 116 bytes  received 40 bytes  62.40 bytes/sec
total size is 5  speedup is 0.03
macbook:~ root#
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Now we can keep a window open on our backups host, and we should see everything show up there:&lt;/p&gt;


&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
[backup]$ ls -lR mac-backups
mac-backups:
total 4
drwxr-xr-x 2 jonm pg1807352 4096 2008-09-19 19:11 back-me-up/

mac-backups/back-me-up:
total 4
-rw-r--r-- 1 jonm pg1807352 5 2008-09-19 19:11 afile.txt
[backup]$
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Just for fun, run the same rsync command above and see that nothing happens if there have been no changes (or rather, just that a very small amount of data gets exchanged to verify no changes).&lt;/p&gt;

&lt;p&gt;Let's just make sure changes show up:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# echo "changed-data" &gt; /tmp/back-me-up/afile.txt
macbook:~ root# rsync -avz -e ssh /tmp/back-me-up jonm@backup.dreamhost.com:mac-backups
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;(other window)&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
[backup]$ cat mac-backups/back-me-up/afile.txt
changed-data
[backup]$
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Ok, looking good. Next step is to identify all the directories you want to back up; let's keep a list of them in a config file on our mac:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;&lt;pre&gt;
macbook:~ root# mkdir -p /usr/local/etc
macbook:~ root# cat - &gt; /usr/local/etc/backups.conf
/Users/jonm/Documents
/tmp/back-me-up
macbook:~ root#
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Note that it is important *not* to have trailing slashes on these directory names, as this changes rsync's behavior slightly in a way that you will probably find annoying (it won't copy the directory name over, just the contents).&lt;/p&gt;

&lt;p&gt;Ok, now the next step is to set up a script that can sync each of the directories:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# mkdir -p /usr/local/bin
macbook:~ root# touch /usr/local/bin/do-backups
macbook:~ root# chmod 700 /usr/local/bin/do-backups
macbook:~ root# cat - &gt; /usr/local/bin/do-backups
#!/bin/sh
for dir in `cat /usr/local/etc/backups.conf`; do
  rsync -avz -e ssh $dir jonm@backup.dreamhost.com:mac-backups
done
macbook:~ root#
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Now we run it once by hand to make sure it works:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# /usr/local/bin/do-backups
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Finally, we install this in root's crontab as follows:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# crontab -l &gt; /tmp/root.cron
macbook:~ root# cat - &gt;&gt; /tmp/root.cron
# take a backup every day at 3am
0 3 * * * /usr/local/bin/do-backups &gt;/dev/null
macbook:~ root# crontab /tmp/root.cron
macbook:~ root# rm /tmp/root.cron
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;Nice and simple. Now the backups are off and running every night without your intervention.&lt;/p&gt;

&lt;p&gt;If you ever need to restore from the backup, you can always reverse the rsync process like this:&lt;/p&gt;

&lt;div style="margin: 5px; border: thin solid black; padding: 5px;"&gt;
&lt;pre&gt;
macbook:~ root# rsync -avz -e ssh jonm@backup.dreamhost.com:mac-backups/back-me-up /tmp
&lt;/pre&gt;
&lt;/div&gt;

&lt;p&gt;for each of the directories you have backed up over there.&lt;/p&gt;

&lt;p&gt;Enjoy, and sleep well tonight....&lt;/p&gt;

P.S. Total elapsed time for the exercise was 2 hours from the time I placed the hosting order to the time the crontab was installed, but I took a one hour break in the middle for dessert and bedtime with the kids. So I'll claim this really did only take one hour of "CPU time" for me.&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-1292130154490868674?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/s6ZdimGNAJLoX2l6fgyToyt7c-Y/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/s6ZdimGNAJLoX2l6fgyToyt7c-Y/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/s6ZdimGNAJLoX2l6fgyToyt7c-Y/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/s6ZdimGNAJLoX2l6fgyToyt7c-Y/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/1292130154490868674/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=1292130154490868674" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/1292130154490868674?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/1292130154490868674?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/DZi-uUzcCLk/simple-backups-for-your-mac.html" title="Simple Backups for your Mac" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2008/09/simple-backups-for-your-mac.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEEAR304fCp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-21822464782177675</id><published>2008-09-02T15:50:00.010-04:00</published><updated>2010-09-26T14:44:06.334-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:44:06.334-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="process improvement" /><category scheme="http://www.blogger.com/atom/ns#" term="metrics" /><category scheme="http://www.blogger.com/atom/ns#" term="lean engineering" /><category 
scheme="http://www.blogger.com/atom/ns#" term="kaizen" /><title>Measure your improvements</title><content type="html">&lt;p&gt;Metrics are an important part of any development group's toolset. If we want to continually improve our ability to develop software (through a &lt;a href="http://en.wikipedia.org/wiki/Lean_manufacturing" title="get more information about lean engineering and manufacturing methods"&gt;lean engineering&lt;/a&gt; &lt;a href="http://en.wikipedia.org/wiki/Kaizen" title="get more information about kaizen, a process of continuous improvement"&gt;&lt;i&gt;kaizen&lt;/i&gt;&lt;/a&gt; approach, or simply as a &lt;a href="http://home.nycap.rr.com/klarsen/learnorg/" title="get more information about theories about how organizations learn as individual entities"&gt;learning organization&lt;/a&gt;), then we need to have a way to figure out:
&lt;ul&gt;
&lt;li&gt;what parts of our process need improvement?
&lt;li&gt;when we make a change, did it help or hurt?
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;This is where process metrics come into play. I'll start with my definition of a &lt;em&gt;metric&lt;/em&gt;, which is a numerical measurement of something. If you can count it, it can be a metric. So "number of outstanding bugs" is a metric, but "software quality" is not. The term "quantitative metric" is redundant, and "qualitative metric" is an oxymoron.&lt;/p&gt;

&lt;p&gt;There are generally two types of metrics we can capture:
&lt;ol&gt;
&lt;li&gt;&lt;b&gt;causal metrics&lt;/b&gt;: these are metrics that have a direct business impact: for example, ROI for a feature, unique monthly visitors, click-through ad rate, &lt;em&gt;etc&lt;/em&gt;.
&lt;li&gt;&lt;b&gt;symptomatic metrics&lt;/b&gt;: these are metrics that do not directly affect ROI (although we might &lt;em&gt;believe&lt;/em&gt; they do) but are downstream indicators for one or more causal metrics. The number of outstanding bugs in a product, the number of bugs caught in a certain phase of development, percentage of code covered by unit tests, &lt;em&gt;etc&lt;/em&gt;. are all symptomatic metrics.
&lt;/ol&gt;&lt;/p&gt;

&lt;p&gt;My general observation, based on reading articles around the use of metrics for improving your processes, is that a lot of metrics-based improvement projects fail to distinguish between these two types of metrics. Partly, I think this is because while the causal metrics properly align your improvement efforts with your business's interests, they are also harder to define and measure. By contrast, a lot of symptomatic metrics are easy to find and measure, but their relationship to the business may be less clear.&lt;/p&gt;

&lt;p&gt;For example, consider the balance between software quality and time to market. You can take a longer time when developing a feature or product to reduce the number of bugs that show up at deployment, or you can ship a feature more quickly, knowing that there may be both known and unknown bugs present. In this case, you can measure both the number of known bugs at deployment time and the overall time-to-production for a feature (time from feature conception to deployment).&lt;/p&gt;

&lt;p&gt;Now, if you can decrease time-to-production without increasing the bugginess of your code, that's a win. Similarly, if you can reduce bugginess without lengthening your production time, that's a win. However, both of these things will probably require some effort to implement. Another interesting possibility would be to simply make an adjustment of where you sit on this balance. For example, simply spend more time looking for and fixing bugs in your QA phase, trading a slower time-to-market for fewer bugs. Or vice versa to get to market more quickly, possibly with more bugs. Both of these adjustments are probably relatively painless to implement, in that no one has to change what they do, just how long they do it for.&lt;/p&gt;

&lt;p&gt;So the question is, which one of these things ought we to do? My argument is that bugginess and time-to-production, both being &lt;em&gt;symptomatic&lt;/em&gt; metrics, don't give us the answer directly. It all depends on our product environment. For example, when producing software to run medical equipment, a company reputation for quality might be more important than shipping new features quickly; or, in a highly innovative internet space, time to market might be king in terms of how much market share you can capture.&lt;/p&gt;

&lt;p&gt;It's management's job both to help implement win-win changes and to set the "slider" of the quality/time tradeoff at the right spot. The trick, of course, is that it might be hard to measure this directly; there are several symptomatic things we could measure, including:
&lt;ul&gt;
&lt;li&gt;time spent in development
&lt;li&gt;time spent in QA
&lt;li&gt;time spent in deployment
&lt;li&gt;number of outstanding bugs
&lt;li&gt;profit for the product at a given point in time
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;Now really, the profit over some window of time (e.g. for a website, revenue vs. development spend for a given month) is the thing we want to optimize. The interesting idea here is for management to be able to run a series of experiments: if I increase/decrease QA or development time, how does it affect ROI for my product? How does the relative bugginess of a release affect its profitability? For certain "sliders" in the business, it is relatively simple to take a series of measurements to find a current "sweet spot".&lt;/p&gt;

&lt;p&gt;An interesting idea here is that sometimes we work through things backwards. For example, we try to estimate "how long will it take to fully regression test a release", or "how long will it take to code up a feature", rather than "how buggy will the release be if we test it for X amount of time", or "how much of this functionality can you develop in X amount of time." In other words, rather than deriving the time-to-market from a set of estimates for all the steps, instead &lt;em&gt;set&lt;/em&gt; the time to market by timeboxing those steps, and see what the outcome is. This is a powerful notion of metrics-based management that is hinted at (in the notion of &lt;a href="http://www.controlchaos.org/" title="find out more about the scrum development framework"&gt;Scrum&lt;/a&gt; timeboxed iterations) but which I have not seen explicitly suggested anywhere[&lt;a href="#footnote1"&gt;1&lt;/a&gt;]. (Please post all the references to things that I've missed in the comments section--I'm sure there are plenty).&lt;/p&gt;

&lt;p&gt;At the end of the day, however, it is hard to optimize things we can't measure. I think important metrics to gather are:
&lt;ul&gt;
&lt;li&gt;the levers we have available to manipulate our process (e.g. timeboxing)
&lt;li&gt;causal metrics that affect our business (ROI, product profit)
&lt;/ul&gt;
We need to be aware which metrics are causal, and which are merely symptomatic, so that we are measuring things that directly affect the business somehow. This approach permits empirical management--adjust something you can control, see how it affects your causal metrics, rinse, repeat.&lt;/p&gt;

&lt;hr/&gt;

&lt;p&gt;&lt;a name="footnote1"&gt;[1]&lt;/a&gt; Scrum timeboxes an entire iteration, but does not timebox an individual feature, so a team may be able to spend all their time on one feature, or spread their effort across many features. The closest thing I've seen here is the notion of the "&lt;b&gt;S&lt;/b&gt;mall" in &lt;a href="http://codeartisan.blogspot.com/2008/02/investing-in-user-stories.html" title="find out more about the INVEST model of desirable user story characteristics"&gt;INVEST user stories&lt;/a&gt;, where stories are limited to a certain amount of complexity. However, the story points in this case are still estimates of the work involved, rather than timeboxes around how much time to spend implementing a feature; the "small" requirement is really to permit more accurate estimation rather than to timebox the amount of effort (although it does secondarily have this effect, I've not seen this stressed in articles about this).&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-21822464782177675?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/vn4LWANGsCOmXGkzveN8yD_Dohc/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/vn4LWANGsCOmXGkzveN8yD_Dohc/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/vn4LWANGsCOmXGkzveN8yD_Dohc/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/vn4LWANGsCOmXGkzveN8yD_Dohc/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/21822464782177675/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=21822464782177675" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/21822464782177675?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/21822464782177675?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/JUKu3PY5jKo/measure-your-improvements.html" title="Measure your improvements" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2008/09/measure-your-improvements.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DE8BSXYyfip7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-6241208165467935170</id><published>2008-08-03T12:57:00.004-04:00</published><updated>2010-09-26T14:47:38.896-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:47:38.896-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="configuration debt" /><category scheme="http://www.blogger.com/atom/ns#" term="transition debt" /><category scheme="http://www.blogger.com/atom/ns#" term="architectural debt" /><category 
scheme="http://www.blogger.com/atom/ns#" term="technical debt" /><title>Cracking down on technical debt</title><content type="html">&lt;p&gt;"Simplicity is the ultimate sophistication." --Leonardo da Vinci.&lt;/p&gt;

&lt;p&gt;"Everything should be made as simple as possible, but no simpler." --Albert Einstein&lt;/p&gt;

&lt;p&gt;"A designer knows he has achieved perfection not when there is nothing left to add, but when there is nothing left to take away."
--Antoine de Saint-Exupery&lt;/p&gt;

&lt;p&gt;I've written before about the notion of &lt;a href="http://codeartisan.blogspot.com/2007/12/whats-apr-on-your-technical-debt.html"&gt;technical debt&lt;/a&gt;. In this post, I want to discuss a few specific sources of technical debt that are easy to accrue, particularly in an agile, iteration-based setting.&lt;/p&gt;

&lt;h3&gt;Incomplete technology transitions&lt;/h3&gt;

&lt;p&gt;These can arise when a technical decision gets made to transition from one technology/architecture/design to another, and the transition happens incrementally. What can end up happening is that an agile team, say one operating under the &lt;a href="http://www.controlchaos.com/"&gt;Scrum&lt;/a&gt; framework, does not complete its incremental transition during the current sprint. Now, although the code is in a working state, there is a good chunk of technical debt arising from having code operating under two separate systems. This &lt;em&gt;transition debt&lt;/em&gt; is problematic for a few reasons:&lt;/p&gt;

&lt;p&gt;First, this can complicate debugging efforts -- when there is a problem with the system, someone has to determine under which scheme the code in question was written. Typically this can mean looking in two different source code hierarchies, or looking through two separate sets of configuration. The system is, as a result, more complicated than it needs to be.&lt;/p&gt;

&lt;p&gt;Secondly, this can be an attractor for additional debt; if the old system is still around, and a developer is more familiar with the old system than the new, there is a very strong temptation to make changes/additions in the old system. This work simply adds to the outstanding transition work, and despite the developer's familiarity, is likely to be implemented in a more difficult or less efficient way (assuming, of course, there were valid technical reasons for making the transition in the first place). &lt;/p&gt;

&lt;p&gt;Finally, this can cause extra work to happen during feature development that touches/interacts with the subsystem in transition, because either the cooperating subsystems have to special case two different interaction styles, or an adaptation layer has to be built to handle both subsystems and abstract their existence away from clients' concerns. Either way, you are writing more code than you would have if you had completed the transition and you just had one implementation of the subsystem.&lt;/p&gt;

&lt;p&gt;Teams working on an iteration-based methodology need to do several things to avoid the pitfalls from transition debt:
&lt;ol&gt;
&lt;li&gt;when a technical decision for a transition has been made, it must be communicated clearly to the whole development team, including the reasons for the transition. This can help prevent the unintentional accrual of additional transition debt.
&lt;li&gt;plan for more refactoring time when signing up for work, to leave time to complete transitions before an iteration ends.
&lt;li&gt;communicate the existence of the transition debt to the Product Owner at the review, so that completing the transition can be scheduled as a backlog item. Furthermore, stress the priority of this carryover work to ensure that the transition debt exists for the shortest amount of time possible.
&lt;/ol&gt;&lt;/p&gt;

&lt;h3&gt;Obsolete/extraneous configuration&lt;/h3&gt;

&lt;p&gt;We'll call this type of technical debt &lt;em&gt;configuration debt&lt;/em&gt;. There are a couple of sources of this type of debt:
&lt;ol&gt;
&lt;li&gt;transitional runtime configuration that still exists after the transition. For example, when a data partner was making an id space transition to extend the length of their ids, we had a flag to govern whether to use old or new ids with that partner, so that we could decouple our code releases from the partner's transitions. Over a year later, the flag still exists, but it is always set to use "new" ids, so there is certainly unneeded code to handle this.
&lt;li&gt;exposing properties that would only change with a code drop as runtime configuration. In this case, the values of the properties would really only change if we rolled a new code release, so they could just as easily be compile-time constants that would not require the scaffolding to make them runtime properties, no matter how simple that scaffolding might be.
&lt;/ol&gt;&lt;/p&gt;

&lt;p&gt;Unnecessary code hurts you in several ways:
&lt;ul&gt;
&lt;li&gt;it took someone time to write it in the first place
&lt;li&gt;you have to compile it or run its unit tests over and over again while you're developing (death of a thousand cuts)!
&lt;li&gt;people need to keep it in their mental model of the system instead of leaving room for parts of the system that actually do something useful
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;The easiest way to prevent the accrual of configuration debt is to review any new runtime configuration parameters at the end of each sprint (which you probably have to do anyway so your operations folk know how to properly configure the new system). Then, where possible:
&lt;ul&gt;
&lt;li&gt;turn as many runtime parameters into compile-time constants as possible
&lt;li&gt;ask under what conditions the parameters will no longer be needed (for example, for configuration that assists with an external transition, add an item to the product backlog to clean up the codebase after the transition is successful)
&lt;/ul&gt;&lt;/p&gt;

&lt;h3&gt;Obsolete/insufficient architecture and design&lt;/h3&gt;

&lt;p&gt;&lt;em&gt;Architectural debt&lt;/em&gt; is probably the most nefarious, because this is debt that doesn't actually get created when the code is first written. Instead, this is usually caused by external factors such as:
&lt;ul&gt;
&lt;li&gt;business environment changes and expected traffic is significantly different than originally anticipated. This can leave you either with an overly complex, over-engineered system, or with a too-simple system that can't easily scale.
&lt;li&gt;product direction changes, and the architecture is not flexible along the new axis of change, so that new development is overly difficult.
&lt;li&gt;expected performance of a new provisional architecture is invalidated by experience
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;Basically, as soon as you realize you need to change your architecture, you have magically "created" technical debt out of all the code that depended on the first architecture. In reality, this debt is probably unavoidable, and what you've really done is convert your inability to perfectly predict the future into a set of work that incorporates new knowledge about the problem domain.&lt;/p&gt;

&lt;p&gt;This can also be hard to identify by scrutinizing the code, but there are some external symptoms of it:
&lt;ul&gt;
&lt;li&gt;difficulty meeting desired performance or scalability targets, especially when concentrated in a certain feature subsystem
&lt;li&gt;adding new instances of a certain class of feature does not get easier over time
&lt;li&gt;lots of bugs being generated by a specific subsystem
&lt;li&gt;increased time-to-market for new features
&lt;li&gt;acceleration of bug creation rates
&lt;li&gt;accrual of standard operational processes that require manual intervention/support
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;So when you have some or (gulp) all of those symptoms, you probably have architectural debt lurking in your system. Once you have identified it and have a new target architecture, a lot of this will get converted into transition debt while you are making the changes.&lt;/p&gt;

&lt;h3&gt;Technical debt vs. technical investment&lt;/h3&gt;

&lt;p&gt;I want to be careful here to distinguish between two sorts of non-functional requirements that might show up on a "tech backlog":
&lt;ul&gt;
&lt;li&gt;&lt;b&gt;technical debt&lt;/b&gt;: this is current brokenness or unneeded complexity in the system that is actively slowing down the business of turning product backlog into working software for your customer.
&lt;li&gt;&lt;b&gt;technical investment&lt;/b&gt;: these are things that are not necessarily broken &lt;em&gt;per se&lt;/em&gt;, but which could speed things up for someone. A good example of this would be automating a manual process.
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;Technical investments can probably be put off while you have existing technical debt, although it can sometimes be hard to distinguish between the two. Clear technical &lt;em&gt;debt&lt;/em&gt; should probably be prioritized at the top of a product backlog, unless there are really high ROI items that might trump it. In general, getting rid of technical debt will increase the ROI of everything else on the backlog, simply by decreasing the "I" part. It can also make estimation more accurate by reducing the complexity of the system to which new functionality will be added. &lt;/p&gt;

&lt;h3&gt;Whose responsibility is technical debt?&lt;/h3&gt;

&lt;p&gt;Generally, as the folks with the technical ability to recognize it, it is the development team's responsibility to try to avoid accruing technical debt while producing product. Failing that, it is their responsibility to recognize/document existing debt and to advocate for its removal. However, note that there are often symptoms of technical debt, such as those I've listed above for architectural debt, that can be recognized by non-technical folks too.&lt;/p&gt;

&lt;p&gt;On the flip side, business folks / product owners need to be able to trade off short term wins that accrue technical debt vs. taking longer to produce a product with less debt. Communication with the tech team is of vital importance here; undoubtedly there will be times when a short-term win will be important (especially with a first-to-market situation), but it needs to be accompanied by a plan to eliminate the accrued debt. In other words, treat your technical debt like credit card debt that should be paid down ASAP, and not as a long-term mortgage.&lt;/p&gt;

&lt;p&gt;The interest on your technical debt is probably &lt;em&gt;not&lt;/em&gt; tax-deductible.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-6241208165467935170?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/CStsDghoTY7xLneP8sLFm0G-boo/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/CStsDghoTY7xLneP8sLFm0G-boo/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/CStsDghoTY7xLneP8sLFm0G-boo/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/CStsDghoTY7xLneP8sLFm0G-boo/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/6241208165467935170/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=6241208165467935170" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/6241208165467935170?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/6241208165467935170?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/LpfpEWKiOCQ/cracking-down-on-technical-debt.html" title="Cracking down on technical debt" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2008/08/cracking-down-on-technical-debt.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DE4GQXY6eip7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-6811819741886821371</id><published>2008-04-18T19:11:00.005-04:00</published><updated>2010-09-26T14:48:40.812-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:48:40.812-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="product owner" /><category scheme="http://www.blogger.com/atom/ns#" term="timeboxing" /><category scheme="http://www.blogger.com/atom/ns#" term="teams" /><category 
scheme="http://www.blogger.com/atom/ns#" term="prioritization" /><category scheme="http://www.blogger.com/atom/ns#" term="negotiation" /><category scheme="http://www.blogger.com/atom/ns#" term="scrum" /><title>The Crucible</title><content type="html">&lt;p&gt;We've recently re-adjusted our sprint length to be 3 weeks, down from 4. The reasoning behind this was to allow us to align with sprints on other products, to give us a uniformity of scheduling and to permit inter-product developer swaps from sprint to sprint, if needed.&lt;/p&gt;

&lt;p&gt;One of the side effects is that our planning process now takes up proportionately more time of the sprint and so the actual work time of the sprint is quite compressed. Couple this with a handful of developers being out for personal reasons (wedding, health issues, impending birth of a child) and suddenly this sprint had a lot of high priority sprint backlog and not a lot of available story point capacity on the teams.&lt;/p&gt;

&lt;p&gt;But then something magical happened--what I'll call the "crucible moment." The status of the sprint was that there were a bunch of high priority, smaller maintenance tasks (cleanup of existing features, releasing the previous sprints' work to production, etc.) and then one big new feature. As the teams were working their way down the list of user stories and filling up their story points, everyone quickly realized that that big new feature might not fit into the sprint.&lt;/p&gt;

&lt;p&gt;With the purifying fire of timeboxing (forgive my overly dramatic metaphors here), the sprint teams and product teams immediately began self-organizing and negotiating. No one wanted to have a sprint without a killer feature, and so the horse trading began. Some of the higher priority stories were off the "tech backlog" -- non-functional investments in build infrastructure, etc. Developers began identifying tech backlog items that could stand to wait. Product owners began reconsidering some of the higher priority smaller stuff, conceding that some of them might not be so important after all. Different development teams tried to juggle stories between themselves so that the large story could get onto one team's sprint backlog (we prefer not to split stories across teams, to minimize cross-team dependencies). Some stories were scoped down to take less time.&lt;/p&gt;

&lt;p&gt;And then, when it was all done, we discovered there was enough room for &lt;b&gt;two&lt;/b&gt; pretty big features.&lt;/p&gt;

&lt;p&gt;This was scrum at its best--focusing on the art of the possible to squeeze as much as possible into a fixed amount of time. No time for extraneous explorations, just a focus on extracting the maximum product value out of the available resources, with a brutal willingness to critically re-examine priorities. What's more, it was carried out in a context of teamwork between the product owners and the teams; &lt;b&gt;everyone&lt;/b&gt; was trying to figure out how to get those features in.&lt;/p&gt;

&lt;p&gt;So, a great day for our product and for our organization. We might actually be getting the hang of this scrum thing....&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-6811819741886821371?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/FtHQRSH8Kw_Rm6S0T1w7zGDZVCw/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/FtHQRSH8Kw_Rm6S0T1w7zGDZVCw/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/FtHQRSH8Kw_Rm6S0T1w7zGDZVCw/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/FtHQRSH8Kw_Rm6S0T1w7zGDZVCw/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/6811819741886821371/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=6811819741886821371" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/6811819741886821371?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/6811819741886821371?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/dNW4LHCym30/crucible.html" title="The Crucible" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2008/04/crucible.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DE4MRn87fyp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-8090721625848513962</id><published>2008-03-26T23:14:00.004-04:00</published><updated>2010-09-26T14:49:47.107-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:49:47.107-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="timeboxing" /><category scheme="http://www.blogger.com/atom/ns#" term="teams" /><category scheme="http://www.blogger.com/atom/ns#" term="sprint review" /><category scheme="http://www.blogger.com/atom/ns#" term="burndown" /><category 
scheme="http://www.blogger.com/atom/ns#" term="scrum master" /><category scheme="http://www.blogger.com/atom/ns#" term="estimation" /><category scheme="http://www.blogger.com/atom/ns#" term="scrum" /><title>Sprint Review</title><content type="html">&lt;p&gt;Just wanted to follow up on my &lt;a href="http://codeartisan.blogspot.com/2008/02/back-to-basics.html"&gt;previous post&lt;/a&gt; about how we were going to get back to scrum basics this sprint. We had our sprint review and sprint retrospectives today, and the sprint was viewed as a great success.&lt;/p&gt;

&lt;p&gt;Here are some things that I think contributed to our success:&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Better estimation&lt;/b&gt;: We used past task estimation history plus monte-carlo simulation, as I &lt;a href="http://codeartisan.blogspot.com/2007/12/scrum-thoughts-improving-individual.html"&gt;described earlier&lt;/a&gt;, to sign up for a set of achievable tasks. The end result was that we showed up at the review with 90-95% of the committed work done. The parts that didn't get done were due to external blockers we couldn't do anything about (e.g. external partner didn't have a data feed ready) or were due to too many round-trips through the design-IA-product-eng collaboration cycle (essentially, deviating from the plan we set forth at the beginning of the sprint) that took time away from other things.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Commitment&lt;/b&gt;: Towards the end of the set of product backlog our team was planning, we hit a fairly large task that was hard to estimate out. However, we had two team members who were very lightly booked, and we simply asked them if they were willing to commit to finishing the task somehow by the end of the sprint. They signed up, and they got it done. There was really no talk of punting things off the sprint, although there were a couple of rescopings and/or a change of plan due to time constraints that happened to get things done by the review. In general, I think these changes actually just meant the abstract functionality in the user story got built with fewer resources -- i.e. we completed them more efficiently thanks to the pressure of timeboxing.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Empowerment&lt;/b&gt;: Thanks to being committed to their tasks, my teammates basically kept themselves unblocked most of the sprint by engaging with other departments (design, product, IA, ops, QA) early on to make sure they could get what they needed. Most of the time things stayed unblocked by getting the right folks together in one room. One of my teammates mentioned that one of the things I was best able to do was to simply identify which people needed to be in the room! Then he went off and arranged the meetings and got things done himself.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Shippable product&lt;/b&gt;: We planned explicit tasks for each feature to test/verify it, to show QA how it worked, and to demo it and get explicit signoff from the product owner. Our product owner gave me the feedback that this resulted in most of the user stories we called "done" being potentially shippable.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Self-organization&lt;/b&gt;: The team dynamically swapped tasks with one another to load balance effectively during the sprint. There was such appreciation for this help that people started buying each other donuts as thank-yous, and I think we ended up with around 4 dozen donuts being exchanged by the end of the sprint. Bad for the waistline, but good for team morale!&lt;/p&gt;

&lt;p&gt;On another note, this was a re-entry for me into the dual role of scrum master and team member (in that I signed up for development sprint tasks as well as being scrum master). For other developers who find themselves being called on to carry the scrum master mantle, here are some tips to help you survive:
&lt;ul&gt;
&lt;li&gt;&lt;b&gt;Only book yourself half-time&lt;/b&gt;. The rest of the time will be eaten up by scrum mastering, and if you don't scale back on your coding commitments, you're just going to end up staying up late working all sprint.
&lt;li&gt;&lt;b&gt;Tell people how to get unblocked rather than trying to unblock them yourself&lt;/b&gt;. This seems pretty simple, but is a big time saver. For one thing, it's probably quicker to describe what to do (e.g. just set up a meeting with X, Y, and Z) than it is for you to send out the meeting invite yourself. Plus, you remove &lt;em&gt;yourself&lt;/em&gt; as a bottleneck -- so your teammate doesn't have to wait for you to send the invite. Furthermore, after you do this a few times, your teammates will pick it up and will be able to just solve more of their problems themselves without asking you for help.
&lt;li&gt;&lt;b&gt;Get good reports&lt;/b&gt;. I invested 5-6 hours towards the beginning of the sprint to get some automated reports set up to get info out of our ticketing system. I really ended up just using two reports:
&lt;ol&gt;
&lt;li&gt;Storyboard: for each story, identify who has tickets against it, and identify whether each ticket is "Not Started", "In Progress", or "Finished". This provides a quick way to scan the status of each story and reminds you to ask questions like: what do we need to do to close out that story again? why are we working on lower priority things instead of the higher priority ones? etc.
&lt;li&gt;Burndown count: for each story, add up time remaining on each ticket, and then provide a total for the amount of hours left for the sprint.
&lt;/ol&gt;
So what I did was: right before scrum, I pulled up the Storyboard report to check status so I could ask followup questions during scrum. Right after scrum, I hit the burndown count and then actually plotted by hand on a big piece of flipchart paper the burndown graph. That was pretty much the extent of the reporting I needed.
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;So, all in all, things turned out pretty well. I'm excited to see the teams back in a good sprint cycle groove and turning things out (our review meeting lasted almost 5 hours, mainly because so much had gotten done across the multiple teams working on the product that it took a while to go over it all). We'll be mostly keeping things the same going into the next sprint, so I'll be curious to see if the teams are able to accelerate now that they're used to working this way.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-8090721625848513962?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/WPTj7t4HE46BloN2J5AGc4fLOU0/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/WPTj7t4HE46BloN2J5AGc4fLOU0/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/WPTj7t4HE46BloN2J5AGc4fLOU0/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/WPTj7t4HE46BloN2J5AGc4fLOU0/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/8090721625848513962/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=8090721625848513962" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/8090721625848513962?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/8090721625848513962?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/Zz1ORsET6CQ/sprint-review.html" title="Sprint Review" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2008/03/sprint-review.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUcBSXYycSp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-7730707034265885877</id><published>2008-03-16T22:12:00.007-04:00</published><updated>2010-09-26T14:50:58.899-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:50:58.899-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="rest" /><category scheme="http://www.blogger.com/atom/ns#" term="design philosophy" /><category scheme="http://www.blogger.com/atom/ns#" term="web2.0" /><category scheme="http://www.blogger.com/atom/ns#" term="microservices" /><category 
scheme="http://www.blogger.com/atom/ns#" term="esr" /><category scheme="http://www.blogger.com/atom/ns#" term="unix" /><category scheme="http://www.blogger.com/atom/ns#" term="architecture" /><category scheme="http://www.blogger.com/atom/ns#" term="rest programming" /><title>REST: Unix programming for the Web</title><content type="html">&lt;p&gt;I've been giving some thought to &lt;a href="http://en.wikipedia.org/wiki/Representational_State_Transfer"&gt;REST-style architectures&lt;/a&gt; recently, and recently re-read some of &lt;a href="http://www.catb.org/~esr/writings/taoup/"&gt;The Art of Unix Programming&lt;/a&gt; by &lt;a href="http://www.catb.org/~esr/"&gt;Eric S. Raymond&lt;/a&gt;. ESR notes that some of the characteristics of &lt;a href="http://en.wikipedia.org/wiki/Unix_philosophy"&gt;Unix-style programming&lt;/a&gt; include:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Do one thing, and do it well (attributed to &lt;a href="http://en.wikipedia.org/wiki/Douglas_McIlroy"&gt;Doug McIlroy&lt;/a&gt;) &lt;/li&gt;
&lt;li&gt;Everything is a file.&lt;/li&gt;
&lt;li&gt;Compose complex systems by connecting smaller, simpler programs (e.g. Unix pipes).&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Unix-style systems have had undeniable success and remarkable stickiness for a technology; I have an old copy of my father's System V manual from when he worked at Bell Labs (yeah, they actually &lt;i&gt;printed&lt;/i&gt; out the man pages and bound them!), and I pretty much recognize everything in there. Sure, there are many new commands available, new kernels, new distributions, etc., but they are all very recognizably &lt;i&gt;Unixy&lt;/i&gt;.&lt;/p&gt;

&lt;p&gt;I've been thinking about how this philosophy applies to the Web 2.0 world. I think this list turns into:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Do one thing, and do it well.&lt;/li&gt;
&lt;li&gt;Everything is a RESTful service.&lt;/li&gt;
&lt;li&gt;Compose complex systems by interconnecting smaller, simpler services.&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;For one thing, "do one thing, and do it well" isn't limited to Unix; this is a key part of abstracting and decomposing a technical problem. But we see it everywhere: I read my mail on &lt;a href="http://mail.google.com/"&gt;Gmail&lt;/a&gt;, keep my to-do lists on &lt;a href="http://www.rememberthemilk.com/"&gt;Remember The Milk&lt;/a&gt;, store photos on &lt;a href="http://www.flickr.com/"&gt;Flickr&lt;/a&gt;, keep my browser bookmarks on &lt;a href="http://del.icio.us/"&gt;del.icio.us&lt;/a&gt;, etc. All these sites adhere to this principle.&lt;/p&gt;

&lt;p&gt;But taking things down a layer, what does this mean to someone architecting or implementing a Web 2.0 service? For one thing, I think this means that it would make sense to break your overall service down into individual &lt;i&gt;microservices&lt;/i&gt; that are individually maintained and deployed; i.e. break things down to the smallest level where they still make coherent sense.&lt;/p&gt;

&lt;p&gt;As for "everything is a REST service", the everything-is-a-file abstraction worked for Unix because there was a small set of common operations that applied to files (&lt;tt&gt;open&lt;/tt&gt;, &lt;tt&gt;close&lt;/tt&gt;, &lt;tt&gt;dup&lt;/tt&gt;, &lt;tt&gt;read&lt;/tt&gt;, &lt;tt&gt;write&lt;/tt&gt;). Sounds a lot like doing REST over HTTP, with HEAD, GET, PUT, POST, DELETE.&lt;/p&gt;

&lt;p&gt;Combining various small REST microservices into larger services is already being done (this is, after all, basically what &lt;a href="http://www.google.com/ig"&gt;iGoogle&lt;/a&gt; is) on a one-off basis. The main question is: &lt;b&gt;what is the Web 2.0 equivalent of the pipe?&lt;/b&gt; Namely, is there an easily understood abstraction for composing webservices? Sounds like a topic that might be ripe for some kind of logical calculus (like the &lt;a href="http://en.wikipedia.org/wiki/Relational_calculus"&gt;relational calculus&lt;/a&gt; for databases or the &lt;a href="http://en.wikipedia.org/wiki/Pi_calculus"&gt;pi-calculus&lt;/a&gt; for concurrent processes), e.g. the &lt;em&gt;REST calculus&lt;/em&gt;. If there were a couple of easily understood and specifiable combinators, these could be pretty easily built into some language-specific libraries for use in quickly building some new macro-services.&lt;/p&gt;

&lt;p&gt;I might try to interest some of my old colleagues from my programming language research days to see if they have anything to say about the matter....&lt;/p&gt;

&lt;p&gt;Comments definitely welcome here. Is this a new idea? Are others espousing this? Is this even a good idea?&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-7730707034265885877?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/mB5HCOfeYJqDUhQnNfrAVQh7gxw/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/mB5HCOfeYJqDUhQnNfrAVQh7gxw/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/mB5HCOfeYJqDUhQnNfrAVQh7gxw/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/mB5HCOfeYJqDUhQnNfrAVQh7gxw/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/7730707034265885877/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=7730707034265885877" title="6 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/7730707034265885877?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/7730707034265885877?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/RNwUxDzsHk4/rest-unix-programming-for-web.html" title="REST: Unix programming for the Web" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>6</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2008/03/rest-unix-programming-for-web.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUYEQX49eyp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-1646420855191129533</id><published>2008-02-28T23:53:00.004-05:00</published><updated>2010-09-26T14:51:40.063-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:51:40.063-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="INVEST" /><category scheme="http://www.blogger.com/atom/ns#" term="pre-planning" /><category scheme="http://www.blogger.com/atom/ns#" term="user stories" /><category 
scheme="http://www.blogger.com/atom/ns#" term="scrum" /><title>Return on INVESTment</title><content type="html">&lt;p&gt;At our last &lt;a href="http://codeartisan.blogspot.com/2007/10/scrum-thoughts-pre-planning.html"&gt;pre-planning&lt;/a&gt; meeting, we made a point of putting all the user stories into &lt;a href="http://codeartisan.blogspot.com/2008/02/investing-in-user-stories.html"&gt;INVEST&lt;/a&gt; format, and I was pleased that there was a general consensus that this worked well.&lt;/p&gt;

&lt;p&gt;I think this took quite a while, partially because we were all getting used to evaluating the statement of the stories critically, but I think this was worth it. We had product, IA, and engineering folks suggesting wordings for stories, and at least some of our stories came in in a canonical format:&lt;/p&gt;

&lt;p&gt;"As &amp;lt;who&amp;gt;, I want &amp;lt;feature&amp;gt;, so that &amp;lt;value&amp;gt;."&lt;/p&gt;

&lt;p&gt;Having the full INVEST filter did help us with a few things:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;&lt;b&gt;Independent:&lt;/b&gt; We did not have a lot of stories that were dependent, but we did end up with a very few that were cross-team. Since this was just a handful of the stories, we figured it would be ok to leave the story as is (since we couldn't quickly come up with a way to rewrite it) and track the dependencies through our &lt;a href="http://codeartisan.blogspot.com/2007/10/scrum-thoughts-scrums-and-scrums-of.html"&gt;scrum-of-scrums&lt;/a&gt;.
&lt;li&gt;&lt;span style="font-weight:bold;"&gt;Negotiable:&lt;/span&gt; I think we generally nailed this one. We did not look at any IA wireframes or Design mockups during pre-planning, and were able to identify the high-level goals here. This has already become useful in planning, where my team has suggested a new / faster approach to at least one story which was not as originally conceived, but our Product Owner agreed that we still hit the abstract requirement.
&lt;li&gt;&lt;span style="font-weight:bold;"&gt;Valuable:&lt;/span&gt; There were a few stories that were originally cast as abstract technical requirements (e.g. make the middleware support foo), but we refactored them in a way that expressed value to the end user (which, incidentally, will make it more obvious how to test).
&lt;li&gt;&lt;span style="font-weight:bold;"&gt;Estimable:&lt;/span&gt; We've already agreed not to do a small handful of stories because we realized there were too many unknowns; for one of these, there was a middleware task that depended on IA we hadn't seen and on data we didn't have in the DB yet. We actually spent a lot of time trying to talk about how to tackle this before we realized we couldn't estimate it. Since it was not critical for implementation this sprint, we agreed with the product owner to turn this into a story where we would do a feasibility study and rough design by the end of the sprint instead, which was something we could commit to. (The value here is to the product owner, who will then be able to write an Estimable story about the feature!)
&lt;li&gt;&lt;span style="font-weight:bold;"&gt;Small:&lt;/span&gt; We did a good job here of refactoring large stories into multiple pieces and then figuring out how to get Value out of each piece. We only had a handful of medium to large stories.
&lt;li&gt;&lt;span style="font-weight:bold;"&gt;Testable:&lt;/span&gt; We're asking each team during planning to make sure they hash out "how to demo" for the feature with the Product Owner, so we should know if we've gotten there by the end of sprint planning.
&lt;/ul&gt;

&lt;p&gt;With the up-front work on the stories we did, our team has found it easier to negotiate a specific solution and in some cases to actually plan without the product owner, which is actually convenient (he's doing multiple duty so he's having to bounce back and forth between multiple sprint planning sessions!).&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-1646420855191129533?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/ZwAKT7vHJcilt3LeyUU6Fap-8qc/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ZwAKT7vHJcilt3LeyUU6Fap-8qc/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/ZwAKT7vHJcilt3LeyUU6Fap-8qc/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ZwAKT7vHJcilt3LeyUU6Fap-8qc/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/1646420855191129533/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=1646420855191129533" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/1646420855191129533?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/1646420855191129533?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/kcf4okgIzs0/return-on-investment.html" title="Return on INVESTment" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2008/02/return-on-investment.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUUMQH45cCp7ImA9Wx5WFU8.&quot;"><id>tag:blogger.com,1999:blog-631933842798219175.post-4746015782966704588</id><published>2008-02-24T16:21:00.005-05:00</published><updated>2010-09-26T14:54:41.028-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-26T14:54:41.028-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="INVEST" /><category scheme="http://www.blogger.com/atom/ns#" term="product owner" /><category scheme="http://www.blogger.com/atom/ns#" term="scrum master" /><category scheme="http://www.blogger.com/atom/ns#" term="user 
stories" /><category scheme="http://www.blogger.com/atom/ns#" term="estimation" /><category scheme="http://www.blogger.com/atom/ns#" term="scrum" /><category scheme="http://www.blogger.com/atom/ns#" term="cross-team dependencies" /><title>INVESTing in user stories</title><content type="html">&lt;p&gt;In my &lt;a href="http://codeartisan.blogspot.com/2008/02/back-to-basics.html"&gt;previous post&lt;/a&gt; I made reference to the &lt;a href="http://abrachan.wordpress.com/2007/02/10/invest-an-acronymn-for-getting-the-requirements-right/"&gt;INVEST&lt;/a&gt; acronym for evaluating user stories:
&lt;ul&gt;
&lt;li&gt;&lt;b&gt;(I)ndependent&lt;/b&gt;
&lt;li&gt;&lt;b&gt;(N)egotiable&lt;/b&gt;
&lt;li&gt;&lt;b&gt;(V)aluable&lt;/b&gt;
&lt;li&gt;&lt;b&gt;(E)stimable&lt;/b&gt;
&lt;li&gt;&lt;b&gt;(S)mall&lt;/b&gt;
&lt;li&gt;&lt;b&gt;(T)estable&lt;/b&gt;
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;I'd like to spend a little bit of time talking about each of these characteristics, and motivating why each is important through some anecdotes about what happens when each characteristic is not attained.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Independent&lt;/b&gt;: The idea here is that stories are free of dependencies from one another. (A good test would be to ask if you could implement them in any order, rather than just in the priority order generated by the Product Owner). We want this for multiple reasons: first, on a multi-team project, it allows stories to be load-balanced across teams more easily, since in theory any one story could get moved around, rather than having to move an entire batch of them. Secondly, and on a related note, it means that when sprint teams sign up for work, they can draw the commit line anywhere. Thirdly, it helps avoid cross-team dependencies (although a &lt;a href="http://codeartisan.blogspot.com/2007/10/scrum-thoughts-scrums-and-scrums-of.html"&gt;scrum-of-scrums&lt;/a&gt; and specific attention to dependent situations can handle it, it is still much easier to handle all your dependencies intra-team).&lt;/p&gt;

&lt;p&gt;For a specific example, consider if you have teams that are layered horizontally by functional layer (e.g. a database team, and a webapp team). If you have two stories which are the "halves" of building some functionality, and assign one to each team, you take on the following risks:
&lt;ul&gt;
&lt;li&gt;teams must coordinate closely around this feature (extra communication/tracking overhead)
&lt;li&gt;if one team finishes but the other doesn't, you may have extra "clean up" work at the end of the sprint to make the software still work, and you may have had a team do work that ultimately had no product impact that sprint (e.g. middleware can handle some new data and display it, but data didn't actually appear in the DB, so no actual behavioral difference)
&lt;/ul&gt;&lt;/p&gt;

&lt;p&gt;We'll see that some of the other INVEST characteristics actually help track this down as well.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Negotiable.&lt;/b&gt; To me, this means that the requirements given are as abstract as possible, so that the actual details of what is going to get built are determined by the team and Product Owner during &lt;a href="http://codeartisan.blogspot.com/2007/10/scrum-thoughts-sprint-planning.html"&gt;sprint planning&lt;/a&gt; and modified as needed over the course of the sprint. For example, "user can change timezone with one click" vs. "user timezone is shown in a 100x25 dropdown box in the header". (Of course, if the latter language actually represented a contractual obligation, then that might be as abstract as you can make it, but you get the idea...).&lt;/p&gt;

&lt;p&gt;This is primarily important for two reasons: to allow the team to exercise maximal creativity and to allow the team to adjust for the unpredictable events that will happen mid-sprint. In the former case, the dropdown box might be tacked on to the story as a starting point or suggestion, but the team may come up with an approach that is easier to implement or which actually satisfies the Product Owner more. In the latter case, it leaves some room to negotiate if the team has under-estimated and is behind mid-sprint, and still be able to "finish" the user story.&lt;/p&gt;

&lt;p&gt;The danger of not being able to negotiate the functionality is really twofold: first, not completing the full set of work in a sprint eventually crushes team morale; not being able to complete an assignment is a big downer, especially when you have worked hard and because, due to the inherent complexity of software development, things just took longer than you thought. Eventually you get to the point where your team will just shrug, and say, oh well, I guess we won't finish that stuff. I have seen this happen first-hand, and it is demotivating.&lt;/p&gt;

&lt;p&gt;Secondly, there is danger to the product roadmap. By not having requirements couched abstractly, you run the risk of slipping features or having unfinished, "carryover" work, which adds up and pushes the product roadmap back. By giving the team the freedom to brainstorm a way to satisfy the requirement in less time, you are not letting them off the hook -- you are using the deadline of the end of the sprint as pressure to encourage the team to develop the functionality as efficiently as possible. The resulting functionality may not be as complex or deep as originally hoped for or conceived, but if the spirit of the story is met, then you have some aspect of it ready to go out as shippable product.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Valuable.&lt;/b&gt; Each user story should provide value somehow. The original articles I read about this amended this to "provide value to the end user / customer." While I think noting the value explicitly can help product owners check off that the story will help their Key Performance Indicators (KPI), I think the more important thing this brings is that the &lt;em&gt;team&lt;/em&gt; is aware of &lt;em&gt;why&lt;/em&gt; this story is important. This can help constrain the solution space for Negotiation in an important way. Finally, this is just a cross-check for Independence; if this story is completed, and no other, does it generate value, or do we also need another story to be finished in order to get the value?&lt;/p&gt;

&lt;p&gt;On a side note, the phrase "to the end user" is an interesting one. We keep a "tech backlog" of infrastructure/refactoring ideas around, and prior to each sprint, we evaluate which ones are critical to current development, and ask for those to be prioritized in with the current product backlog. Generally, we have been very judicious about this, maybe in a nod to providing direct benefit to an end user -- we usually wait until an infrastructure adjustment is needed or desirable for implementing new functionality before taking it on. This suggests that other infrastructure work get carried out during lab days, to scratch those developers' itches. I'm still up in the air over how strict we should be about "to the end user", but I do lean towards requiring that most of the time.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Estimable.&lt;/b&gt; If you can't put an estimate on it, either the requirements are too vague (see Testable), or the technical solution is unknown (e.g. "I don't even know if this is feasible!"). In the latter case, a suggested tactic would be to alter the user story into a feasibility study / research effort for this sprint, which could be easily timeboxed; the original story could then be revisited in a later sprint, when there will be less cloudiness around it, and it can be properly estimated.&lt;/p&gt;

&lt;p&gt;Where this can bite you is in signing up for work you don't know you can finish. This sets you up for an expectation mismatch with your Product Owner, and also prevents you from making efficient use of your time; if you attempt to estimate it anyway, you run the risk of grossly underestimating it and putting all the lower-priority sprint backlog at risk, of grossly overestimating it and not signing up for enough work, or of padding the estimate to try to account for the risk and then spending more time on it than the feature is really worth.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Small.&lt;/b&gt; We want things that will take no more than a full sprint to do, and hopefully less. While the full scope of a feature vision may require more than one sprint, you want to refactor it somehow so that you get &lt;em&gt;something&lt;/em&gt; out of even the very first sprint (see Valuable); then you can be sure that you reap some result from your effort in shippable product, as opposed to doing some partial work, and then having the remaining work deprioritized for several months before you can extract value.&lt;/p&gt;

&lt;p&gt;Finally, a purely pragmatic reason for keeping the stories small is to reduce the risk of gross under-estimation. I've personally been way off on a single big story (as everyone is from time to time), and when it's been a big one, I've simultaneously trashed my personal life for a month of overtime and sleep deprivation, while requiring a bunch of load-balancing and rejuggling across multiple sprint teams due to the fact that the rest of the sprint backlog I signed up for was now at risk. So don't do that -- take smaller bites, just like Mom used to say.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Testable.&lt;/b&gt; This primarily serves three purposes: ensuring that Product Owner expectations are in tune with what the team thinks it is delivering (see also Negotiable), giving the team a way to know when it can stop working on the story, and giving the testers a starting point for writing their test cases. Beware of non-quantifiable adjectives like "good" or "acceptable" in your user story descriptions. For a while, when we were doing sprint planning on spreadsheets, we had a "How to Demo" column--this worked great while we did it, but we never had enough discipline to follow through here and continue doing it. This is one of the things I'm hoping to bring back during my &lt;a href="http://codeartisan.blogspot.com/2008/02/back-to-basics.html"&gt;Scrum revival&lt;/a&gt; next sprint.&lt;/p&gt;

&lt;p&gt;There's nothing worse than showing up to a sprint review and having your Product Owner say, "but that's not at all what I asked for," or "that's not what I meant." Big morale crusher for everyone involved (team and stakeholders).&lt;/p&gt;

&lt;p&gt;Finally, this gives the Scrum master a hook to save the team from perfectionism or unbounded creativity. For example, if you've gotten the feature to the point where it satisfies the acceptance tests and has been built up to your standards of quality, just stop working on it, and start working on the next user story. This is your old friend, the Pareto principle, at work -- would you rather spend a day mining the long tail of a functionally complete feature, or would you rather spend it getting the up-front meat of a new feature? The other place this helps is when you finish a feature, and you and the Product Owner are looking at it, and you now see something totally awesome that is now possible -- stop, ship the feature you have, and queue the good idea up as a user story for the next sprint so it can be properly prioritized with everything else. Again, this is about efficient use of the time in the sprint.&lt;/p&gt;

&lt;hr/&gt;

&lt;p&gt;I'm anticipating having this be a little painful as we work through this together with the product team the first time; we've all signed off on this in principle, but we've never actually attempted to make each story adhere to INVEST. I suspect, like all similar things, it will be a bit robotic for the first few stories, but we'll quickly get the hang of it and be able to move on during &lt;a href="http://codeartisan.blogspot.com/2007/10/scrum-thoughts-pre-planning.html"&gt;pre-planning&lt;/a&gt;. I'll let you know how it goes.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;p&gt;
&lt;script type="text/javascript"&gt;&lt;!--
google_ad_client = "pub-1136626927500183";
/* 468x60, created 9/8/08 */
google_ad_slot = "4192662419";
google_ad_width = 468;
google_ad_height = 60;
//--&gt;
&lt;/script&gt;
&lt;script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"&gt;
&lt;/script&gt;
&lt;/p&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/631933842798219175-4746015782966704588?l=codeartisan.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/cJNl_SOYgbUgk9T6bTWDuaCn8as/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/cJNl_SOYgbUgk9T6bTWDuaCn8as/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/cJNl_SOYgbUgk9T6bTWDuaCn8as/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/cJNl_SOYgbUgk9T6bTWDuaCn8as/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;</content><link rel="replies" type="application/atom+xml" href="http://codeartisan.blogspot.com/feeds/4746015782966704588/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=631933842798219175&amp;postID=4746015782966704588" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/4746015782966704588?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/631933842798219175/posts/default/4746015782966704588?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/codeartisan/~3/a0_WCMnCCCE/investing-in-user-stories.html" title="INVESTing in user stories" /><author><name>Jon Moore</name><uri>http://www.blogger.com/profile/16766484929210129406</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://codeartisan.blogspot.com/2008/02/investing-in-user-stories.html</feedburner:origLink></entry></feed>

