<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0">

<channel>
	<title>Reproducible Ideas</title>
	
	<link>http://reproducibleresearch.org/blog</link>
	<description>Promoting reproducible research</description>
	<pubDate>Tue, 05 May 2009 18:33:42 +0000</pubDate>
	<generator>http://wordpress.org/?v=2.7</generator>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
			<atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" href="http://feeds.feedburner.com/ReproducibleIdeas" type="application/rss+xml" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com" /><item>
		<title>One more RR web site</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/Nzcbg1EpZnU/</link>
		<comments>http://reproducibleresearch.org/blog/2009/04/30/one-more-rr-web-site/#comments</comments>
		<pubDate>Thu, 30 Apr 2009 14:58:08 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=64</guid>
		<description><![CDATA[Just learned about Reproducible Research Planet.
]]></description>
			<content:encoded><![CDATA[<p>Just learned about <a href="http://www.rrplanet.com/">Reproducible Research Planet</a>.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/Nzcbg1EpZnU" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/04/30/one-more-rr-web-site/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/04/30/one-more-rr-web-site/</feedburner:origLink></item>
		<item>
		<title>Plan for merging .org and .net sites</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/wuK9forrk_s/</link>
		<comments>http://reproducibleresearch.org/blog/2009/04/28/plan-for-merging-org-and-net-sites/#comments</comments>
		<pubDate>Tue, 28 Apr 2009 21:29:47 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=61</guid>
		<description><![CDATA[Patrick Vandewalle and I will be combining our efforts to develop a web site to promote reproducible research. He has the domain name reproducibleresearch.net while I have reproducibleresearch.org. His site is better than the one I&#8217;ve developed, so I&#8217;d rather support his effort than continue  my own.
I plan to leave this web site up for [...]]]></description>
			<content:encoded><![CDATA[<p>Patrick Vandewalle and I will be combining our efforts to develop a web site to promote reproducible research. He has the domain name <a href="http://reproducibleresearch.net/index.php/Main_Page">reproducibleresearch.net</a> while I have <a href="http://www.reproducibleresearch.org/">reproducibleresearch.org</a>. His site is better than the one I&#8217;ve developed, so I&#8217;d rather support his effort than continue  my own.</p>
<p>I plan to leave this web site up for a few more weeks and then hand the .org name over to Patrick. During that time, some of the content from this site will be merged into the framework of his new site. Please go over to the new site and participate in the forums.</p>
<p>I plan to continue blogging about reproducible research from time to time, but future posts will be on my personal blog, <a href="http://www.johndcook.com/blog/">The Endeavour</a>. I may write a few more posts here regarding the status of the transition.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/wuK9forrk_s" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/04/28/plan-for-merging-org-and-net-sites/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/04/28/plan-for-merging-org-and-net-sites/</feedburner:origLink></item>
		<item>
		<title>New web site devoted to RR</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/hLeDStmv_7U/</link>
		<comments>http://reproducibleresearch.org/blog/2009/04/27/new-web-site-devoted-to-rr/#comments</comments>
		<pubDate>Tue, 28 Apr 2009 02:23:31 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=59</guid>
		<description><![CDATA[Check out the new web site http://www.reproducibleresearch.net by Patrick Vandewalle, Jelena Kovačević, and Martin Vetterli.

]]></description>
			<content:encoded><![CDATA[<p>Check out the new web site <a href="http://www.reproducibleresearch.net">http://www.reproducibleresearch.net</a> by Patrick Vandewalle, Jelena Kovačević, and Martin Vetterli.</p>
<p style="text-align: center"><a href="http://www.reproducibleresearch.net"><img src="http://reproducibleresearch.org/rr.png" alt="Reproducible Research logo" width="150" height="57" /></a></p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/hLeDStmv_7U" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/04/27/new-web-site-devoted-to-rr/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/04/27/new-web-site-devoted-to-rr/</feedburner:origLink></item>
		<item>
		<title>Reproducible Research in Signal Processing</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/WMdtHAiv_ko/</link>
		<comments>http://reproducibleresearch.org/blog/2009/04/23/reproducible-research-in-signal-processing/#comments</comments>
		<pubDate>Fri, 24 Apr 2009 01:13:58 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=57</guid>
		<description><![CDATA[Patrick Vandewalle, Jelena Kovačević, and Martin Vetterli have published a new article &#8220;Reproducible Research in Signal Processing: What, Why, and How&#8221; in IEEE Signal Processing Magazine (37) May 2009.
]]></description>
			<content:encoded><![CDATA[<p>Patrick Vandewalle, Jelena Kovačević, and Martin Vetterli have published a new article &#8220;Reproducible Research in Signal Processing: What, Why, and How&#8221; in IEEE Signal Processing Magazine (37) May 2009.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/WMdtHAiv_ko" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/04/23/reproducible-research-in-signal-processing/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/04/23/reproducible-research-in-signal-processing/</feedburner:origLink></item>
		<item>
		<title>Preserving (the memory of) documents</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/nLtpMIPPlWU/</link>
		<comments>http://reproducibleresearch.org/blog/2009/03/17/preserving-documents/#comments</comments>
		<pubDate>Tue, 17 Mar 2009 17:34:31 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=55</guid>
		<description><![CDATA[The Long Now Foundation has produced a Rosetta disk containing 13,000 pages of information regarding 1,500 human languages. The text is engraved, not encoded. The text starts out large enough to read with the naked eye and becomes continuously smaller, strongly suggesting one should examine the disk under magnification to read further.

Long Now is trying [...]]]></description>
			<content:encoded><![CDATA[<p>The <a href="http://www.longnow.org/">Long Now Foundation</a> has produced a Rosetta disk containing 13,000 pages of information regarding 1,500 human languages. The text is engraved, not encoded. The text starts out large enough to read with the naked eye and becomes continuously smaller, strongly suggesting one should examine the disk under magnification to read further.</p>
<p style="text-align: center"><img src="http://media.longnow.org/files/2/RosettaTop.jpg" alt="" width="400" height="403" /></p>
<p>Long Now is trying to preserve documentation for thousands of years, but I just want to know how to preserve documents even for a few months or years. They want to hold on to knowledge as civilizations come and go. I&#8217;m just trying to hold on to knowledge as personnel come and go.</p>
<p>Mundane document preservation is a very difficult problem. <strong>Preserving the Declaration of Independence is easy; preserving meeting notes is hard</strong>. Preserving the Declaration is a technical problem. If you keep it in a glass case filled with nitrogen, keep the lights low, and make sure <a href="http://www.imdb.com/title/tt0368891/">Nicolas Cage</a> doesn&#8217;t steal it, you&#8217;re OK. Millions of people know that the document exists, and they know where to look for it. And besides the original paper copy, the text is available electronically in countless locations.</p>
<p>How do I preserve the document that describes why my internal software application uses the parameters it does? Make notes in the source code? Good idea, but most of the people who want to know about the parameters are not software developers. What about version control systems or content management systems? Great idea: put everything associated with a project in one place. But wherever you put the information, <strong>someone has to remember that it exists and know where to look for it</strong>.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/nLtpMIPPlWU" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/03/17/preserving-documents/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/03/17/preserving-documents/</feedburner:origLink></item>
		<item>
		<title>Legal frameworks</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/zRtjJGN5d8I/</link>
		<comments>http://reproducibleresearch.org/blog/2009/03/17/legal-frameworks/#comments</comments>
		<pubDate>Tue, 17 Mar 2009 13:20:22 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=53</guid>
		<description><![CDATA[Greg Wilson has a new blog post Legal Frameworks for Reproducible Research.
]]></description>
			<content:encoded><![CDATA[<p>Greg Wilson has a new blog post <a href="http://pyre.third-bit.com/blog/archives/2230.html">Legal Frameworks for Reproducible Research</a>.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/zRtjJGN5d8I" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/03/17/legal-frameworks/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/03/17/legal-frameworks/</feedburner:origLink></item>
		<item>
		<title>Orfeo Toolbox</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/zqiIsJAa6R8/</link>
		<comments>http://reproducibleresearch.org/blog/2009/03/16/orfeo-toolbox/#comments</comments>
		<pubDate>Tue, 17 Mar 2009 04:20:05 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=50</guid>
		<description><![CDATA[The Orfeo Toolbox is an open source library of image processing algorithms developed by the  French Space Agency (CNES).
The tagline for the Orfeo Toolbox project is &#8220;Orfeo Toolbox is not a black box.&#8221;
]]></description>
			<content:encoded><![CDATA[<p>The Orfeo Toolbox is an open source library of image processing algorithms developed by the  <a title="CNES" href="http://www.cnes.fr/">French Space Agency (CNES)</a>.</p>
<p>The tagline for the Orfeo Toolbox project is &#8220;Orfeo Toolbox is not a black box.&#8221;</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/zqiIsJAa6R8" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/03/16/orfeo-toolbox/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/03/16/orfeo-toolbox/</feedburner:origLink></item>
		<item>
		<title>Reproducible network benchmarks</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/VE1y2Ilmz1Q/</link>
		<comments>http://reproducibleresearch.org/blog/2009/03/16/network-benchmarks/#comments</comments>
		<pubDate>Tue, 17 Mar 2009 04:01:19 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=47</guid>
		<description><![CDATA[I just found out about  coNCePTuaL, a project that promotes reproducible research in the context of performance measurements for high-speed computer networks.  (The capitalized letters in the name stand for Network Correctness and Performance Testing Language.)
The project is located at  http://conceptual.sourceforge.net/ and is described in the paper Reproducible network benchmarks with coNCePTuaL by Scott Pakin [...]]]></description>
			<content:encoded><![CDATA[<p>I just found out about  coNCePTuaL, a project that promotes reproducible research in the context of performance measurements for high-speed computer networks.  (The capitalized letters in the name stand for Network Correctness and Performance Testing Language.)</p>
<p>The project is located at  <a href="http://conceptual.sourceforge.net/" target="_blank">http://conceptual.sourceforge.net/</a> and is described in the paper <a href="http://www.c3.lanl.gov/PAL/publications/papers/Pakin2004:reproducible.pdf">Reproducible network benchmarks with coNCePTuaL</a> by Scott Pakin of Los Alamos National Laboratory.</p>
<p>Some of the highlights of coNCePTuaL:</p>
<ul>
<li> Performance tests (timed network-communication patterns) are described in a precise but English-like &#8220;executable pseudocode&#8221; designed for basic readability, even by someone not familiar with coNCePTuaL.</li>
<li>Output files produced by coNCePTuaL-based performance tests include not only the measurements but the code describing the  test itself and a detailed description of the experimental platform on which the code ran.  This enables a third party to see exactly what was run, how it was run, and what the results were, all in one file.</li>
<li>coNCePTuaL can automatically produce space-time diagrams of the communication pattern for additional clarity of presentation.</li>
</ul>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/VE1y2Ilmz1Q" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/03/16/network-benchmarks/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/03/16/network-benchmarks/</feedburner:origLink></item>
		<item>
		<title>Science in the open</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/rhLOfowLHrE/</link>
		<comments>http://reproducibleresearch.org/blog/2009/02/11/science-in-the-open/#comments</comments>
		<pubDate>Thu, 12 Feb 2009 03:35:05 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=45</guid>
		<description><![CDATA[See Greg Wilson&#8217;s post this evening Science in the Open for two stories of reproducible research.
]]></description>
			<content:encoded><![CDATA[<p>See Greg Wilson&#8217;s post this evening <a href="http://pyre.third-bit.com/blog/archives/2035.html">Science in the Open</a> for two stories of reproducible research.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/rhLOfowLHrE" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/02/11/science-in-the-open/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/02/11/science-in-the-open/</feedburner:origLink></item>
		<item>
		<title>A proposal for an Sweave service</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/t4badudNaos/</link>
		<comments>http://reproducibleresearch.org/blog/2009/01/23/sweave-build-service-proposal/#comments</comments>
		<pubDate>Fri, 23 Jan 2009 17:46:37 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=40</guid>
		<description><![CDATA[Sweave has been discussed here many times, but here&#8217;s a brief description for those just joining the discussion. Sweave is a tool for embedding R code inside LaTeX files, analogous to the way web development languages such as PHP or CGI embed scripting code in HTML. When you compile an Sweave file, the R code [...]]]></description>
			<content:encoded><![CDATA[<p>Sweave has been discussed here many times, but here&#8217;s a brief description for those just joining the discussion. Sweave is a tool for embedding R code inside LaTeX files, analogous to the way web development languages such as PHP or CGI embed scripting code in HTML. When you compile an Sweave file, the R code executes and the results (and optionally the source code) are inserted into the LaTeX output.</p>
<p>Sweave has the potential to make statistical analyses more reproducible. But I doubt many realize its vulnerabilities. The Sweave files are likely to have implicit dependencies on R session state or data located outside the file. <strong>You don&#8217;t really know that the output is reproducible until it&#8217;s compiled by someone else in a fresh environment</strong>.</p>
<p>My proposal is a service that lets you submit an Sweave file and get back the resulting LaTeX and PDF output. An extension to this would allow users to also upload data files along with their Sweave file so not all data would have to be in the Sweave file itself. For good measure, there should be some checksums to certify just what input went into producing the output.</p>
<p>Here&#8217;s one way I see this being used. Suppose you&#8217;re about to put a project on the shelf for a while. For example, you&#8217;re about to submit a paper to a journal. You may need to come back and make changes six months later. You think about the difficulty you&#8217;ve had in the past with these sorts of edits and want to make sure it doesn&#8217;t happen again. So you submit your Sweave document to the build server to verify that it is self-contained.</p>
<p>Here&#8217;s another scenario. Suppose you&#8217;ve asked someone whom you supervise to produce a report. Instead of letting them give you a PDF, you might insist they give you an Sweave file that you then run through the build service to make your own PDF. That way you can have the whole &#8220;but it works on my machine&#8221; discussion now rather than having it months later after the person who made the report has a new computer or a new job.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/t4badudNaos" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/01/23/sweave-build-service-proposal/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/01/23/sweave-build-service-proposal/</feedburner:origLink></item>
		<item>
		<title>Reproducibility talk in Houston this afternoon</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/FbCOJCRAU8M/</link>
		<comments>http://reproducibleresearch.org/blog/2009/01/16/reproducibility-talk-in-houston-this-afternoon/#comments</comments>
		<pubDate>Fri, 16 Jan 2009 15:30:55 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=38</guid>
		<description><![CDATA[I just found out that Keith Baggerly will be speaking at Rice University this afternoon. His talk is entitled &#8220;Cell Lines, Microarrays, Drugs and Disease: Trying to Predict Response to Chemotherapy.&#8221; Here is part of the seminar announcement most relevant to reproducibility.
In this talk, we will describe how we have analyzed the data, and the [...]]]></description>
			<content:encoded><![CDATA[<p>I just found out that <a href="http://reproducibleresearch.org/blog/2008/10/30/forensic-bioinformatics/">Keith Baggerly</a> will be speaking at Rice University this afternoon. His talk is entitled &#8220;Cell Lines, Microarrays, Drugs and Disease: Trying to Predict Response to Chemotherapy.&#8221; Here is part of the seminar announcement most relevant to reproducibility.</p>
<blockquote><p>In this talk, we will describe how we have analyzed the data, and the implications of the ambiguities for the clinical findings. We will also describe methods for making such analyses more reproducible, so that progress can be made more steadily.</p></blockquote>
<p>The talk will be at 4 PM in <a href="http://www.rice.edu/maps/maps.html">Keck Hall</a> room 102.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/FbCOJCRAU8M" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/01/16/reproducibility-talk-in-houston-this-afternoon/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/01/16/reproducibility-talk-in-houston-this-afternoon/</feedburner:origLink></item>
		<item>
		<title>Peer review</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/dDFC0d1pkTs/</link>
		<comments>http://reproducibleresearch.org/blog/2009/01/09/peer-review/#comments</comments>
		<pubDate>Fri, 09 Jan 2009 15:03:24 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=35</guid>
		<description><![CDATA[Michael Nielsen posted an excellent article this morning Three myths of scientific peer review. He points out that peer review has only become common in the last 40 or 50 years. Maybe a few years from now someone will write an article looking back at how reproducible research came to be de rigueur. No one [...]]]></description>
			<content:encoded><![CDATA[<p>Michael Nielsen posted an excellent article this morning <a href="http://michaelnielsen.org/blog/?p=531">Three myths of scientific peer review</a>. He points out that peer review has only become common in the last 40 or 50 years. Maybe a few years from now someone will write an article looking back at how reproducible research came to be <em>de rigueur</em>. No one questions whether peer review is a good thing, though many people have complaints about the current system and argue about ways to make it better. Maybe the same will be said for reproducible research some day.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/dDFC0d1pkTs" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/01/09/peer-review/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/01/09/peer-review/</feedburner:origLink></item>
		<item>
		<title>Taking your code out for a walk</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/wRDgZHTxhLc/</link>
		<comments>http://reproducibleresearch.org/blog/2009/01/06/taking-your-code-out-for-a-walk/#comments</comments>
		<pubDate>Tue, 06 Jan 2009 17:44:01 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=33</guid>
		<description><![CDATA[When I was in college, a friend of mine told me he liked to take his code out for a walk every now and then. By that he meant recompiling and running all of his programs. At the time I thought that was unnecessary. If a program compiled and ran the last time you touched [...]]]></description>
			<content:encoded><![CDATA[<p>When I was in college, a friend of mine told me he liked to take his code out for a walk every now and then. By that he meant recompiling and running all of his programs. At the time I thought that was unnecessary. If a program compiled and ran the last time you touched it, why shouldn&#8217;t it compile and run now? He simply said that I might be surprised.</p>
<p>Even when your source code isn&#8217;t changing, the environment around it is changing. When I was in college, computers didn&#8217;t have automatic weekly updates, but they changed often enough that taking your code out for a walk now and then made sense. Now it makes even more sense. See Jon Claerbout&#8217;s <a href="http://csdl2.computer.org/comp/mags/cs/2009/01/mcs2009010005.pdf">story</a> along these lines.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/wRDgZHTxhLc" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/01/06/taking-your-code-out-for-a-walk/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/01/06/taking-your-code-out-for-a-walk/</feedburner:origLink></item>
		<item>
		<title>CiSE special issue on reproducible research</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/ZhvIXZw0iW8/</link>
		<comments>http://reproducibleresearch.org/blog/2009/01/06/cise-special-issue-on-reproducible-research/#comments</comments>
		<pubDate>Tue, 06 Jan 2009 16:24:31 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=27</guid>
		<description><![CDATA[Computing in Science and Engineering has just come out with a special issue on reproducible research.  (When you first visit the link, you need to click on &#8220;vol 11.&#8221; The page is doing some fancy JavaScript that makes it impossible to link directly to the issue.)
The following articles on RR are included.
Guest Editors&#8217; Introduction: Reproducible [...]]]></description>
			<content:encoded><![CDATA[<p>Computing in Science and Engineering has just come out with a <a href="http://www2.computer.org/portal/web/csdl/magazines/cise#3">special issue on reproducible research</a>.  (When you first visit the <a href="http://www2.computer.org/portal/web/csdl/magazines/cise#3">link</a>, you need to click on &#8220;vol 11.&#8221; The page is doing some fancy JavaScript that makes it impossible to link directly to the issue.)</p>
<p>The following articles on RR are included.</p>
<p><strong><a href="http://csdl2.computer.org/comp/mags/cs/2009/01/mcs2009010005.pdf">Guest Editors&#8217; Introduction: Reproducible Research</a></strong></p>
<p><a href="http://jackson-d-1.geo.utexas.edu/researcher.php?researcher_id=3044">Sergey Fomel</a>, University of Texas at Austin<br />
<a href="http://en.wikipedia.org/wiki/Jon_Claerbout"><span>Jon F. Claerbout</span></a>, Stanford University</p>
<p><strong>Reproducible Research in Computational Harmonic Analysis</strong><br />
<a href="http://www-stat.stanford.edu/~donoho/"><span>David L. Donoho</span></a>, Stanford University<br />
<a href="http://www.stanford.edu/~arianm/"><span>Arian Maleki</span></a>, Stanford University<br />
Inam Ur Rahman, Apple Computer<br />
<a href="http://www-stat.stanford.edu/~mshahram/"><span>Morteza Shahram</span></a>, Stanford University<br />
<a href="http://www.stanford.edu/~vcs/"><span>Victori</span><span>a</span><span> Stodden</span></a>, Harvard University</p>
<p><strong>Python Tools for Reproducible Research on Hyperbolic Problems</strong><br />
<a href="http://www.amath.washington.edu/~rjl/"><span>Randall J. LeVeque</span></a>, University of Washington</p>
<p><strong>Distributed Reproducible Research Using Cached Computations</strong><br />
<a href="http://www.biostat.jhsph.edu/~rpeng/cv.html"><span>Roger D. Peng</span></a>, Johns Hopkins Bloomberg School of Public Health<br />
Sandrah P. Eckel, Johns Hopkins Bloomberg School of Public Health</p>
<p><strong>The Legal Framework for Reproducible Scientific Research: Licensing and Copyright</strong><br />
<a href="http://www.stanford.edu/~vcs/"><span>Victoria Stodden</span></a>, Harvard University</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/ZhvIXZw0iW8" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2009/01/06/cise-special-issue-on-reproducible-research/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2009/01/06/cise-special-issue-on-reproducible-research/</feedburner:origLink></item>
		<item>
		<title>Irreproducible results in neuroscience</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/G4fJOXGuzL4/</link>
		<comments>http://reproducibleresearch.org/blog/2008/12/28/irreproducible-results-in-neuroscience/#comments</comments>
		<pubDate>Mon, 29 Dec 2008 02:27:50 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=26</guid>
		<description><![CDATA[See Andrew Gelman&#8217;s post Suspiciously high correlations in brain imaging studies.
]]></description>
			<content:encoded><![CDATA[<p>See Andrew Gelman&#8217;s post <a href="http://www.stat.columbia.edu/~cook/movabletype/archives/2008/12/suspiciously-hi.html">Suspiciously high correlations in brain imaging studies</a>.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/G4fJOXGuzL4" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2008/12/28/irreproducible-results-in-neuroscience/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2008/12/28/irreproducible-results-in-neuroscience/</feedburner:origLink></item>
		<item>
		<title>BioMed Critical Commentary</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/oc9Qa7tHGdQ/</link>
		<comments>http://reproducibleresearch.org/blog/2008/12/15/biomed-critical-commentary/#comments</comments>
		<pubDate>Mon, 15 Dec 2008 14:37:33 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=24</guid>
		<description><![CDATA[I just found out about BioMed Critical Commentary. Here&#8217;s an excerpt from the site&#8217;s philosophy statement.
The current system of scientific journals serves well certain constituencies: the advertisers, the journals themselves, and    the authors. It is the underlying philosophy of BioMed Critical Commentary to serve the readers in preference to any   [...]]]></description>
			<content:encoded><![CDATA[<p>I just found out about <a href="http://bm-cc.org/about.php">BioMed Critical Commentary</a>. Here&#8217;s an excerpt from the site&#8217;s philosophy statement.</p>
<blockquote><p>The current system of scientific journals serves well certain constituencies: the advertisers, the journals themselves, and    the authors. It is the underlying philosophy of BioMed Critical Commentary to serve the readers in preference to any    other constituency.</p></blockquote>
<p>In particular, this site could serve as a public forum for criticism that journals are not eager to publish. It could be a good place to discuss specific examples of irreproducible analyses.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/oc9Qa7tHGdQ" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2008/12/15/biomed-critical-commentary/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2008/12/15/biomed-critical-commentary/</feedburner:origLink></item>
		<item>
		<title>Three reasons to distrust microarray results</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/Exv80Oxh9v4/</link>
		<comments>http://reproducibleresearch.org/blog/2008/12/10/three-reasons-to-distrust-microarray-results/#comments</comments>
		<pubDate>Wed, 10 Dec 2008 19:29:49 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=23</guid>
		<description><![CDATA[Even when lab work and statistical analysis are carried out perfectly, microarray experiment conclusions have a high probability of being incorrect for probabilistic reasons. Of course lab work and statistical analysis are not carried out perfectly. I went to a talk earlier this week that demonstrated reproducibility problems coming both from the wet lab and from [...]]]></description>
			<content:encoded><![CDATA[<p>Even when lab work and statistical analysis are carried out perfectly, <a href="http://en.wikipedia.org/wiki/DNA_microarray">microarray</a> experiment conclusions have a high probability of being incorrect for <a href="http://www.johndcook.com/blog/2008/12/06/why-microarray-studies-are-often-wrong/">probabilistic reasons</a>. Of course lab work and statistical analysis are not carried out perfectly. I went to a talk earlier this week that demonstrated reproducibility problems coming both from the wet lab and from the statistical analysis.</p>
<p>The talk presented a study that supposedly discovered genes that can distinguish those who will respond to a certain therapy from those who will not. On closer analysis, the paper actually demonstrated that it is possible to distinguish microarray experiments conducted on one day from experiments conducted another day. That is, batch effects from the lab were much larger than differences between patients who did and did not respond to therapy. I hear that this is typical unless gene expression levels vary dramatically between subgroups.</p>
<p>The talk also discussed problems with reproducing the statistical analysis. As is so often the case, data were mislabeled. In fact, 3/4 of the samples were mislabeled. Simply keeping up with indexes is the <a href="http://reproducibleresearch.org/blog/2008/10/31/biggest-barrier-to-reproducibility/">biggest barrier to reproducibility</a>. It is shocking how often studies simply did not analyze the data they say they analyzed. This seems like a simple matter to get right; perhaps people give little attention to it precisely because it seems so simple.</p>
<p>So, three reasons to be skeptical of microarray experiment conclusions:</p>
<ol>
<li>High probability of false discovery</li>
<li>Statistical reproducibility problems</li>
<li>Physical reproducibility problems</li>
</ol>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/Exv80Oxh9v4" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2008/12/10/three-reasons-to-distrust-microarray-results/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2008/12/10/three-reasons-to-distrust-microarray-results/</feedburner:origLink></item>
		<item>
		<title>Distributing Reproducible Research</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/tpX07uQ-fpI/</link>
		<comments>http://reproducibleresearch.org/blog/2008/12/02/distributing-reproducible-research/#comments</comments>
		<pubDate>Tue, 02 Dec 2008 13:29:10 +0000</pubDate>
		<dc:creator>Roger</dc:creator>
		
		<category><![CDATA[Distribution]]></category>

		<category><![CDATA[Tools]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=22</guid>
		<description><![CDATA[Most people would agree that reproducible results are important in all areas of science.  I think reproducibility is particularly important in areas of science where replication of an experiment or study&#8212;where a similar question is addressed using independent investigators, data, and methodology&#8212;is highly unlikely.  Such studies are typically difficult to replicate because of time, money, [...]]]></description>
			<content:encoded><![CDATA[<p>Most people would agree that reproducible results are important in all areas of science.  I think reproducibility is particularly important in areas of science where replication of an experiment or study&#8212;where a similar question is addressed using independent investigators, data, and methodology&#8212;is highly unlikely.  Such studies are typically difficult to replicate because of time, money, ethics, or perhaps all three.  In these cases, all we are left with are the data at hand and being able to reproduce the published results from these data is critical.</p>
<p>Much heat has been generated over the question of whether scientists should be forced to make their data and methodology public.  Journals such as <a href="http://www.sciencemag.org/">Science</a> and <a href="http://www.nature.com/">Nature</a> have adopted data dissemination policies; the National Institutes of Health requires data sharing plans for some of its grants; and the Office of Management and Budget Circular <a href="http://www.whitehouse.gov/omb/circulars/a110/a110.html">A-110</a> requires that data generated under federally sponsored research be made available upon request if those data were used in developing a government agency action.  While the debate over such dissemination policies is highly relevant, I think it can obscure and cause people to overlook an important question related to reproducible research.</p>
<p>One way I sometimes think of this question is as follows: Suppose a collaborator comes to you and says &#8220;I desperately want to make my research reproducible.  What should I do?&#8221;  I don&#8217;t mean to frame this as purely a hypothetical question&#8212;I have actually had people ask me this before.</p>
<p>The problem right now is that I don&#8217;t think proponents of reproducible research (myself included) have a good answer to this question.  A typical response might be &#8220;make the code and data available&#8221;.  Yes, but how?  If we cannot come up with a concrete and coherent answer to this question for people who are willing to make their work reproducible, we cannot realistically expect to change the minds of people who are currently unwilling to make their research reproducible.</p>
<p>I think there are two important roadblocks that make it difficult to publish reproducible research.  The first is the lack of a broad toolset that a wide range of researchers can use to assist them in publishing their data and methodology.  There are a number of efforts out there to develop tools, but many of these tools either have important limitations or are only accessible to more sophisticated users (the Sweave/LaTeX combination comes to mind, although it is a great contribution).  A related problem involves getting people to use tools that are already out there.  For example, I believe the use of version control software is a critical aspect of reproducible research and there are many high-quality software packages available for all operating systems.  I personally use <a href="http://git.or.cz/">git</a> but many others would also fit the bill.  I must say I&#8217;ve had limited success convincing people they need to use version control systems.  I think the basic problem is that it involves learning Yet Another Software Package.</p>
<p>The second roadblock for reproducible research is distribution.  Suppose I carefully keep track of all the code I use to analyze my data and am happy to give the code and data to others.  How do I do that?  Many knowledgeable people will set up a web site for themselves and post code and data on their own web pages.  But demanding that everyone create a web site for distributing reproducible research is in my opinion a steep demand.  Many researchers do not have this capability and even if they did, it is not clear to me that web pages are the ideal medium for disseminating reproducible research.  How much data analysis is done in your web browser?</p>
<p>The distribution problem can be addressed by creating some basic infrastructure.  Analogous infrastructure already exists in other domains.  Users of the R statistical system have the <a href="http://cran.r-project.org/">Comprehensive R Archive Network</a> (CRAN) which is used to disseminate R packages (add-on functionality) to anyone around the world.  In practice there is no need to interact with the web site with a browser because R itself can fetch the packages from the Archive and install them without the user ever having to change applications.  Similar facilities exist for Perl (CPAN) and TeX (CTAN).  Of course, we cannot expect such resources to appear out of thin air.  Developing a useful archive requires hardware and administrative time.</p>
<p>I have been trying to develop a system for R users that can be used to distribute reproducible research via a central repository.  The software is an R package called &#8216;<a href="http://cran.r-project.org/package=cacher">cacher</a>&#8217; and the associated repository is what I call the <a href="http://www.reproducible.org/">Reproducible Research Archive</a>.  The basic idea of the &#8216;cacher&#8217; package is to take code that represents a data analysis and cache the code and associated data in a series of key-value databases.  This &#8220;data analysis cache&#8221; can then be packaged and uploaded to the Archive.  Each cache package is given a unique ID (via SHA-1) so that it can be referenced by others in a global fashion.  On the other side of things, the &#8216;cacher&#8217; package can download an available cache package and a user can run the code in the package to reproduce the results.</p>
<p>Not all of the abovementioned functionality is complete but many aspects of the &#8216;cacher&#8217; package are available.  There is also a <a href="http://www.jstatsoft.org/v26/i07">paper</a> in the Journal of Statistical Software that describes the package in greater detail.  The advantage of the &#8216;cacher&#8217; system is that R users have relatively little to learn&#8212;just a few functions.  Of course, the disadvantage is that it is only available to R users, who are a minority of people conducting data analysis in the world.</p>
<p>There are of course other challenges that I haven&#8217;t mentioned that will need to be solved before reproducible research goes mainstream. I think the development of the necessary infrastructure (software and distribution media) is just one important challenge that is critical to its adoption because less technical users need to be able to easily &#8220;plug-in&#8221; to an existing framework without having to build a piece of it themselves.  By learning from experiences in other domains I think we can successfully build this infrastructure and bring reproducible research to a much wider audience.</p>
<p><em>Roger D. Peng</em><br />
<em>Department of Biostatistics</em><br />
<em>Johns Hopkins Bloomberg School of Public Health</em></p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/tpX07uQ-fpI" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2008/12/02/distributing-reproducible-research/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2008/12/02/distributing-reproducible-research/</feedburner:origLink></item>
		<item>
		<title>Seven presentations on RR</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/YYrO5968bC4/</link>
		<comments>http://reproducibleresearch.org/blog/2008/11/29/seven-presentations-on-rr/#comments</comments>
		<pubDate>Sat, 29 Nov 2008 21:10:18 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=21</guid>
		<description><![CDATA[Sergey Fomel just told me about a special session on reproducible research at the &#8220;Berlin 6 Open Access Conference&#8221; in Dusseldorf, Germany. Presentations from the conference are available online.
Sergey Fomel and Sünje Dallmeier-Tiessen gave presentations in geophysics. Patrick Vandewalle and Jelena Kovacevic gave presentations in signal processing. Mark Liberman, Kai von Fintel, and Steven Krauwer [...]]]></description>
			<content:encoded><![CDATA[<p><a href="http://www.beg.utexas.edu/staffinfo/fomels01.htm">Sergey Fomel</a> just told me about a <a href="http://ldc.upenn.edu/myl/Berlin6Session5/Overview.html">special session</a> on reproducible research at the &#8220;Berlin 6 Open Access Conference&#8221; in Dusseldorf, Germany. Presentations from the conference are available online.</p>
<p><a href="http://www.beg.utexas.edu/staffinfo/fomels01.htm">Sergey Fomel</a> and <a href="http://oa.helmholtz.de/index.php?id=137">Sünje Dallmeier-Tiessen</a> gave presentations in geophysics. <a href="http://www.pixeltje.be/">Patrick Vandewalle</a> and <a href="http://www.andrew.cmu.edu/user/jelenak/">Jelena Kovacevic</a> gave presentations in signal processing. <a href="http://www.ling.upenn.edu/%7Emyl/">Mark Liberman</a>, <a href="http://web.mit.edu/linguistics/people/faculty/fintel/index.html">Kai von Fintel</a>, and <a class="style1" href="http://www-sk.let.uu.nl/">Steven Krauwer</a> gave presentations related to language and technology.</p>
<p>Video of the presentations is available <a href="http://www.berlin6.org/?page_id=73">here</a>.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/YYrO5968bC4" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2008/11/29/seven-presentations-on-rr/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2008/11/29/seven-presentations-on-rr/</feedburner:origLink></item>
		<item>
		<title>The Fastware project</title>
		<link>http://feedproxy.google.com/~r/ReproducibleIdeas/~3/WQMAiv9-q3A/</link>
		<comments>http://reproducibleresearch.org/blog/2008/11/26/the-fastware-project/#comments</comments>
		<pubDate>Wed, 26 Nov 2008 12:22:35 +0000</pubDate>
		<dc:creator>John</dc:creator>
		
		<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://reproducibleresearch.org/blog/?p=20</guid>
		<description><![CDATA[Thomas Guest has a new blog post Books, blogs, comments and code samples discussing the challenges of writing a book that contains code samples, may be rendered to multiple devices as well as paper, etc. He points to a project by author Scott Meyers called Fastware that explores ways of meeting these challenges. I haven&#8217;t [...]]]></description>
			<content:encoded><![CDATA[<p>Thomas Guest has a new blog post <a href="http://wordaligned.org/articles/code-samples">Books, blogs, comments and code samples</a> discussing the challenges of writing a book that contains code samples, may be rendered to multiple devices as well as paper, etc. He points to a project by author Scott Meyers called <a href="http://fastwareproject.blogspot.com/">Fastware</a> that explores ways of meeting these challenges. I haven&#8217;t had time to explore Fastware yet, but it sounds like it is concerned with some of the same problems that come up in reproducible research.</p>
<img src="http://feeds.feedburner.com/~r/ReproducibleIdeas/~4/WQMAiv9-q3A" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://reproducibleresearch.org/blog/2008/11/26/the-fastware-project/feed/</wfw:commentRss>
		<feedburner:origLink>http://reproducibleresearch.org/blog/2008/11/26/the-fastware-project/</feedburner:origLink></item>
	</channel>
</rss>
