<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0"> <channel><title>2657 Productions News</title> <link>http://news.mrdwab.com</link> <description>..:: Whereabouts and Whatabouts of the 2657 World ::..</description> <lastBuildDate>Mon, 16 Jan 2012 05:56:16 +0000</lastBuildDate> <language>en</language> <sy:updatePeriod>hourly</sy:updatePeriod> <sy:updateFrequency>1</sy:updateFrequency> <atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/rss+xml" href="http://feeds.feedburner.com/2657Productions" /><feedburner:info uri="2657productions" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><creativeCommons:license>http://creativecommons.org/licenses/by-nc-sa/3.0/</creativeCommons:license><image><link>http://creativecommons.org/licenses/by-nc-sa/3.0/</link><url>http://creativecommons.org/images/public/somerights20.gif</url><title>Some Rights Reserved</title></image><feedburner:emailServiceId>2657Productions</feedburner:emailServiceId><feedburner:feedburnerHostname>http://feedburner.google.com</feedburner:feedburnerHostname><item><title>I’m not at all religious, but…</title><link>http://feedproxy.google.com/~r/2657Productions/~3/rPmhmOn7Inw/</link> <comments>http://news.mrdwab.com/2011/06/26/im-not-at-all-religious-but/#comments</comments> <pubDate>Sun, 26 Jun 2011 12:43:03 +0000</pubDate> 
<dc:creator>Ananda</dc:creator> <category><![CDATA[(all categories)]]></category> <category><![CDATA[Humor]]></category> <category><![CDATA[Pictures]]></category> <category><![CDATA[Ambika!]]></category> <category><![CDATA[silly pictures]]></category> <guid isPermaLink="false">http://news.mrdwab.com/?p=1199</guid> <description><![CDATA[&#8230; here is a goddess that I am happy to worship&#8230; I also had a few alternatives&#8211;and I&#8217;m still not sure which one is my favorite.]]></description> <content:encoded><![CDATA[<p>&#8230; here is a goddess that I am happy to worship&#8230;</p><div
id="attachment_1200" class="wp-caption aligncenter" style="width: 410px"><a
href="http://news.mrdwab.com/2011/06/26/im-not-at-all-religious-but/ambika-border-1/" rel="attachment wp-att-1200"><img
src="http://news.mrdwab.com/wp-content/uploads/2011/06/Ambika-Border-1-400x400.jpg" alt="" title="Ambika - Border 1" width="400" height="400" class="size-medium wp-image-1200" /></a><p
class="wp-caption-text">Don&#039;t make me squirt my milk bottle at you!</p></div><p><span
id="more-1199"></span></p><p>I also had a few alternatives&#8211;and I&#8217;m still not sure which one is my favorite.</p> <a
href='http://news.mrdwab.com/2011/06/26/im-not-at-all-religious-but/ambika-border-1/' title='Don&#039;t make me squirt my milk bottle at you!'><img
width="150" height="150" src="http://news.mrdwab.com/wp-content/uploads/2011/06/Ambika-Border-1-150x150.jpg" class="attachment-thumbnail" alt="Don&#039;t make me squirt my milk bottle at you!" title="Don&#039;t make me squirt my milk bottle at you!" /></a> <a
href='http://news.mrdwab.com/2011/06/26/im-not-at-all-religious-but/ambika-border-sepia/' title='One of the ancient goddesses...'><img
width="150" height="150" src="http://news.mrdwab.com/wp-content/uploads/2011/06/Ambika-Border-Sepia-150x150.jpg" class="attachment-thumbnail" alt="One of the ancient goddesses..." title="One of the ancient goddesses..." /></a> <a
href='http://news.mrdwab.com/2011/06/26/im-not-at-all-religious-but/ambika-colored-pencil/' title='In a coloring book coming to a bookstore near you!'><img
width="150" height="150" src="http://news.mrdwab.com/wp-content/uploads/2011/06/Ambika-Colored-Pencil-150x150.jpg" class="attachment-thumbnail" alt="In a coloring book coming to a bookstore near you!" title="In a coloring book coming to a bookstore near you!" /></a> <a
href='http://news.mrdwab.com/2011/06/26/im-not-at-all-religious-but/ambika-third-eye/' title='The third eye begins to glow just before Kali begins to emerge from Ambika.'><img
width="150" height="150" src="http://news.mrdwab.com/wp-content/uploads/2011/06/Ambika-Third-Eye-150x150.jpg" class="attachment-thumbnail" alt="The third eye begins to glow just before Kali begins to emerge from Ambika." title="The third eye begins to glow just before Kali begins to emerge from Ambika." /></a> <a
href='http://news.mrdwab.com/2011/06/26/im-not-at-all-religious-but/ambika/' title='The original....'><img
width="150" height="150" src="http://news.mrdwab.com/wp-content/uploads/2011/06/Ambika-150x150.jpg" class="attachment-thumbnail" alt="The original...." title="The original...." /></a> <div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/2657Productions?a=rPmhmOn7Inw:NfPdMzLyAZc:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=rPmhmOn7Inw:NfPdMzLyAZc:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=rPmhmOn7Inw:NfPdMzLyAZc:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=rPmhmOn7Inw:NfPdMzLyAZc:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=rPmhmOn7Inw:NfPdMzLyAZc:gIN9vFwOqvQ" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=rPmhmOn7Inw:NfPdMzLyAZc:TzevzKxY174"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=TzevzKxY174" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=rPmhmOn7Inw:NfPdMzLyAZc:bAAVSKdHlY4"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=rPmhmOn7Inw:NfPdMzLyAZc:bAAVSKdHlY4" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/2657Productions/~4/rPmhmOn7Inw" height="1" width="1"/>]]></content:encoded> <wfw:commentRss>http://news.mrdwab.com/2011/06/26/im-not-at-all-religious-but/feed/</wfw:commentRss> <slash:comments>0</slash:comments> <feedburner:origLink>http://news.mrdwab.com/2011/06/26/im-not-at-all-religious-but/</feedburner:origLink></item> <item><title>On the trucks around town…</title><link>http://feedproxy.google.com/~r/2657Productions/~3/MrtfNBylC5c/</link> <comments>http://news.mrdwab.com/2011/06/15/on-the-trucks-around-town/#comments</comments> <pubDate>Wed, 15 Jun 2011 04:40:22 +0000</pubDate> <dc:creator>Ananda</dc:creator> <category><![CDATA[(all categories)]]></category> <category><![CDATA[(non) fiction]]></category> <category><![CDATA[Humor]]></category> <category><![CDATA[India]]></category> <category><![CDATA[Pictures]]></category> <category><![CDATA[Ambika!]]></category> <category><![CDATA[silly pictures]]></category> <category><![CDATA[we two ours one]]></category> <guid isPermaLink="false">http://news.mrdwab.com/?p=1189</guid> <description><![CDATA[Anyone who has spent some time in India is sure to have noticed the slogans painted on the back of trucks, autos, and other vehicles advising &#8220;we two, ours one&#8221;. This is part of India&#8217;s &#8220;family planning&#8221; efforts&#8211;efforts which have had a pretty bumpy history that included a forced sterilization program. Originally, the slogans were [...]]]></description> <content:encoded><![CDATA[<p>Anyone who has spent some time in India is sure to have noticed the slogans painted on the back of trucks, autos, and other vehicles advising &#8220;we two, ours one&#8221;. This is part of India&#8217;s &#8220;<a
href="http://en.wikipedia.org/wiki/Human_population_control#India">family planning</a>&#8221; efforts&#8211;efforts which have had a pretty <a
href="http://en.wikipedia.org/wiki/Family_planning_in_India">bumpy history</a> that included a forced sterilization program.</p><p>Originally, the slogans were &#8220;we two, ours two&#8221;, or at least that was the catchy English version&#8211;regional languages usually had a slogan more along the lines of &#8220;one family, two children&#8221;. And, the change to the new slogan led to at least one humorous math discussion with an auto driver who commented that, &#8220;Earlier, it was &#8216;we two, ours two&#8217;; now, it is &#8216;we two, ours one&#8217;. What&#8217;s next? &#8216;We two, ours half?&#8217;&#8221;</p><p>Anyway, keen observers might have noticed the following new addition to selected trucks:</p><p><a
href="http://news.mrdwab.com/2011/06/15/on-the-trucks-around-town/we-2-ours-1-1/" rel="attachment wp-att-1190"><img
src="http://news.mrdwab.com/wp-content/uploads/2011/06/We-2-Ours-1-1-400x300.jpg" alt="We two, ours one" title="We 2 Ours 1-1" width="400" height="300" class="aligncenter size-medium wp-image-1190" /></a></p><p><a
href="http://news.mrdwab.com/2011/06/15/on-the-trucks-around-town/we-2-ours-1-2/" rel="attachment wp-att-1193"><img
src="http://news.mrdwab.com/wp-content/uploads/2011/06/We-2-Ours-1-2-400x270.jpg" alt="We two, ours one" title="We 2 Ours 1-2" width="400" height="270" class="aligncenter size-medium wp-image-1193" /></a></p> <div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/2657Productions?a=MrtfNBylC5c:kpdUUVZsD3Y:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=MrtfNBylC5c:kpdUUVZsD3Y:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=MrtfNBylC5c:kpdUUVZsD3Y:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=MrtfNBylC5c:kpdUUVZsD3Y:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=MrtfNBylC5c:kpdUUVZsD3Y:gIN9vFwOqvQ" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=MrtfNBylC5c:kpdUUVZsD3Y:TzevzKxY174"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=TzevzKxY174" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=MrtfNBylC5c:kpdUUVZsD3Y:bAAVSKdHlY4"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=MrtfNBylC5c:kpdUUVZsD3Y:bAAVSKdHlY4" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/2657Productions/~4/MrtfNBylC5c" height="1" width="1"/>]]></content:encoded> <wfw:commentRss>http://news.mrdwab.com/2011/06/15/on-the-trucks-around-town/feed/</wfw:commentRss> <slash:comments>0</slash:comments> <feedburner:origLink>http://news.mrdwab.com/2011/06/15/on-the-trucks-around-town/</feedburner:origLink></item> <item><title>Stratified random sampling in R from a data frame</title><link>http://feedproxy.google.com/~r/2657Productions/~3/yEk9eTGAtoU/</link> <comments>http://news.mrdwab.com/2011/05/20/stratified-random-sampling-in-r-from-a-data-frame/#comments</comments> <pubDate>Fri, 20 May 2011 18:01:21 +0000</pubDate> <dc:creator>Ananda</dc:creator> <category><![CDATA[Geekiness]]></category> <category><![CDATA[Useless Knowledge]]></category> <category><![CDATA[code]]></category> <category><![CDATA[R]]></category> <category><![CDATA[R functions]]></category> <category><![CDATA[sampling]]></category> <category><![CDATA[statistics]]></category> <category><![CDATA[stratified sampling]]></category> <guid isPermaLink="false">http://news.mrdwab.com/?p=1174</guid> <description><![CDATA[After a little bit more work, there&#8217;s a new stratified random sampling function, this one letting you sample from a data frame, returning all the variables for each of your samples as a nice data frame that you can continue working on as usual. Get the function at http://news.mrdwab.com/stratified. Usage notes in the head of [...]]]></description> <content:encoded><![CDATA[<p>After a little bit more work, there&#8217;s a new stratified random sampling function, this one letting you sample from a data frame, returning all the variables for each of your samples as a nice data frame that you can continue working on as usual.</p><p>Get the function at <a
href="http://news.mrdwab.com/stratified">http://news.mrdwab.com/stratified</a>. Usage notes in the head of the function.</p><p><span
id="more-1174"></span></p><p>Here&#8217;s the function:</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1174code3'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p11743"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
</pre></td><td
class="code" id="p1174code3"><pre class="rsplus" style="font-family:monospace;">stratified <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">function</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">df</span>, id, group, size, seed<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;NULL&quot;</span>, ...<span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
  <span style="color: #228B22;">#  USE: * Specify your data frame, ID variable (as column number), and</span>
  <span style="color: #228B22;">#         grouping variable (as column number) as the first three arguments.</span>
  <span style="color: #228B22;">#       * Decide on your sample size. For a sample proportional to the</span>
  <span style="color: #228B22;">#         population, enter &quot;size&quot; as a decimal. For an equal number of</span>
  <span style="color: #228B22;">#         samples from each group, enter &quot;size&quot; as a whole number.</span>
  <span style="color: #228B22;">#       * Decide whether you want to use a seed or not. If not, leave blank</span>
  <span style="color: #228B22;">#         or type &quot;NULL&quot; (with quotes). </span>
  <span style="color: #228B22;">#</span>
  <span style="color: #228B22;">#  Example 1: To sample 10% of each group from a data frame named &quot;z&quot;, where</span>
  <span style="color: #228B22;">#             the ID variable is the first variable, the grouping variable</span>
  <span style="color: #228B22;">#             is the fourth variable, and the desired seed is 1, use:</span>
  <span style="color: #228B22;"># </span>
  <span style="color: #228B22;">#                 &gt; stratified(z, 1, 4, .1, 1)</span>
  <span style="color: #228B22;">#</span>
  <span style="color: #228B22;">#  Example 2: To run the same sample as above but without a seed, use:</span>
  <span style="color: #228B22;"># </span>
  <span style="color: #228B22;">#                 &gt; stratified(z, 1, 4, .1)</span>
  <span style="color: #228B22;">#</span>
  <span style="color: #228B22;">#  Example 3: To sample 5 from each group from a data frame named &quot;z&quot;, where</span>
  <span style="color: #228B22;">#             the ID variable is the first variable, the grouping variable</span>
  <span style="color: #228B22;">#             is the third variable, and the desired seed is 2, use:</span>
  <span style="color: #228B22;">#</span>
  <span style="color: #228B22;">#                 &gt; stratified(z, 1, 3, 5, 2)</span>
  <span style="color: #228B22;">#</span>
  <span style="color: #228B22;">#  NOTE: Not tested on datasets with LOTS of groups or with HUGE</span>
  <span style="color: #228B22;">#        differences in group sizes. Probably INCREDIBLY inefficient.</span>
&nbsp;
  k <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">unstack</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">data.<span style="">frame</span></span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">as.<span style="">vector</span></span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">df</span><span style="color: #080;">&#91;</span>id<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #0000FF; font-weight: bold;">as.<span style="">vector</span></span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">df</span><span style="color: #080;">&#91;</span>group<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
  l <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span>
  results <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">vector</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;list&quot;</span>, l<span style="color: #080;">&#41;</span>
&nbsp;
  <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>seed <span style="color: #080;">==</span> <span style="color: #ff0000;">&quot;NULL&quot;</span> <span style="color: #080;">&amp;</span> size <span style="color: #080;">&lt;</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        n <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">round</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>N<span style="color: #080;">&#41;</span><span style="color: #080;">*</span>size<span style="color: #080;">&#41;</span>
        results<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">list</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, n, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>seed <span style="color: #080;">==</span> <span style="color: #ff0000;">&quot;NULL&quot;</span> <span style="color: #080;">&amp;</span> size <span style="color: #080;">&gt;=</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        results<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">list</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, size, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>size <span style="color: #080;">&lt;</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span>seed<span style="color: #080;">&#41;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        n <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">round</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>N<span style="color: #080;">&#41;</span><span style="color: #080;">*</span>size<span style="color: #080;">&#41;</span>
        results<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">list</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, n, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>size <span style="color: #080;">&gt;=</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span>seed<span style="color: #080;">&#41;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        results<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">list</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, size, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span>
  z <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">data.<span style="">frame</span></span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">unlist</span><span style="color: #080;">&#40;</span>results<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
  <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>z<span style="color: #080;">&#41;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">df</span><span style="color: #080;">&#91;</span>id<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>
  w <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">merge</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">df</span>, z<span style="color: #080;">&#41;</span>
  w<span style="color: #080;">&#91;</span><span style="color: #0000FF; font-weight: bold;">order</span><span style="color: #080;">&#40;</span>w<span style="color: #080;">&#91;</span>group<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #080;">&#93;</span>
<span style="color: #080;">&#125;</span></pre></td></tr></table></div><p>And here are some examples of the function in action:</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1174code4'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p11744"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
</pre></td><td
class="code" id="p1174code4"><pre class="rsplus" style="font-family:monospace;"><span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">source</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;http://news.mrdwab.com/stratified&quot;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Make up some data</span>
<span style="color: #080;">&gt;</span> A <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #ff0000;">100</span>
<span style="color: #080;">&gt;</span> B <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;AA&quot;</span>, <span style="color: #ff0000;">&quot;BB&quot;</span>, <span style="color: #ff0000;">&quot;CC&quot;</span>, <span style="color: #ff0000;">&quot;DD&quot;</span>, <span style="color: #ff0000;">&quot;EE&quot;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">100</span>, <span style="color: #0000FF; font-weight: bold;">replace</span><span style="color: #080;">=</span><span style="color: #0000FF; font-weight: bold;">T</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">C</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">rnorm</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">100</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">D</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">abs</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">round</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">rnorm</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">100</span><span style="color: #080;">&#41;</span>, digits<span style="color: #080;">=</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> E <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;CA&quot;</span>, <span style="color: #ff0000;">&quot;NY&quot;</span>, <span style="color: #ff0000;">&quot;TX&quot;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">100</span>, <span style="color: #0000FF; font-weight: bold;">replace</span><span style="color: #080;">=</span><span style="color: #0000FF; font-weight: bold;">T</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> dat <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">data.<span style="">frame</span></span><span style="color: #080;">&#40;</span>A, B, <span style="color: #0000FF; font-weight: bold;">C</span>, <span style="color: #0000FF; font-weight: bold;">D</span>, E<span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># view the first few rows</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">head</span><span style="color: #080;">&#40;</span>dat<span style="color: #080;">&#41;</span>
  A  B           <span style="color: #0000FF; font-weight: bold;">C</span>   <span style="color: #0000FF; font-weight: bold;">D</span>  E
<span style="color: #ff0000;">1</span> <span style="color: #ff0000;">1</span> CC <span style="color: #080;">-</span><span style="color: #ff0000;">0.07870439</span> <span style="color: #ff0000;">0.6</span> NY
<span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2</span> CC <span style="color: #080;">-</span><span style="color: #ff0000;">0.65048634</span> <span style="color: #ff0000;">0.3</span> TX
<span style="color: #ff0000;">3</span> <span style="color: #ff0000;">3</span> EE  <span style="color: #ff0000;">1.02703616</span> <span style="color: #ff0000;">1.3</span> NY
<span style="color: #ff0000;">4</span> <span style="color: #ff0000;">4</span> BB <span style="color: #080;">-</span><span style="color: #ff0000;">1.08696775</span> <span style="color: #ff0000;">0.4</span> TX
<span style="color: #ff0000;">5</span> <span style="color: #ff0000;">5</span> CC  <span style="color: #ff0000;">0.56741795</span> <span style="color: #ff0000;">0.2</span> CA
<span style="color: #ff0000;">6</span> <span style="color: #ff0000;">6</span> AA <span style="color: #080;">-</span><span style="color: #ff0000;">0.46448941</span> <span style="color: #ff0000;">0.5</span> TX
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Sample 10% from each group from variable B, no seed</span>
<span style="color: #080;">&gt;</span> stratified<span style="color: #080;">&#40;</span>dat, <span style="color: #ff0000;">1</span>, <span style="color: #ff0000;">2</span>, .1<span style="color: #080;">&#41;</span>
    A  B           <span style="color: #0000FF; font-weight: bold;">C</span>   <span style="color: #0000FF; font-weight: bold;">D</span>  E
<span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">6</span> AA <span style="color: #080;">-</span><span style="color: #ff0000;">0.46448941</span> <span style="color: #ff0000;">0.5</span> TX
<span style="color: #ff0000;">7</span>  <span style="color: #ff0000;">71</span> AA  <span style="color: #ff0000;">1.98128479</span> <span style="color: #ff0000;">2.1</span> CA
<span style="color: #ff0000;">5</span>  <span style="color: #ff0000;">53</span> BB  <span style="color: #ff0000;">1.00539398</span> <span style="color: #ff0000;">0.7</span> NY
<span style="color: #ff0000;">10</span> <span style="color: #ff0000;">97</span> BB  <span style="color: #ff0000;">0.68252675</span> <span style="color: #ff0000;">1.9</span> NY
<span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">1</span> CC <span style="color: #080;">-</span><span style="color: #ff0000;">0.07870439</span> <span style="color: #ff0000;">0.6</span> NY
<span style="color: #ff0000;">4</span>  <span style="color: #ff0000;">42</span> CC <span style="color: #080;">-</span><span style="color: #ff0000;">2.00256854</span> <span style="color: #ff0000;">0.3</span> TX
<span style="color: #ff0000;">8</span>  <span style="color: #ff0000;">76</span> DD <span style="color: #080;">-</span><span style="color: #ff0000;">0.84151459</span> <span style="color: #ff0000;">0.2</span> NY
<span style="color: #ff0000;">9</span>  <span style="color: #ff0000;">95</span> DD <span style="color: #080;">-</span><span style="color: #ff0000;">0.47276142</span> <span style="color: #ff0000;">0.3</span> CA
<span style="color: #ff0000;">11</span> <span style="color: #ff0000;">99</span> DD  <span style="color: #ff0000;">1.05173419</span> <span style="color: #ff0000;">2.1</span> TX
<span style="color: #ff0000;">3</span>  <span style="color: #ff0000;">10</span> EE <span style="color: #080;">-</span><span style="color: #ff0000;">0.69079473</span> <span style="color: #ff0000;">1.1</span> TX
<span style="color: #ff0000;">6</span>  <span style="color: #ff0000;">57</span> EE <span style="color: #080;">-</span><span style="color: #ff0000;">0.38210921</span> <span style="color: #ff0000;">1.5</span> CA
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Sample 10% from each group from variable E, seed of 1</span>
<span style="color: #080;">&gt;</span> stratified<span style="color: #080;">&#40;</span>dat, <span style="color: #ff0000;">1</span>, <span style="color: #ff0000;">5</span>, .1, <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span>
    A  B          <span style="color: #0000FF; font-weight: bold;">C</span>   <span style="color: #0000FF; font-weight: bold;">D</span>  E
<span style="color: #ff0000;">4</span>  <span style="color: #ff0000;">33</span> AA  <span style="color: #ff0000;">1.6105099</span> <span style="color: #ff0000;">0.5</span> CA
<span style="color: #ff0000;">7</span>  <span style="color: #ff0000;">48</span> AA  <span style="color: #ff0000;">0.3128274</span> <span style="color: #ff0000;">0.6</span> CA
<span style="color: #ff0000;">9</span>  <span style="color: #ff0000;">62</span> DD  <span style="color: #ff0000;">0.4673061</span> <span style="color: #ff0000;">0.0</span> CA
<span style="color: #ff0000;">10</span> <span style="color: #ff0000;">86</span> EE  <span style="color: #ff0000;">0.4047880</span> <span style="color: #ff0000;">1.6</span> CA
<span style="color: #ff0000;">3</span>  <span style="color: #ff0000;">28</span> AA <span style="color: #080;">-</span><span style="color: #ff0000;">1.6815553</span> <span style="color: #ff0000;">0.3</span> NY
<span style="color: #ff0000;">5</span>  <span style="color: #ff0000;">36</span> AA  <span style="color: #ff0000;">0.3307508</span> <span style="color: #ff0000;">0.3</span> NY
<span style="color: #ff0000;">8</span>  <span style="color: #ff0000;">53</span> BB  <span style="color: #ff0000;">1.0053940</span> <span style="color: #ff0000;">0.7</span> NY
<span style="color: #ff0000;">1</span>  <span style="color: #ff0000;">21</span> DD  <span style="color: #ff0000;">0.5229282</span> <span style="color: #ff0000;">1.2</span> TX
<span style="color: #ff0000;">2</span>  <span style="color: #ff0000;">27</span> BB  <span style="color: #ff0000;">0.8678977</span> <span style="color: #ff0000;">0.7</span> TX
<span style="color: #ff0000;">6</span>  <span style="color: #ff0000;">44</span> DD <span style="color: #080;">-</span><span style="color: #ff0000;">0.5790353</span> <span style="color: #ff0000;">0.9</span> TX
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># You can also be verbose if it helps you remember what you're doing</span>
<span style="color: #080;">&gt;</span> stratified<span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">df</span><span style="color: #080;">=</span>dat, id<span style="color: #080;">=</span><span style="color: #ff0000;">1</span>, group<span style="color: #080;">=</span><span style="color: #ff0000;">5</span>, size<span style="color: #080;">=</span>.1, seed<span style="color: #080;">=</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span>
    A  B          <span style="color: #0000FF; font-weight: bold;">C</span>   <span style="color: #0000FF; font-weight: bold;">D</span>  E
<span style="color: #ff0000;">4</span>  <span style="color: #ff0000;">33</span> AA  <span style="color: #ff0000;">1.6105099</span> <span style="color: #ff0000;">0.5</span> CA
<span style="color: #ff0000;">7</span>  <span style="color: #ff0000;">48</span> AA  <span style="color: #ff0000;">0.3128274</span> <span style="color: #ff0000;">0.6</span> CA
<span style="color: #ff0000;">9</span>  <span style="color: #ff0000;">62</span> DD  <span style="color: #ff0000;">0.4673061</span> <span style="color: #ff0000;">0.0</span> CA
<span style="color: #ff0000;">10</span> <span style="color: #ff0000;">86</span> EE  <span style="color: #ff0000;">0.4047880</span> <span style="color: #ff0000;">1.6</span> CA
<span style="color: #ff0000;">3</span>  <span style="color: #ff0000;">28</span> AA <span style="color: #080;">-</span><span style="color: #ff0000;">1.6815553</span> <span style="color: #ff0000;">0.3</span> NY
<span style="color: #ff0000;">5</span>  <span style="color: #ff0000;">36</span> AA  <span style="color: #ff0000;">0.3307508</span> <span style="color: #ff0000;">0.3</span> NY
<span style="color: #ff0000;">8</span>  <span style="color: #ff0000;">53</span> BB  <span style="color: #ff0000;">1.0053940</span> <span style="color: #ff0000;">0.7</span> NY
<span style="color: #ff0000;">1</span>  <span style="color: #ff0000;">21</span> DD  <span style="color: #ff0000;">0.5229282</span> <span style="color: #ff0000;">1.2</span> TX
<span style="color: #ff0000;">2</span>  <span style="color: #ff0000;">27</span> BB  <span style="color: #ff0000;">0.8678977</span> <span style="color: #ff0000;">0.7</span> TX
<span style="color: #ff0000;">6</span>  <span style="color: #ff0000;">44</span> DD <span style="color: #080;">-</span><span style="color: #ff0000;">0.5790353</span> <span style="color: #ff0000;">0.9</span> TX</pre></td></tr></table></div> <div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/2657Productions?a=yEk9eTGAtoU:rGn30-HzPzc:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=yEk9eTGAtoU:rGn30-HzPzc:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=yEk9eTGAtoU:rGn30-HzPzc:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=yEk9eTGAtoU:rGn30-HzPzc:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=yEk9eTGAtoU:rGn30-HzPzc:gIN9vFwOqvQ" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=yEk9eTGAtoU:rGn30-HzPzc:TzevzKxY174"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=TzevzKxY174" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=yEk9eTGAtoU:rGn30-HzPzc:bAAVSKdHlY4"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=yEk9eTGAtoU:rGn30-HzPzc:bAAVSKdHlY4" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/2657Productions/~4/yEk9eTGAtoU" height="1" width="1"/>]]></content:encoded> <wfw:commentRss>http://news.mrdwab.com/2011/05/20/stratified-random-sampling-in-r-from-a-data-frame/feed/</wfw:commentRss> <slash:comments>4</slash:comments> <feedburner:origLink>http://news.mrdwab.com/2011/05/20/stratified-random-sampling-in-r-from-a-data-frame/</feedburner:origLink></item> <item><title>Stratified Random Sampling in R–A Function in Progress</title><link>http://feedproxy.google.com/~r/2657Productions/~3/-wyu5FBhqlg/</link> <comments>http://news.mrdwab.com/2011/05/15/stratified-random-sampling-in-r-beta/#comments</comments> <pubDate>Sun, 15 May 2011 10:16:02 +0000</pubDate> <dc:creator>Ananda</dc:creator> <category><![CDATA[(all categories)]]></category> <category><![CDATA[Geekiness]]></category> <category><![CDATA[Useless Knowledge]]></category> <category><![CDATA[code]]></category> <category><![CDATA[experiments]]></category> <category><![CDATA[R]]></category> <category><![CDATA[R functions]]></category> <category><![CDATA[sampling]]></category> <category><![CDATA[statistics]]></category> <category><![CDATA[tapply()]]></category> <guid isPermaLink="false">http://news.mrdwab.com/?p=1141</guid> <description><![CDATA[IMPORTANT: This is here mostly to remind me of how I solved my problem. You should read Stratified random sampling in R from a data frame if you really want to use this function. I know that sampling is quite complex, and I will admit that I know very little about its complexities. Fortunately, software [...]]]></description> <content:encoded><![CDATA[<blockquote><p><strong>IMPORTANT</strong>: This is here mostly to remind me of how I solved my problem. You should read <a
href="http://news.mrdwab.com/2011/05/20/stratified-random-sampling-in-r-from-a-data-frame/" title="Stratified random sampling in R from a data frame">Stratified random sampling in R from a data frame</a> if you really want to use this function.</p></blockquote><p>I know that sampling is quite complex, and I will admit that I know very little about its complexities. Fortunately, software like <a
href="http://www.r-project.org">R</a> lets you draw <a
href="http://news.mrdwab.com/2009/11/29/simple-sampling-with-r/" title="Simple sampling with R">simple random samples</a> pretty easily, <a
href="http://news.mrdwab.com/2009/11/30/sampling-with-replacement-in-r/" title="Sampling with replacement in R">either with</a> or without replacement. Unfortunately, I could not find any feature to allow me to do simple stratified random sampling, at least not with the features I was looking for. Fortunately again, with a little bit of experimenting, it can be pretty easy to learn how to write functions in R when a direct solution does not present itself.</p><p>This post shares my initial &#8220;work-in-progress&#8221; on writing an R function for stratified sampling.</p><p><span
id="more-1141"></span></p><h2>The problem&#8230;</h2><p>Here&#8217;s the minimum that I was hoping for:</p><ul><li>I wanted to be able to draw both a proportional sample (which is more common, for it allows you to make generalizations about the population as a whole) as well as a fixed-size sample (which is less common, but it is useful for making comparisons across groups).</li><li>I often use a seed when sampling, so I wanted that to be a part of the function.</li><li>I wanted the output to be the same as if I were to sample from each group individually.</li><li>I was hoping that my output could be stored as a new object that I could then reuse (either a list or a data frame, preferably the latter).</li></ul><p>My initial searches directed me to <a
href="http://yihui.name/r/stat/sampling_survey/stratified/index.htm" target="_blank">Yihui Xie&#8217;s page on stratified sampling using tapply()</a>. However, this option did not satisfy my needs. As far as I could figure, it only allowed me to take a fixed sample size. Also, I wasn&#8217;t totally satisfied with the output.</p><p>Consider the following. In Yihui Xie&#8217;s example, the results differ from those one would get by sampling from each group separately using the same seed.</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1141code10'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p114110"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
</pre></td><td
class="code" id="p1141code10"><pre class="rsplus" style="font-family:monospace;"><span style="color: #080;">&gt;</span> dat <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">data.<span style="">frame</span></span><span style="color: #080;">&#40;</span>x <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #ff0000;">15</span>, stratum <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">gl</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">3</span>, <span style="color: #ff0000;">5</span><span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> dat
    x stratum
<span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">1</span>       <span style="color: #ff0000;">1</span>
<span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">2</span>       <span style="color: #ff0000;">1</span>
<span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">3</span>       <span style="color: #ff0000;">1</span>
<span style="color: #ff0000;">4</span>   <span style="color: #ff0000;">4</span>       <span style="color: #ff0000;">1</span>
<span style="color: #ff0000;">5</span>   <span style="color: #ff0000;">5</span>       <span style="color: #ff0000;">1</span>
<span style="color: #ff0000;">6</span>   <span style="color: #ff0000;">6</span>       <span style="color: #ff0000;">2</span>
<span style="color: #ff0000;">7</span>   <span style="color: #ff0000;">7</span>       <span style="color: #ff0000;">2</span>
<span style="color: #ff0000;">8</span>   <span style="color: #ff0000;">8</span>       <span style="color: #ff0000;">2</span>
<span style="color: #ff0000;">9</span>   <span style="color: #ff0000;">9</span>       <span style="color: #ff0000;">2</span>
<span style="color: #ff0000;">10</span> <span style="color: #ff0000;">10</span>       <span style="color: #ff0000;">2</span>
<span style="color: #ff0000;">11</span> <span style="color: #ff0000;">11</span>       <span style="color: #ff0000;">3</span>
<span style="color: #ff0000;">12</span> <span style="color: #ff0000;">12</span>       <span style="color: #ff0000;">3</span>
<span style="color: #ff0000;">13</span> <span style="color: #ff0000;">13</span>       <span style="color: #ff0000;">3</span>
<span style="color: #ff0000;">14</span> <span style="color: #ff0000;">14</span>       <span style="color: #ff0000;">3</span>
<span style="color: #ff0000;">15</span> <span style="color: #ff0000;">15</span>       <span style="color: #ff0000;">3</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span><span style="color: #080;">;</span> <span style="color: #0000FF; font-weight: bold;">tapply</span><span style="color: #080;">&#40;</span>dat$x, dat$stratum, <span style="color: #0000FF; font-weight: bold;">sample</span>, size <span style="color: #080;">=</span> <span style="color: #ff0000;">3</span><span style="color: #080;">&#41;</span>
$`<span style="color: #ff0000;">1</span>`
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span> <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">5</span> <span style="color: #ff0000;">4</span>
&nbsp;
$`<span style="color: #ff0000;">2</span>`
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span> <span style="color: #ff0000;">10</span>  <span style="color: #ff0000;">6</span>  <span style="color: #ff0000;">8</span>
&nbsp;
$`<span style="color: #ff0000;">3</span>`
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span> <span style="color: #ff0000;">15</span> <span style="color: #ff0000;">13</span> <span style="color: #ff0000;">12</span>
&nbsp;
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Compare with what we get when we sample individually:</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span><span style="color: #080;">;</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #ff0000;">5</span>, <span style="color: #ff0000;">3</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span> <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">5</span> <span style="color: #ff0000;">4</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span><span style="color: #080;">;</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">6</span><span style="color: #080;">:</span><span style="color: #ff0000;">10</span>, <span style="color: #ff0000;">3</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span>  <span style="color: #ff0000;">7</span> <span style="color: #ff0000;">10</span>  <span style="color: #ff0000;">9</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span><span style="color: #080;">;</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">11</span><span style="color: #080;">:</span><span style="color: #ff0000;">15</span>, <span style="color: #ff0000;">3</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span> <span style="color: #ff0000;">12</span> <span style="color: #ff0000;">15</span> <span style="color: #ff0000;">14</span></pre></td></tr></table></div><p>I&#8217;m sure there&#8217;s some sampling theory that explains this, or at least something about how R treats its data, but at the moment, that&#8217;s beyond my humble level of expertise.</p><h2>Stratified sampling, Mr. DWAB style&#8230;</h2><p>The solution I arrived at is to use &#8220;unstack()&#8221; and a few conditional loops to take the samples.</p><p>And, without more rambling, here&#8217;s what I came up with.</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1141code11'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p114111"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
</pre></td><td
class="code" id="p1141code11"><pre class="rsplus" style="font-family:monospace;">stratified <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">function</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">df</span>, size, seed, dframe<span style="color: #080;">=</span>FALSE, ...<span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
         <span style="color: #228B22;"># USE: Start with a data frame with your cases in one column and your</span>
         <span style="color: #228B22;"># groups in another column. Decide on if you want to use a seed or not. </span>
         <span style="color: #228B22;"># If not, seed should be &quot;NO&quot; (with quotes). Decide on if you want your</span>
         <span style="color: #228B22;"># output as a data frame or not; by default, dframe is set to &quot;FALSE&quot;.</span>
         <span style="color: #228B22;"># To take a sample proportional to the population size in each group,</span>
         <span style="color: #228B22;"># enter &quot;size&quot; as a decimal. Otherwise, enter size as a whole number.</span>
         <span style="color: #228B22;">#</span>
         <span style="color: #228B22;"># Example 1a: To sample 10% of each group from a data frame named &quot;z&quot;</span>
         <span style="color: #228B22;"># and using a seed of &quot;1&quot;, use: &gt; stratified(z, .1, 1)</span>
         <span style="color: #228B22;"># Example 1b: To run the same sample as above but display the result as</span>
         <span style="color: #228B22;"># a data frame, use: &gt; stratified(z, .1, 1, T)</span>
         <span style="color: #228B22;">#</span>
         <span style="color: #228B22;"># Example 2: To sample 10% of each group from a data frame named &quot;z&quot;</span>
         <span style="color: #228B22;"># and using no seed, use: &gt; stratified(z, .1, &quot;NO&quot;)</span>
         <span style="color: #228B22;">#</span>
         <span style="color: #228B22;"># Example 3: To sample 5 from each group from a data frame named &quot;z&quot;</span>
         <span style="color: #228B22;"># and using a seed of 30, use: &gt; stratified(z, 5, 30)</span>
         <span style="color: #228B22;">#</span>
         <span style="color: #228B22;"># NOTE: Not recommended for datasets with LOTS of groups or with HUGE</span>
         <span style="color: #228B22;"># differences in group sizes.</span>
  k <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">unstack</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">df</span><span style="color: #080;">&#41;</span>
  <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>dframe <span style="color: #080;">==</span> FALSE<span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
    <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>seed <span style="color: #080;">==</span> <span style="color: #ff0000;">&quot;NO&quot;</span> <span style="color: #080;">&amp;</span> size <span style="color: #080;">&lt;</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        n <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">round</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>N<span style="color: #080;">&#41;</span><span style="color: #080;">*</span>size<span style="color: #080;">&#41;</span>
        pre <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">structure</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">list</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;Group&quot;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">&quot;Population Size&quot;</span> <span style="color: #080;">=</span>
                             <span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">&quot;Sample Size&quot;</span> <span style="color: #080;">=</span> n, Seed <span style="color: #080;">=</span> seed,
                             Sample <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, n, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>, <span style="color: #0000FF; font-weight: bold;">class</span> <span style="color: #080;">=</span> <span style="color: #ff0000;">&quot;power.htest&quot;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">print</span><span style="color: #080;">&#40;</span>pre<span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>seed <span style="color: #080;">==</span> <span style="color: #ff0000;">&quot;NO&quot;</span> <span style="color: #080;">&amp;</span> size <span style="color: #080;">&gt;=</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        pre <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">structure</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">list</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;Group&quot;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">&quot;Population Size&quot;</span> <span style="color: #080;">=</span>
                             <span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">&quot;Sample Size&quot;</span> <span style="color: #080;">=</span> size, Seed <span style="color: #080;">=</span> seed,
                             Sample <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, size, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>,
                             <span style="color: #0000FF; font-weight: bold;">class</span> <span style="color: #080;">=</span> <span style="color: #ff0000;">&quot;power.htest&quot;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">print</span><span style="color: #080;">&#40;</span>pre<span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>size <span style="color: #080;">&lt;</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span>seed<span style="color: #080;">&#41;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        n <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">round</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>N<span style="color: #080;">&#41;</span><span style="color: #080;">*</span>size<span style="color: #080;">&#41;</span>
        pre <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">structure</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">list</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;Group&quot;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">&quot;Population Size&quot;</span> <span style="color: #080;">=</span>
                             <span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">&quot;Sample Size&quot;</span> <span style="color: #080;">=</span> n, Seed <span style="color: #080;">=</span> seed,
                             Sample <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, n, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>, <span style="color: #0000FF; font-weight: bold;">class</span> <span style="color: #080;">=</span> <span style="color: #ff0000;">&quot;power.htest&quot;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">print</span><span style="color: #080;">&#40;</span>pre<span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>size <span style="color: #080;">&gt;=</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span>seed<span style="color: #080;">&#41;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        pre <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">structure</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">list</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;Group&quot;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">&quot;Population Size&quot;</span> <span style="color: #080;">=</span>
                             <span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">&quot;Sample Size&quot;</span> <span style="color: #080;">=</span> size, Seed <span style="color: #080;">=</span> seed,
                             Sample <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, size, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>,
                             <span style="color: #0000FF; font-weight: bold;">class</span> <span style="color: #080;">=</span> <span style="color: #ff0000;">&quot;power.htest&quot;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">print</span><span style="color: #080;">&#40;</span>pre<span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span>
  <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #080;">&#123;</span>
    <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>seed <span style="color: #080;">==</span> <span style="color: #ff0000;">&quot;NO&quot;</span> <span style="color: #080;">&amp;</span> size <span style="color: #080;">&lt;</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        n <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">round</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>N<span style="color: #080;">&#41;</span><span style="color: #080;">*</span>size<span style="color: #080;">&#41;</span>
        res <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">data.<span style="">frame</span></span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, n, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>res<span style="color: #080;">&#41;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;Group&quot;</span>, <span style="color: #ff0000;">&quot;Samples&quot;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">print</span><span style="color: #080;">&#40;</span>res<span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>seed <span style="color: #080;">==</span> <span style="color: #ff0000;">&quot;NO&quot;</span> <span style="color: #080;">&amp;</span> size <span style="color: #080;">&gt;=</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        res <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">data.<span style="">frame</span></span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, size, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>res<span style="color: #080;">&#41;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;Group&quot;</span>, <span style="color: #ff0000;">&quot;Samples&quot;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">print</span><span style="color: #080;">&#40;</span>res<span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>size <span style="color: #080;">&lt;</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span>seed<span style="color: #080;">&#41;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        n <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">round</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>N<span style="color: #080;">&#41;</span><span style="color: #080;">*</span>size<span style="color: #080;">&#41;</span>
        res <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">data.<span style="">frame</span></span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, n, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>res<span style="color: #080;">&#41;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;Group&quot;</span>, <span style="color: #ff0000;">&quot;Samples&quot;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">print</span><span style="color: #080;">&#40;</span>res<span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span> <span style="color: #0000FF; font-weight: bold;">else</span> <span style="color: #0000FF; font-weight: bold;">if</span> <span style="color: #080;">&#40;</span>size <span style="color: #080;">&gt;=</span> <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
      <span style="color: #0000FF; font-weight: bold;">for</span> <span style="color: #080;">&#40;</span>i <span style="color: #0000FF; font-weight: bold;">in</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #0000FF; font-weight: bold;">length</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span> <span style="color: #080;">&#123;</span>
        <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span>seed<span style="color: #080;">&#41;</span>
        N <span style="color: #080;">=</span> k<span style="color: #080;">&#91;</span><span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#93;</span>
        res <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">data.<span style="">frame</span></span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>k<span style="color: #080;">&#91;</span>i<span style="color: #080;">&#93;</span><span style="color: #080;">&#41;</span>, <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span>N, size, ...<span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">names</span><span style="color: #080;">&#40;</span>res<span style="color: #080;">&#41;</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;Group&quot;</span>, <span style="color: #ff0000;">&quot;Samples&quot;</span><span style="color: #080;">&#41;</span>
        <span style="color: #0000FF; font-weight: bold;">print</span><span style="color: #080;">&#40;</span>res<span style="color: #080;">&#41;</span>
      <span style="color: #080;">&#125;</span>
    <span style="color: #080;">&#125;</span>
  <span style="color: #080;">&#125;</span>
<span style="color: #080;">&#125;</span></pre></td></tr></table></div><p>You can load the function by typing:</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1141code12'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p114112"><td
class="line_numbers"><pre>1
</pre></td><td
class="code" id="p1141code12"><pre class="rsplus" style="font-family:monospace;"><span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">source</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;http://news.mrdwab.com/stratified-beta&quot;</span><span style="color: #080;">&#41;</span></pre></td></tr></table></div><h2>And now, to test it&#8230;</h2><p>Let&#8217;s generate some dummy data and see what we can come up with. The function takes these arguments, in this order:</p><ul><li><code>df</code>: The source data frame, with the first column being the IDs and the second column being the groups.</li><li><code>size</code>: The sample size you want, either as a proportion of each group (a decimal less than 1, e.g. <code>.15</code> for a 15% proportional sample) or as a whole number of items to draw from each group.</li><li><code>seed</code>: The seed you want to use. If you don&#8217;t want the function to set a seed, enter <code>"NO"</code> (with the quotes).</li><li><code>dframe</code>: What format you want the output in, either a list or a data frame. Defaults to a list (<code>dframe=FALSE</code>), which is better at the moment since the data frame option is not working the way I expect it to yet.</li></ul><p>Any additional named arguments, such as <code>replace=T</code>, are passed on to <code>sample()</code>.</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1141code13'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p114113"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
</pre></td><td
class="code" id="p1141code13"><pre class="rsplus" style="font-family:monospace;"><span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Generate some data</span>
<span style="color: #080;">&gt;</span> a <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #ff0000;">100</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">123</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> b <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">sample</span><span style="color: #080;">&#40;</span><span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;a&quot;</span>, <span style="color: #ff0000;">&quot;b&quot;</span>, <span style="color: #ff0000;">&quot;c&quot;</span>, <span style="color: #ff0000;">&quot;d&quot;</span><span style="color: #080;">&#41;</span>, <span style="color: #ff0000;">100</span>, <span style="color: #0000FF; font-weight: bold;">replace</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">T</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> z <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">data.<span style="">frame</span></span><span style="color: #080;">&#40;</span>a, b<span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Check how big each group is</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">table</span><span style="color: #080;">&#40;</span>z$b<span style="color: #080;">&#41;</span>
&nbsp;
 a  b  <span style="color: #0000FF; font-weight: bold;">c</span>  d
<span style="color: #ff0000;">26</span> <span style="color: #ff0000;">27</span> <span style="color: #ff0000;">20</span> <span style="color: #ff0000;">27</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Make sure the function is loaded before you continue!</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># source(&quot;http://news.mrdwab.com/stratified-beta&quot;)</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Take a 15% sample and use a seed of 1</span>
<span style="color: #080;">&gt;</span> stratified<span style="color: #080;">&#40;</span>z, .15, <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span>
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> a
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">26</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">4</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">38</span>, <span style="color: #ff0000;">45</span>, <span style="color: #ff0000;">54</span>, <span style="color: #ff0000;">81</span>
&nbsp;
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> b
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">27</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">4</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">39</span>, <span style="color: #ff0000;">43</span>, <span style="color: #ff0000;">60</span>, <span style="color: #ff0000;">79</span>
&nbsp;
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">c</span>
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">20</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">3</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">23</span>, <span style="color: #ff0000;">26</span>, <span style="color: #ff0000;">33</span>
&nbsp;
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> d
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">27</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">4</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">21</span>, <span style="color: #ff0000;">31</span>, <span style="color: #ff0000;">53</span>, <span style="color: #ff0000;">71</span>
&nbsp;
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Take a sample of 5 from each group and use a seed of 1</span>
<span style="color: #080;">&gt;</span> stratified<span style="color: #080;">&#40;</span>z, <span style="color: #ff0000;">5</span>, <span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span>
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> a
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">26</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">5</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">38</span>, <span style="color: #ff0000;">45</span>, <span style="color: #ff0000;">54</span>, <span style="color: #ff0000;">81</span>, <span style="color: #ff0000;">30</span>
&nbsp;
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> b
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">27</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">5</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">39</span>, <span style="color: #ff0000;">43</span>, <span style="color: #ff0000;">60</span>, <span style="color: #ff0000;">79</span>, <span style="color: #ff0000;">19</span>
&nbsp;
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">c</span>
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">20</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">5</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">23</span>, <span style="color: #ff0000;">26</span>, <span style="color: #ff0000;">33</span>, <span style="color: #ff0000;">78</span>, <span style="color: #ff0000;">14</span>
&nbsp;
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> d
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">27</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">5</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">21</span>, <span style="color: #ff0000;">31</span>, <span style="color: #ff0000;">53</span>, <span style="color: #ff0000;">71</span>, <span style="color: #ff0000;">11</span>
&nbsp;
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Take a sample of 15 from each group, with replacement, and a seed of 1</span>
<span style="color: #080;">&gt;</span> stratified<span style="color: #080;">&#40;</span>z, <span style="color: #ff0000;">15</span>, <span style="color: #ff0000;">1</span>, <span style="color: #0000FF; font-weight: bold;">replace</span><span style="color: #080;">=</span><span style="color: #0000FF; font-weight: bold;">T</span><span style="color: #080;">&#41;</span>
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> a
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">26</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">15</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">38</span>, <span style="color: #ff0000;">45</span>, <span style="color: #ff0000;">56</span>, <span style="color: #ff0000;">91</span>, <span style="color: #ff0000;">35</span>, <span style="color: #ff0000;">91</span>, <span style="color: #ff0000;">96</span>, <span style="color: #ff0000;">74</span>, <span style="color: #ff0000;">62</span>, <span style="color: #ff0000;">15</span>, <span style="color: #ff0000;">35</span>, <span style="color: #ff0000;">30</span>, <span style="color: #ff0000;">74</span>, <span style="color: #ff0000;">45</span>, <span style="color: #ff0000;">81</span>
&nbsp;
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> b
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">27</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">15</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">39</span>, <span style="color: #ff0000;">44</span>, <span style="color: #ff0000;">63</span>, <span style="color: #ff0000;">93</span>, <span style="color: #ff0000;">29</span>, <span style="color: #ff0000;">93</span>, <span style="color: #ff0000;">95</span>, <span style="color: #ff0000;">66</span>, <span style="color: #ff0000;">64</span>, <span style="color: #ff0000;">3</span>, <span style="color: #ff0000;">29</span>, <span style="color: #ff0000;">19</span>, <span style="color: #ff0000;">70</span>, <span style="color: #ff0000;">44</span>, <span style="color: #ff0000;">77</span>
&nbsp;
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">c</span>
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">20</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">15</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">23</span>, <span style="color: #ff0000;">26</span>, <span style="color: #ff0000;">55</span>, <span style="color: #ff0000;">94</span>, <span style="color: #ff0000;">22</span>, <span style="color: #ff0000;">92</span>, <span style="color: #ff0000;">94</span>, <span style="color: #ff0000;">72</span>, <span style="color: #ff0000;">61</span>, <span style="color: #ff0000;">9</span>, <span style="color: #ff0000;">22</span>, <span style="color: #ff0000;">14</span>, <span style="color: #ff0000;">72</span>, <span style="color: #ff0000;">26</span>, <span style="color: #ff0000;">78</span>
&nbsp;
&nbsp;
&nbsp;
&nbsp;
          Group <span style="color: #080;">=</span> d
Population Size <span style="color: #080;">=</span> <span style="color: #ff0000;">27</span>
    Sample Size <span style="color: #080;">=</span> <span style="color: #ff0000;">15</span>
           Seed <span style="color: #080;">=</span> <span style="color: #ff0000;">1</span>
         Sample <span style="color: #080;">=</span> <span style="color: #ff0000;">21</span>, <span style="color: #ff0000;">32</span>, <span style="color: #ff0000;">58</span>, <span style="color: #ff0000;">88</span>, <span style="color: #ff0000;">16</span>, <span style="color: #ff0000;">88</span>, <span style="color: #ff0000;">89</span>, <span style="color: #ff0000;">65</span>, <span style="color: #ff0000;">59</span>, <span style="color: #ff0000;">4</span>, <span style="color: #ff0000;">16</span>, <span style="color: #ff0000;">11</span>, <span style="color: #ff0000;">67</span>, <span style="color: #ff0000;">32</span>, <span style="color: #ff0000;">69</span>
&nbsp;
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Take a sample of 10% from each group, using a seed of 1,</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># and display the output as a data frame</span>
<span style="color: #080;">&gt;</span> stratified<span style="color: #080;">&#40;</span>z, .1, <span style="color: #ff0000;">1</span>, dframe<span style="color: #080;">=</span><span style="color: #0000FF; font-weight: bold;">T</span><span style="color: #080;">&#41;</span>
  Group Samples
<span style="color: #ff0000;">1</span>     a      <span style="color: #ff0000;">38</span>
<span style="color: #ff0000;">2</span>     a      <span style="color: #ff0000;">45</span>
<span style="color: #ff0000;">3</span>     a      <span style="color: #ff0000;">54</span>
  Group Samples
<span style="color: #ff0000;">1</span>     b      <span style="color: #ff0000;">39</span>
<span style="color: #ff0000;">2</span>     b      <span style="color: #ff0000;">43</span>
<span style="color: #ff0000;">3</span>     b      <span style="color: #ff0000;">60</span>
  Group Samples
<span style="color: #ff0000;">1</span>     <span style="color: #0000FF; font-weight: bold;">c</span>      <span style="color: #ff0000;">23</span>
<span style="color: #ff0000;">2</span>     <span style="color: #0000FF; font-weight: bold;">c</span>      <span style="color: #ff0000;">26</span>
  Group Samples
<span style="color: #ff0000;">1</span>     d      <span style="color: #ff0000;">21</span>
<span style="color: #ff0000;">2</span>     d      <span style="color: #ff0000;">31</span>
<span style="color: #ff0000;">3</span>     d      <span style="color: #ff0000;">53</span></pre></td></tr></table></div><h2>Replicating the results from tapply()</h2><p>I mentioned earlier that the results are different from what you would get if you were to use the <code>tapply()</code> function. However, it is easy to get the same results using this <code>stratified</code> function&#8211;simply move your &#8220;<code>seed</code>&#8221; outside of the function (enter the seed argument as <code>"NO"</code> [with quotes] and, instead, call <code>set.seed()</code> beforehand as you normally would).</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1141code14'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p114114"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
</pre></td><td
class="code" id="p1141code14"><pre class="rsplus" style="font-family:monospace;"><span style="color: #080;">&gt;</span> <span style="color: #228B22;"># See what tapply() gives us</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span><span style="color: #080;">;</span> <span style="color: #0000FF; font-weight: bold;">tapply</span><span style="color: #080;">&#40;</span>z$a, z$b, <span style="color: #0000FF; font-weight: bold;">sample</span>, size <span style="color: #080;">=</span> <span style="color: #ff0000;">4</span><span style="color: #080;">&#41;</span>
$a
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span> <span style="color: #ff0000;">38</span> <span style="color: #ff0000;">45</span> <span style="color: #ff0000;">54</span> <span style="color: #ff0000;">81</span>
&nbsp;
$b
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span> <span style="color: #ff0000;">29</span> <span style="color: #ff0000;">86</span> <span style="color: #ff0000;">95</span> <span style="color: #ff0000;">63</span>
&nbsp;
$c
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span> <span style="color: #ff0000;">61</span>  <span style="color: #ff0000;">9</span> <span style="color: #ff0000;">14</span> <span style="color: #ff0000;">92</span>
&nbsp;
$d
<span style="color: #080;">&#91;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#93;</span> <span style="color: #ff0000;">67</span> <span style="color: #ff0000;">31</span> <span style="color: #ff0000;">68</span> <span style="color: #ff0000;">34</span>
&nbsp;
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># The normal usage for the stratified function</span>
<span style="color: #080;">&gt;</span> stratified<span style="color: #080;">&#40;</span>z, <span style="color: #ff0000;">4</span>, <span style="color: #ff0000;">1</span>, dframe <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">T</span><span style="color: #080;">&#41;</span>
  Group Samples
<span style="color: #ff0000;">1</span>     a      <span style="color: #ff0000;">38</span>
<span style="color: #ff0000;">2</span>     a      <span style="color: #ff0000;">45</span>
<span style="color: #ff0000;">3</span>     a      <span style="color: #ff0000;">54</span>
<span style="color: #ff0000;">4</span>     a      <span style="color: #ff0000;">81</span>
  Group Samples
<span style="color: #ff0000;">1</span>     b      <span style="color: #ff0000;">39</span>
<span style="color: #ff0000;">2</span>     b      <span style="color: #ff0000;">43</span>
<span style="color: #ff0000;">3</span>     b      <span style="color: #ff0000;">60</span>
<span style="color: #ff0000;">4</span>     b      <span style="color: #ff0000;">79</span>
  Group Samples
<span style="color: #ff0000;">1</span>     <span style="color: #0000FF; font-weight: bold;">c</span>      <span style="color: #ff0000;">23</span>
<span style="color: #ff0000;">2</span>     <span style="color: #0000FF; font-weight: bold;">c</span>      <span style="color: #ff0000;">26</span>
<span style="color: #ff0000;">3</span>     <span style="color: #0000FF; font-weight: bold;">c</span>      <span style="color: #ff0000;">33</span>
<span style="color: #ff0000;">4</span>     <span style="color: #0000FF; font-weight: bold;">c</span>      <span style="color: #ff0000;">78</span>
  Group Samples
<span style="color: #ff0000;">1</span>     d      <span style="color: #ff0000;">21</span>
<span style="color: #ff0000;">2</span>     d      <span style="color: #ff0000;">31</span>
<span style="color: #ff0000;">3</span>     d      <span style="color: #ff0000;">53</span>
<span style="color: #ff0000;">4</span>     d      <span style="color: #ff0000;">71</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Getting the same results as tapply()</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Set the seed before using the function,</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># and set the seed for the function as &quot;NO&quot;</span>
<span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">set.<span style="">seed</span></span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">1</span><span style="color: #080;">&#41;</span><span style="color: #080;">;</span> stratified<span style="color: #080;">&#40;</span>z, <span style="color: #ff0000;">4</span>, <span style="color: #ff0000;">&quot;NO&quot;</span>, dframe <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">T</span><span style="color: #080;">&#41;</span>
  Group Samples
<span style="color: #ff0000;">1</span>     a      <span style="color: #ff0000;">38</span>
<span style="color: #ff0000;">2</span>     a      <span style="color: #ff0000;">45</span>
<span style="color: #ff0000;">3</span>     a      <span style="color: #ff0000;">54</span>
<span style="color: #ff0000;">4</span>     a      <span style="color: #ff0000;">81</span>
  Group Samples
<span style="color: #ff0000;">1</span>     b      <span style="color: #ff0000;">29</span>
<span style="color: #ff0000;">2</span>     b      <span style="color: #ff0000;">86</span>
<span style="color: #ff0000;">3</span>     b      <span style="color: #ff0000;">95</span>
<span style="color: #ff0000;">4</span>     b      <span style="color: #ff0000;">63</span>
  Group Samples
<span style="color: #ff0000;">1</span>     <span style="color: #0000FF; font-weight: bold;">c</span>      <span style="color: #ff0000;">61</span>
<span style="color: #ff0000;">2</span>     <span style="color: #0000FF; font-weight: bold;">c</span>       <span style="color: #ff0000;">9</span>
<span style="color: #ff0000;">3</span>     <span style="color: #0000FF; font-weight: bold;">c</span>      <span style="color: #ff0000;">14</span>
<span style="color: #ff0000;">4</span>     <span style="color: #0000FF; font-weight: bold;">c</span>      <span style="color: #ff0000;">92</span>
  Group Samples
<span style="color: #ff0000;">1</span>     d      <span style="color: #ff0000;">67</span>
<span style="color: #ff0000;">2</span>     d      <span style="color: #ff0000;">31</span>
<span style="color: #ff0000;">3</span>     d      <span style="color: #ff0000;">68</span>
<span style="color: #ff0000;">4</span>     d      <span style="color: #ff0000;">34</span></pre></td></tr></table></div><h2>The unfortunate&#8230;</h2><p>There are some advantages to each of the output formats. I&#8217;ve set up the list to be quite verbose, which is useful with the proportionate sampling since it shows us how many samples have been taken from each group. The data frame output format, on the other hand, is quite compact.</p><p>What I still need to figure out, though, is why R won&#8217;t store my output. I suspect that it has something to do with how my loops are set up. I assume that somewhere, I need to add something like an rbind command.</p><p>When the time is right, I will be sure to post what I&#8217;ve found.</p> <div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/2657Productions?a=-wyu5FBhqlg:wjrKfapmaSY:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=-wyu5FBhqlg:wjrKfapmaSY:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=-wyu5FBhqlg:wjrKfapmaSY:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=-wyu5FBhqlg:wjrKfapmaSY:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=-wyu5FBhqlg:wjrKfapmaSY:gIN9vFwOqvQ" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=-wyu5FBhqlg:wjrKfapmaSY:TzevzKxY174"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=TzevzKxY174" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=-wyu5FBhqlg:wjrKfapmaSY:bAAVSKdHlY4"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=-wyu5FBhqlg:wjrKfapmaSY:bAAVSKdHlY4" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/2657Productions/~4/-wyu5FBhqlg" height="1" width="1"/>]]></content:encoded> <wfw:commentRss>http://news.mrdwab.com/2011/05/15/stratified-random-sampling-in-r-beta/feed/</wfw:commentRss> <slash:comments>0</slash:comments> <feedburner:origLink>http://news.mrdwab.com/2011/05/15/stratified-random-sampling-in-r-beta/</feedburner:origLink></item> <item><title>Reshaping data in R revisited</title><link>http://feedproxy.google.com/~r/2657Productions/~3/sZV-XYjPD1k/</link> <comments>http://news.mrdwab.com/2011/04/18/reshaping-data-in-r-revisited/#comments</comments> <pubDate>Mon, 18 Apr 2011 04:15:17 +0000</pubDate> <dc:creator>Ananda</dc:creator> <category><![CDATA[Geekiness]]></category> <category><![CDATA[Useless Knowledge]]></category> <category><![CDATA[code]]></category> <category><![CDATA[data manipulation]]></category> <category><![CDATA[R]]></category> <category><![CDATA[reshape]]></category> <category><![CDATA[Stata]]></category> <guid isPermaLink="false">http://news.mrdwab.com/?p=1116</guid> <description><![CDATA[A year ago, I wrote a post about reshaping data from a wide format to a long format. I thought that considering how much time had passed, it would be good to revisit R&#8217;s in-built reshape functions. For these examples, I&#8217;ve copied the Stata examples from the UCLA Academic Technology Services&#8217;s &#8220;Reshape data wide to [...]]]></description> <content:encoded><![CDATA[<p>A year ago, I wrote a post about reshaping data from a wide format to a long format. I thought that considering how much time had passed, it would be good to revisit R&#8217;s in-built reshape functions.</p><p>For these examples, I&#8217;ve copied the Stata examples from the UCLA Academic Technology Services&#8217;s <a
href="http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm">&#8220;Reshape data wide to long&#8221;</a> page. Since the data is provided in Stata dta files, you need to first load the &#8220;foreign&#8221; package to be able to read the data in R.</p><p><span
id="more-1116"></span></p><p>This first example is very basic. There are four variables, the first one being the unique id, and the remaining three being the measures over three years.</p><p>The basic reshape command in R needs you to specify the data that is being reshaped, the ultimate &#8220;direction&#8221; (wide or long), and which variables are the ones to be reshaped. The default character R expects for &#8220;sep&#8221; is a period&#8211;in other words, it expects that your variables are named in the form of &#8220;faminc.96&#8221; and so on. However, your variables may be named in other ways, for example &#8220;faminc-96&#8221;, &#8220;faminc_96&#8221;, or (as in this example) &#8220;faminc96&#8221;. If your variable naming pattern is anything other than what R expects as its default, you also need to specify the separating character. In the case of this dataset, there is no separating character, so you simply use <code>sep=""</code>.</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1116code20'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p111620"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
</pre></td><td
class="code" id="p1116code20"><pre class="rsplus" style="font-family:monospace;"><span style="color: #080;">&gt;</span> <span style="color: #0000FF; font-weight: bold;">library</span><span style="color: #080;">&#40;</span>foreign<span style="color: #080;">&#41;</span> <span style="color: #228B22;"># Lets us use Stata files directly</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Example 1: Very basic reshape</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Use &quot;read.dta&quot; instead of &quot;read.csv&quot; or &quot;read.table&quot;</span>
<span style="color: #080;">&gt;</span> faminc <span style="color: #080;">=</span> read.<span style="">dta</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;http://www.ats.ucla.edu/stat/stata/modules/faminc.dta&quot;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> faminc
  famid faminc96 faminc97 faminc98
<span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">3</span>    <span style="color: #ff0000;">75000</span>    <span style="color: #ff0000;">76000</span>    <span style="color: #ff0000;">77000</span>
<span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">1</span>    <span style="color: #ff0000;">40000</span>    <span style="color: #ff0000;">40500</span>    <span style="color: #ff0000;">41000</span>
<span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">2</span>    <span style="color: #ff0000;">45000</span>    <span style="color: #ff0000;">45400</span>    <span style="color: #ff0000;">45800</span>
<span style="color: #080;">&gt;</span> l.<span style="">faminc</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">reshape</span><span style="color: #080;">&#40;</span>faminc, direction<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;long&quot;</span>, varying<span style="color: #080;">=</span><span style="color: #ff0000;">2</span><span style="color: #080;">:</span><span style="color: #ff0000;">4</span>, sep<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;&quot;</span>, idvar<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;famid&quot;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> l.<span style="">faminc</span><span style="color: #080;">&#91;</span><span style="color: #0000FF; font-weight: bold;">order</span><span style="color: #080;">&#40;</span>l.<span style="">faminc</span>$famid<span style="color: #080;">&#41;</span>,<span style="color: #080;">&#93;</span>
     famid <span style="color: #0000FF; font-weight: bold;">time</span> faminc
<span style="color: #ff0000;">1.96</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">96</span>  <span style="color: #ff0000;">40000</span>
<span style="color: #ff0000;">1.97</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">97</span>  <span style="color: #ff0000;">40500</span>
<span style="color: #ff0000;">1.98</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">98</span>  <span style="color: #ff0000;">41000</span>
<span style="color: #ff0000;">2.96</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">96</span>  <span style="color: #ff0000;">45000</span>
<span style="color: #ff0000;">2.97</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">97</span>  <span style="color: #ff0000;">45400</span>
<span style="color: #ff0000;">2.98</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">98</span>  <span style="color: #ff0000;">45800</span>
<span style="color: #ff0000;">3.96</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">96</span>  <span style="color: #ff0000;">75000</span>
<span style="color: #ff0000;">3.97</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">97</span>  <span style="color: #ff0000;">76000</span>
<span style="color: #ff0000;">3.98</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">98</span>  <span style="color: #ff0000;">77000</span></pre></td></tr></table></div><p>In the second example at UCLA ATS, the unique identifier is the combination of the first two variables. Since R assumes that whatever you have not specified as varying is going to be your identifying variable, it is not always required that you specify anything for &#8220;idvar&#8221;.</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1116code21'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p111621"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
</pre></td><td
class="code" id="p1116code21"><pre class="rsplus" style="font-family:monospace;"><span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Example 2: Two identifying variables</span>
<span style="color: #080;">&gt;</span> kidshtwt <span style="color: #080;">=</span> read.<span style="">dta</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;http://www.ats.ucla.edu/stat/stata/modules/kidshtwt.dta&quot;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> kidshtwt
  famid birth ht1 ht2 wt1 wt2
<span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.8</span> <span style="color: #ff0000;">3.4</span>  <span style="color: #ff0000;">19</span>  <span style="color: #ff0000;">28</span>
<span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.9</span> <span style="color: #ff0000;">3.8</span>  <span style="color: #ff0000;">21</span>  <span style="color: #ff0000;">28</span>
<span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">3</span> <span style="color: #ff0000;">2.2</span> <span style="color: #ff0000;">2.9</span>  <span style="color: #ff0000;">20</span>  <span style="color: #ff0000;">23</span>
<span style="color: #ff0000;">4</span>     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.0</span> <span style="color: #ff0000;">3.2</span>  <span style="color: #ff0000;">25</span>  <span style="color: #ff0000;">30</span>
<span style="color: #ff0000;">5</span>     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">1.8</span> <span style="color: #ff0000;">2.8</span>  <span style="color: #ff0000;">20</span>  <span style="color: #ff0000;">33</span>
<span style="color: #ff0000;">6</span>     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">3</span> <span style="color: #ff0000;">1.9</span> <span style="color: #ff0000;">2.4</span>  <span style="color: #ff0000;">22</span>  <span style="color: #ff0000;">33</span>
<span style="color: #ff0000;">7</span>     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.2</span> <span style="color: #ff0000;">3.3</span>  <span style="color: #ff0000;">22</span>  <span style="color: #ff0000;">28</span>
<span style="color: #ff0000;">8</span>     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.3</span> <span style="color: #ff0000;">3.4</span>  <span style="color: #ff0000;">20</span>  <span style="color: #ff0000;">30</span>
<span style="color: #ff0000;">9</span>     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">3</span> <span style="color: #ff0000;">2.1</span> <span style="color: #ff0000;">2.9</span>  <span style="color: #ff0000;">22</span>  <span style="color: #ff0000;">31</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Note the use of &quot;timevar&quot; to name the &quot;time&quot; column more appropriately.</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Also, we want to exclude the weight data from our reshape.</span>
<span style="color: #080;">&gt;</span> l.<span style="">kidsht</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">reshape</span><span style="color: #080;">&#40;</span>kidshtwt<span style="color: #080;">&#91;</span><span style="color: #080;">-</span><span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">5</span>, <span style="color: #ff0000;">6</span><span style="color: #080;">&#41;</span><span style="color: #080;">&#93;</span>, direction<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;long&quot;</span>, varying<span style="color: #080;">=</span><span style="color: #ff0000;">3</span><span style="color: #080;">:</span><span style="color: #ff0000;">4</span>, sep<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;&quot;</span>,
<span style="color: #080;">+</span>                    idvar<span style="color: #080;">=</span><span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #ff0000;">2</span>, timevar<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;age&quot;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># There are other ways to do that previous step. Here is one:</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;">#      &gt; l.kidsht = reshape(kidshtwt, direction=&quot;long&quot;, idvar=1:2,</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;">#      +                    varying=3:4, drop=5:6, sep=&quot;&quot;, timevar=&quot;age&quot;)</span>
<span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Let's sort the data first by the family id then by the birth order</span>
<span style="color: #080;">&gt;</span> l.<span style="">kidsht</span><span style="color: #080;">&#91;</span><span style="color: #0000FF; font-weight: bold;">order</span><span style="color: #080;">&#40;</span>l.<span style="">kidsht</span>$famid, l.<span style="">kidsht</span>$birth<span style="color: #080;">&#41;</span>,<span style="color: #080;">&#93;</span>
      famid birth age  ht
1.1.1     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.8</span>
1.1.2     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.4</span>
1.2.1     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.9</span>
1.2.2     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.8</span>
1.3.1     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.2</span>
1.3.2     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.9</span>
2.1.1     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.0</span>
2.1.2     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.2</span>
2.2.1     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">1.8</span>
2.2.2     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.8</span>
2.3.1     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">1.9</span>
2.3.2     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.4</span>
3.1.1     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.2</span>
3.1.2     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.3</span>
3.2.1     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.3</span>
3.2.2     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.4</span>
3.3.1     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.1</span>
3.3.2     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.9</span></pre></td></tr></table></div><p>The third example at UCLA&#8217;s page is pretty straightforward. It uses the same data we just loaded in example 2, but we are reshaping all four measured variables.</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1116code22'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p111622"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
</pre></td><td
class="code" id="p1116code22"><pre class="rsplus" style="font-family:monospace;"><span style="color: #080;">&gt;</span> l.<span style="">kidshtwt</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">reshape</span><span style="color: #080;">&#40;</span>kidshtwt, direction<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;long&quot;</span>, idvar<span style="color: #080;">=</span><span style="color: #ff0000;">1</span><span style="color: #080;">:</span><span style="color: #ff0000;">2</span>,varying<span style="color: #080;">=</span><span style="color: #ff0000;">3</span><span style="color: #080;">:</span><span style="color: #ff0000;">6</span>,
<span style="color: #080;">+</span>                      sep<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;&quot;</span>, timevar<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;age&quot;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> l.<span style="">kidshtwt</span><span style="color: #080;">&#91;</span><span style="color: #0000FF; font-weight: bold;">order</span><span style="color: #080;">&#40;</span>l.<span style="">kidshtwt</span>$famid, l.<span style="">kidshtwt</span>$birth<span style="color: #080;">&#41;</span>,<span style="color: #080;">&#93;</span>
      famid birth age  ht wt
1.1.1     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.8</span> <span style="color: #ff0000;">19</span>
1.1.2     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.4</span> <span style="color: #ff0000;">28</span>
1.2.1     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.9</span> <span style="color: #ff0000;">21</span>
1.2.2     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.8</span> <span style="color: #ff0000;">28</span>
1.3.1     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.2</span> <span style="color: #ff0000;">20</span>
1.3.2     <span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.9</span> <span style="color: #ff0000;">23</span>
2.1.1     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.0</span> <span style="color: #ff0000;">25</span>
2.1.2     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.2</span> <span style="color: #ff0000;">30</span>
2.2.1     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">1.8</span> <span style="color: #ff0000;">20</span>
2.2.2     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.8</span> <span style="color: #ff0000;">33</span>
2.3.1     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">1.9</span> <span style="color: #ff0000;">22</span>
2.3.2     <span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.4</span> <span style="color: #ff0000;">33</span>
3.1.1     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.2</span> <span style="color: #ff0000;">22</span>
3.1.2     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">1</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.3</span> <span style="color: #ff0000;">28</span>
3.2.1     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.3</span> <span style="color: #ff0000;">20</span>
3.2.2     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">2</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">3.4</span> <span style="color: #ff0000;">30</span>
3.3.1     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">1</span> <span style="color: #ff0000;">2.1</span> <span style="color: #ff0000;">22</span>
3.3.2     <span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">3</span>   <span style="color: #ff0000;">2</span> <span style="color: #ff0000;">2.9</span> <span style="color: #ff0000;">31</span></pre></td></tr></table></div><p>The fourth example was the most tricky one for me at first. In that example, the variables are not distinguished by &#8220;time&#8221; (numerically) but rather by a character. As you can see, the variable names are &#8220;famid&#8221;, &#8220;named&#8221;, &#8220;incd&#8221;,  &#8220;namem&#8221;, and &#8220;incm&#8221; &#8212; in other words, income and name for dad (variable ending in &#8220;d&#8221;) and mom (variable ending in &#8220;m&#8221;) for each family.</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1116code23'); return false;">View Code</a> RSPLUS</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p111623"><td
class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
</pre></td><td
class="code" id="p1116code23"><pre class="rsplus" style="font-family:monospace;"><span style="color: #080;">&gt;</span> <span style="color: #228B22;"># Example 3: Non-numeric identifiers for the variables</span>
<span style="color: #080;">&gt;</span> dadmomw <span style="color: #080;">=</span> read.<span style="">dta</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;http://www.ats.ucla.edu/stat/stata/modules/dadmomw.dta&quot;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> dadmomw
  famid named  incd namem  incm
<span style="color: #ff0000;">1</span>     <span style="color: #ff0000;">1</span>  Bill <span style="color: #ff0000;">30000</span>  Bess <span style="color: #ff0000;">15000</span>
<span style="color: #ff0000;">2</span>     <span style="color: #ff0000;">2</span>   Art <span style="color: #ff0000;">22000</span>   Amy <span style="color: #ff0000;">18000</span>
<span style="color: #ff0000;">3</span>     <span style="color: #ff0000;">3</span>  Paul <span style="color: #ff0000;">25000</span>   Pat <span style="color: #ff0000;">50000</span>
<span style="color: #080;">&gt;</span> r.<span style="">dadmomw</span> <span style="color: #080;">=</span> <span style="color: #0000FF; font-weight: bold;">reshape</span><span style="color: #080;">&#40;</span>dadmomw, direction<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;long&quot;</span>, idvar<span style="color: #080;">=</span><span style="color: #ff0000;">1</span>, varying<span style="color: #080;">=</span><span style="color: #ff0000;">2</span><span style="color: #080;">:</span><span style="color: #ff0000;">5</span>,
<span style="color: #080;">+</span>                     sep<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;&quot;</span>, v.<span style="">names</span><span style="color: #080;">=</span><span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;name&quot;</span>, <span style="color: #ff0000;">&quot;inc&quot;</span><span style="color: #080;">&#41;</span>,
<span style="color: #080;">+</span>                     timevar<span style="color: #080;">=</span><span style="color: #ff0000;">&quot;dadmom&quot;</span>, times<span style="color: #080;">=</span><span style="color: #0000FF; font-weight: bold;">c</span><span style="color: #080;">&#40;</span><span style="color: #ff0000;">&quot;dad&quot;</span>, <span style="color: #ff0000;">&quot;mom&quot;</span><span style="color: #080;">&#41;</span><span style="color: #080;">&#41;</span>
<span style="color: #080;">&gt;</span> r.<span style="">dadmomw</span><span style="color: #080;">&#91;</span><span style="color: #0000FF; font-weight: bold;">order</span><span style="color: #080;">&#40;</span>r.<span style="">dadmomw</span>$famid<span style="color: #080;">&#41;</span>,<span style="color: #080;">&#93;</span>
      famid dadmom  name  inc
<span style="color: #ff0000;">1</span>.<span style="">dad</span>     <span style="color: #ff0000;">1</span>    dad <span style="color: #ff0000;">30000</span> Bill
<span style="color: #ff0000;">1</span>.<span style="">mom</span>     <span style="color: #ff0000;">1</span>    mom <span style="color: #ff0000;">15000</span> Bess
<span style="color: #ff0000;">2</span>.<span style="">dad</span>     <span style="color: #ff0000;">2</span>    dad <span style="color: #ff0000;">22000</span>  Art
<span style="color: #ff0000;">2</span>.<span style="">mom</span>     <span style="color: #ff0000;">2</span>    mom <span style="color: #ff0000;">18000</span>  Amy
<span style="color: #ff0000;">3</span>.<span style="">dad</span>     <span style="color: #ff0000;">3</span>    dad <span style="color: #ff0000;">25000</span> Paul
<span style="color: #ff0000;">3</span>.<span style="">mom</span>     <span style="color: #ff0000;">3</span>    mom <span style="color: #ff0000;">50000</span>  Pat</pre></td></tr></table></div><p>Stata&#8217;s commands are certainly more direct (see below for what you would do for the last example in Stata). R&#8217;s commands sometimes tend to be a bit verbose, but in some ways, that might also help you remember what you&#8217;re doing. (I still don&#8217;t know what &#8220;i&#8221; and &#8220;j&#8221; in the Stata reshape commands stand for.) If you can afford the ~ $2,500 price tag, <a
href="http://ekonometrics.blogspot.com/2011/04/speeding-tickets-for-r-and-stata.html">Stata is</a> <a
href="http://ekonometrics.blogspot.com/2011/04/going-over-speed-limit.html">also faster</a>.</p><div
class="wp_codebox_msgheader"><span
class="right"><sup><a
href="http://www.ericbess.com/ericblog/2008/03/03/wp-codebox/#examples" target="_blank" title="WP-CodeBox HowTo?"><span
style="color: #99cc00">?</span></a></sup></span><span
class="left"><a
href="javascript:;" onclick="javascript:showCodeTxt('p1116code24'); return false;">View Code</a> STATA</span><div
class="codebox_clear"></div></div><div
class="wp_codebox"><table><tr
id="p111624"><td
class="line_numbers"><pre>1
2
3
4
5
6
</pre></td><td
class="code" id="p1116code24"><pre class="stata" style="font-family:monospace;">use http://www.ats.ucla.edu/stat/stata/modules/dadmomw, clear
list
use http://www.ats.ucla.edu/stat/stata/modules/dadmomw, clear
list
reshape long name  inc, i(famid) j(dadmom) string
list</pre></td></tr></table></div> <div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/2657Productions?a=sZV-XYjPD1k:k5JVmlvQrg0:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=sZV-XYjPD1k:k5JVmlvQrg0:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=sZV-XYjPD1k:k5JVmlvQrg0:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=sZV-XYjPD1k:k5JVmlvQrg0:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=sZV-XYjPD1k:k5JVmlvQrg0:gIN9vFwOqvQ" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=sZV-XYjPD1k:k5JVmlvQrg0:TzevzKxY174"><img src="http://feeds.feedburner.com/~ff/2657Productions?d=TzevzKxY174" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/2657Productions?a=sZV-XYjPD1k:k5JVmlvQrg0:bAAVSKdHlY4"><img src="http://feeds.feedburner.com/~ff/2657Productions?i=sZV-XYjPD1k:k5JVmlvQrg0:bAAVSKdHlY4" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/2657Productions/~4/sZV-XYjPD1k" height="1" width="1"/>]]></content:encoded> <wfw:commentRss>http://news.mrdwab.com/2011/04/18/reshaping-data-in-r-revisited/feed/</wfw:commentRss> <slash:comments>0</slash:comments> <feedburner:origLink>http://news.mrdwab.com/2011/04/18/reshaping-data-in-r-revisited/</feedburner:origLink></item> </channel> </rss>

