<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" version="2.0">

<channel>
	<title>time to bleed by Joe Damato</title>
	
	<link>http://timetobleed.com</link>
	<description>technical ramblings from a wanna-be unix dinosaur</description>
	<lastBuildDate>Sun, 01 Aug 2010 18:24:35 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.0</generator>
		<atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed" /><feedburner:info xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" uri="timetobleed" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><feedburner:emailServiceId xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">TimeToBleed</feedburner:emailServiceId><feedburner:feedburnerHostname xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">http://feedburner.google.com</feedburner:feedburnerHostname><item>
		<title>Slides from Defcon 18: Function hooking for OSX and Linux</title>
		<link>http://timetobleed.com/slides-from-defcon-18-function-hooking-for-osx-and-linux/</link>
		<comments>http://timetobleed.com/slides-from-defcon-18-function-hooking-for-osx-and-linux/#comments</comments>
		<pubDate>Sun, 01 Aug 2010 18:24:35 +0000</pubDate>
		<dc:creator>Aman Gupta</dc:creator>
				<category><![CDATA[debugging]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[monitoring]]></category>
		<category><![CDATA[osx]]></category>
		<category><![CDATA[systems]]></category>
		<category><![CDATA[profiling]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1928</guid>
		<description><![CDATA[Function hooking for OSX and Linux]]></description>
			<content:encoded><![CDATA[<p><a title="View Function hooking for OSX and Linux on Scribd" href="http://www.scribd.com/doc/35191054/Function-hooking-for-OSX-and-Linux" style="margin: 12px auto 6px auto; font-family: Helvetica,Arial,Sans-serif; font-style: normal; font-variant: normal; font-weight: normal; font-size: 14px; line-height: normal; font-size-adjust: none; font-stretch: normal; -x-system-font: none; display: block; text-decoration: underline;">Function hooking for OSX and Linux</a> <object id="doc_42930970869868" name="doc_42930970869868" height="500" width="100%" type="application/x-shockwave-flash" data="http://d1.scribdassets.com/ScribdViewer.swf" style="outline:none;" rel="media:presentation" resource="http://d1.scribdassets.com/ScribdViewer.swf?document_id=35191054&#038;access_key=key-1ffxxqbbglaccfa347qr&#038;page=1&#038;viewMode=slideshow" xmlns:media="http://search.yahoo.com/searchmonkey/media/" xmlns:dc="http://purl.org/dc/terms/" ><param name="movie" value="http://d1.scribdassets.com/ScribdViewer.swf"><param name="wmode" value="opaque"><param name="bgcolor" value="#ffffff"><param name="allowFullScreen" value="true"><param name="allowScriptAccess" value="always"><param name="FlashVars" value="document_id=35191054&#038;access_key=key-1ffxxqbbglaccfa347qr&#038;page=1&#038;viewMode=slideshow"><embed id="doc_42930970869868" name="doc_42930970869868" src="http://d1.scribdassets.com/ScribdViewer.swf?document_id=35191054&#038;access_key=key-1ffxxqbbglaccfa347qr&#038;page=1&#038;viewMode=slideshow" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" height="500" width="100%" wmode="opaque" bgcolor="#ffffff"></embed></object> </p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=sOBoqVFypHE:Wl5ZtXUVCe8:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=sOBoqVFypHE:Wl5ZtXUVCe8:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=sOBoqVFypHE:Wl5ZtXUVCe8:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=sOBoqVFypHE:Wl5ZtXUVCe8:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=sOBoqVFypHE:Wl5ZtXUVCe8:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=sOBoqVFypHE:Wl5ZtXUVCe8:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=sOBoqVFypHE:Wl5ZtXUVCe8:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=sOBoqVFypHE:Wl5ZtXUVCe8:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/sOBoqVFypHE" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/slides-from-defcon-18-function-hooking-for-osx-and-linux/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>GCC optimization flag makes your 64bit binary fatter and slower</title>
		<link>http://timetobleed.com/gcc-optimization-flag-makes-your-64bit-binary-fatter-and-slower/</link>
		<comments>http://timetobleed.com/gcc-optimization-flag-makes-your-64bit-binary-fatter-and-slower/#comments</comments>
		<pubDate>Tue, 20 Jul 2010 12:59:53 +0000</pubDate>
		<dc:creator>Joe Damato</dc:creator>
				<category><![CDATA[debugging]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[systems]]></category>
		<category><![CDATA[testing]]></category>
		<category><![CDATA[x86]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1909</guid>
		<description><![CDATA[If you enjoy this article, subscribe (via RSS or e-mail) and follow me on twitter. The intention of this post is to highlight a subtle GCC optimization bug that leads to slower and larger code being generated than would have been generated without the optimization flag. UPDATED: Graphs are now 0 based on the y [...]]]></description>
			<content:encoded><![CDATA[<p><center><img src="http://timetobleed.com/images/large_bug.jpg" alt="" width="300" height="400" /></center><br />
If you enjoy this article, <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<p>The intention of this post is to highlight a subtle GCC optimization bug that leads to slower and larger code being generated than would have been generated without the optimization flag.</p>
<h2>UPDATED: Graphs are now 0 based on the y axis. Links in the tidbits section (below conclusion) for my ugly test harness and terminal session of the build of the test case in the bug report, objdump, and corresponding system information.</h2>
<h2>Hold the #gccfail tweets, son.</h2>
<p>Everyone fucks up. The point of this post is <em>not</em> to rag on GCC. If writing a C compiler was easy then every asshole with a keyboard would write one for fun.</p>
<h2>WARNING: THERE IS MATH, SCIENCE, AND GRAPHS BELOW.</h2>
<p>Watch yourself.</p>
<h2>The original bug report for <code>-fomit-frame-pointer</code>.</h2>
<p>I stumbled across a <a href="http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44958">bug report for GCC</a> that was very interesting. It points out a very subtle bug that occurs when the <code>-fomit-frame-pointer</code> flag is passed to GCC. The bug report is for 32bit code, however after some testing I found that this bug <strong>also rears its head in 64bit code</strong>.</p>
<h2>What is <code>-fomit-frame-pointer</code> supposed to do?</h2>
<p>The <code>-fomit-frame-pointer</code> flag is intended to direct GCC to avoid saving and restoring the frame pointer (<code>%ebp</code> or <code>%rbp</code>). This is supposed to make function calls faster, since the function is doing less work each invocation. It should also make function code take fewer bytes since there are fewer instructions being executed.</p>
<p>A caveat of using <code>-fomit-frame-pointer</code> is that it <em>may</em> make <strong>debugging impossible</strong> on certain systems. To combat this on Linux, <code>.debug_frame</code> and <code>.eh_frame</code> sections are added to ELF binaries to assist in the stack unwinding process when the frame pointer is omitted.</p>
<h2>What is the bug?</h2>
<p>The bug is that when <code>-fomit-frame-pointer</code> is used, GCC erroneously uses the frame pointer register as a general purpose register <em>when a different register could be used instead</em>.</p>
<p><strong>wat.</strong></p>
<p>The amd64 and i386 ABIs<sup>1</sup> <sup>2</sup> specify a list of caller and callee saved registers.</p>
<ul>
<li>The frame pointer register is callee saved. That means that if a function is going to use the frame pointer register, it must save and restore the value in the register.</li>
<li>The test case provided in the bug report shows that other <em>caller</em> saved registers were available for use.</li>
<li>Had the function used a caller saved register instead, there would be <em>no need</em> for the additional save and restore instructions in the function.</li>
<li>Removing those instructions would take fewer bytes and execute faster.</li>
</ul>
<h2>What are the consequences?</h2>
<p>Let&#8217;s take a look at two potential pieces of code.</p>
<p>The first piece is the code that would be generated if <code>-fomit-frame-pointer</code> <strong>is not used</strong>:</p>
<pre class="prettyprint">test1:
        pushq %rbp       ; save frame pointer
        movq %rsp,%rbp   ; update frame pointer to the current stack pointer
           ; here is where your function would do work
        leave            ; restore the stack pointer and frame pointer
        ret              ; return</pre>
<p><strong>Size: 6 bytes</strong>.</p>
<p>The above assembly sequence uses the frame pointer.</p>
<p>Let&#8217;s take a look at the code that is generated by GCC when <code>-fomit-frame-pointer</code> is used:</p>
<pre class="prettyprint">        sub $0x8, %rsp    ; make room on the stack
        movq %rbp, (%rsp) ; store rbp on the stack
          ; here is where your function would modify and use %rbp as needed
        movq (%rsp), %rbp ; restore %rbp
        add $0x8, %rsp    ; get rid of the extra stack space
        ret               ; return</pre>
<p><strong>Size: 17 bytes</strong>.</p>
<p>The above assembly sequence is what is generated when GCC decides to use the frame pointer register as a general purpose register. Since it is callee saved, it must be saved before being modified and restored after being modified.</p>
<h2>So <code>-fomit-frame-pointer</code> makes your binary fatter, but does it make it slower?</h2>
<p>Only one way to find out: <strong>do science.</strong></p>
<p>I built a simple (and very ugly) testing harness to test the above pieces of code to determine which piece of code is faster. Before we get into the benchmark results, I want to tell you why my benchmark is <em>bullshit</em>.</p>
<p>Yes, <em>bullshit</em>.</p>
<p>You see, it makes me sad when people post benchmarks and neglect to tell others why their benchmark may be inaccurate. So, lemme start the trend.</p>
<p>This benchmark is useless because:</p>
<ul>
<li>Reading the CPU cycle counter is unreliable (more on this below the conclusion). I also tracked wall clock time.</li>
<li>I don&#8217;t have the ideal test environment. I ran this on bare metal hardware, and set the CPU affinity to keep the process pinned to a single CPU&#8230; <strong>BUT</strong></li>
<li><strong>I could have done better</strong> if I had pinned <code>init</code> to CPU0 (thereby forcing all children of init to be pinned to CPU0 &#8211; <strong>remember child processes inherit the affinity mask</strong>). I would have then had an entire CPU for nothing but my benchmark.</li>
<li><strong>I could have done better</strong> if I forced the CPU running my benchmark program to not handle any IRQs.</li>
<li><b>I only tested one version of GCC</b>: (Debian 4.3.2-1.1) 4.3.2</li>
<li><strong>I could have</strong> taken more samples.</li>
</ul>
<p>You can find more testing harness tidbits below the conclusion.</p>
<h2>Benchmark Results</h2>
<p>
<b>test 1</b> &#8212; Code sequence simulating using the  frame pointer.<br />
<b>test 2</b> &#8212; Code sequence simulating using the frame pointer as a general purpose register.
</p>
<h2>64bit results</h2>
<p><b><u>Using <code>-fomit-frame-pointer</code> is SLOWER (contrary to what you&#8217;d expect) than not using it!</u></b></p>
<table border="1" bordercolor="#000000" style="background-color:#ffffff" width="600" cellpadding="1" cellspacing="0">
<tr>
<td></td>
<td>cycles test 1</td>
<td>cycles test 2</td>
<td>microsecs test 1</td>
<td>microsecs test 2</td>
</tr>
<tr>
<td>mean</td>
<td>3514422987.92</td>
<td>4559685515.66</td>
<td>1882707.27</td>
<td>2442663.94</td>
</tr>
<tr>
<td>median</td>
<td>3507007423.5</td>
<td>4562511684.5</td>
<td>1878721.5</td>
<td>2444171.5</td>
</tr>
<tr>
<td>max</td>
<td>3922780211</td>
<td>4672066854</td>
<td>2101457</td>
<td>2502869</td>
</tr>
<tr>
<td>min</td>
<td>3502194976</td>
<td>4327782795</td>
<td>1876113</td>
<td>2318452</td>
</tr>
<tr>
<td></td>
<td></td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>std dev</td>
<td>31927179.5632</td>
<td>15449507.8196</td>
<td>17103.7755</td>
<td>8275.49788</td>
</tr>
<tr>
<td>variance</td>
<td>1.02E+15</td>
<td>238687291867021</td>
<td>292539135.936</td>
<td>68483865.11835</td>
</tr>
</table>
<p></p>
<p>
<img src="http://timetobleed.com/images/64bit_cycles.png" alt="" />
</p>
<p>
<br />
<img src="http://timetobleed.com/images/64bit_microsecs.png" alt="" />
</p>
<p></p>
<h2>32bit results</h2>
<p><b><u>Using <code>-fomit-frame-pointer</code> is FASTER (as it should be) than not using it! The binary is still fatter, though.</u></b></p>
<table border="1" bordercolor="#000000" style="background-color:#ffffff" width="600" cellpadding="1" cellspacing="0">
<tr>
<td></td>
<td>cycles test 1</td>
<td>cycles test 2</td>
<td>microsecs test 1</td>
<td>microsecs test 2</td>
</tr>
<tr>
<td>mean</td>
<td>3502932799.49</td>
<td>3491263364.89</td>
<td>1876553.08</td>
<td>1870301.35</td>
</tr>
<tr>
<td>median</td>
<td>3501486586.5 </td>
<td>3492013955.5</td>
<td>1875778</td>
<td>1870702.5</td>
</tr>
<tr>
<td>max</td>
<td>3905163528</td>
<td>3731985243</td>
<td>2092032</td>
<td>1999259</td>
</tr>
<tr>
<td>min</td>
<td>3500916510</td>
<td>3408834436</td>
<td>1875472</td>
<td>1826144</td>
</tr>
<tr>
<td></td>
<td></td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>std dev</td>
<td>10066939.1113</td>
<td>7992367.6913</td>
<td>5393.0412</td>
<td>4281.5466</td>
</tr>
<tr>
<td>variance</td>
<td>101343263071403</td>
<td>63877941312996.4</td>
<td>29084893.2588</td>
<td>18331640.9459</td>
</tr>
</table>
<p></p>
<p>
<img src="http://timetobleed.com/images/32bit_cycles.png" alt="" />
</p>
<p>
<br />
<img src="http://timetobleed.com/images/32bit_microsecs.png" alt="" />
</p>
<h2>Conclusion</h2>
<ul>
<li>GCC is a really complex piece of software; this bug is very subtle and may have existed for a while.</li>
<li>I&#8217;ve said this a few times, but knowing and understanding your system&#8217;s ABI is crucial for catching bugs like these.</li>
<li>Math and science are cool now, much like computers. You should use both.</li>
</ul>
<p>
Thanks for reading and don&#8217;t forget to <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<h2>Testing harness tidbits</h2>
<p>Each <strong>run</strong> of the benchmark executes either <code>test1</code> or <code>test2</code> (from above) 500,000,000 times. I did around 2500 runs for each test function.<br />
</p>
<p>
You can get the testing harness, a build script, and a test script here: <a href="http://gist.github.com/483524">http://gist.github.com/483524</a>
</p>
<p>You can look at the terminal session where I build the test from the original bug report on my system: <a href="http://gist.github.com/483494">http://gist.github.com/483494</a>
</p>
<p>
The code I used to read the CPU cycle counter looks like this:</p>
<pre class="prettyprint">static __inline__ unsigned long long rdtsc(void)
{
  unsigned long hi = 0, lo = 0;
  __asm__ __volatile__ ("lfence\n\trdtsc" : "=a"(lo), "=d"(hi));
  return ( (unsigned long long)lo)|( ((unsigned long long)hi)&lt;&lt;32 );
}</pre>
</p>
<p>
The <code>lfence</code> instruction is a serializing instruction that ensures that all load instructions which were issued before the <code>lfence</code> instruction have been executed before proceeding. I did this to make sure that the cycle counter was being read after all operations in the test functions were executed.<br />
<br />
The values returned by this function are misleading because CPU frequency may be scaled at any time. This is why I also measured wall clock time.<br />
</p>
<h2>References</h2>
<ol class="footnotes"><li id="footnote_0_1909" class="footnote"><a href="http://www.sco.com/developers/devspecs/abi386-4.pdf">http://www.sco.com/developers/devspecs/abi386-4.pdf</a></li><li id="footnote_1_1909" class="footnote"><a href="http://www.x86-64.org/documentation/abi.pdf">http://www.x86-64.org/documentation/abi.pdf</a></li></ol><div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=8SEVsvwhbb8:bHa8VJv8XRg:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=8SEVsvwhbb8:bHa8VJv8XRg:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=8SEVsvwhbb8:bHa8VJv8XRg:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=8SEVsvwhbb8:bHa8VJv8XRg:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=8SEVsvwhbb8:bHa8VJv8XRg:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=8SEVsvwhbb8:bHa8VJv8XRg:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=8SEVsvwhbb8:bHa8VJv8XRg:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=8SEVsvwhbb8:bHa8VJv8XRg:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/8SEVsvwhbb8" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/gcc-optimization-flag-makes-your-64bit-binary-fatter-and-slower/feed/</wfw:commentRss>
		<slash:comments>6</slash:comments>
		</item>
		<item>
		<title>Garbage Collection and the Ruby Heap (from railsconf)</title>
		<link>http://timetobleed.com/garbage-collection-and-the-ruby-heap-from-railsconf/</link>
		<comments>http://timetobleed.com/garbage-collection-and-the-ruby-heap-from-railsconf/#comments</comments>
		<pubDate>Tue, 08 Jun 2010 16:38:20 +0000</pubDate>
		<dc:creator>Joe Damato</dc:creator>
				<category><![CDATA[debugging]]></category>
		<category><![CDATA[ruby]]></category>
		<category><![CDATA[scaling]]></category>
		<category><![CDATA[systems]]></category>
		<category><![CDATA[x86]]></category>
		<category><![CDATA[debug]]></category>
		<category><![CDATA[garbage collection]]></category>
		<category><![CDATA[GC]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[ltrace]]></category>
		<category><![CDATA[memory]]></category>
		<category><![CDATA[performance]]></category>
		<category><![CDATA[profiling]]></category>
		<category><![CDATA[x86_64]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1787</guid>
		<description><![CDATA[Download as PDF (15mb) Garbage Collection and the Ruby Heap]]></description>
			<content:encoded><![CDATA[<p><a style="float:right" href="http://dl.dropbox.com/u/1681973/gc-railsconf.pdf">Download as PDF (15mb)</a><br />
<a title="View Garbage Collection and the Ruby Heap on Scribd" href="http://www.scribd.com/doc/32718051/Garbage-Collection-and-the-Ruby-Heap" style="margin: 12px auto 6px auto; font-family: Helvetica,Arial,Sans-serif; font-style: normal; font-variant: normal; font-weight: normal; font-size: 14px; line-height: normal; font-size-adjust: none; font-stretch: normal; -x-system-font: none; display: block; text-decoration: underline;">Garbage Collection and the Ruby Heap</a> <object id="doc_179903367382288" name="doc_179903367382288" height="600" width="100%" type="application/x-shockwave-flash" data="http://d1.scribdassets.com/ScribdViewer.swf" style="outline:none;" ><param name="movie" value="http://d1.scribdassets.com/ScribdViewer.swf"><param name="wmode" value="opaque"><param name="bgcolor" value="#ffffff"><param name="allowFullScreen" value="true"><param name="allowScriptAccess" value="always"><param name="FlashVars" value="document_id=32718051&#038;access_key=key-1hl4d18vocqmc9ilk9a&#038;page=1&#038;viewMode=slideshow"><embed id="doc_179903367382288" name="doc_179903367382288" src="http://d1.scribdassets.com/ScribdViewer.swf?document_id=32718051&#038;access_key=key-1hl4d18vocqmc9ilk9a&#038;page=1&#038;viewMode=slideshow" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" height="600" width="100%" wmode="opaque" bgcolor="#ffffff"></embed></object></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=atXJm3rGBAo:Vkv0cT8Mvvs:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=atXJm3rGBAo:Vkv0cT8Mvvs:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=atXJm3rGBAo:Vkv0cT8Mvvs:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=atXJm3rGBAo:Vkv0cT8Mvvs:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=atXJm3rGBAo:Vkv0cT8Mvvs:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=atXJm3rGBAo:Vkv0cT8Mvvs:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=atXJm3rGBAo:Vkv0cT8Mvvs:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=atXJm3rGBAo:Vkv0cT8Mvvs:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/atXJm3rGBAo" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/garbage-collection-and-the-ruby-heap-from-railsconf/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Dynamic symbol table duel: ELF vs Mach-O, round 2</title>
		<link>http://timetobleed.com/dynamic-symbol-table-duel-elf-vs-mach-o-round-2/</link>
		<comments>http://timetobleed.com/dynamic-symbol-table-duel-elf-vs-mach-o-round-2/#comments</comments>
		<pubDate>Tue, 01 Jun 2010 12:59:46 +0000</pubDate>
		<dc:creator>Joe Damato</dc:creator>
				<category><![CDATA[linux]]></category>
		<category><![CDATA[osx]]></category>
		<category><![CDATA[systems]]></category>
		<category><![CDATA[x86]]></category>
		<category><![CDATA[debug]]></category>
		<category><![CDATA[elf]]></category>
		<category><![CDATA[mach-o]]></category>
		<category><![CDATA[x86_64]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1668</guid>
		<description><![CDATA[If you enjoy this article, subscribe (via RSS or e-mail) and follow me on twitter. The intention of this post is to continue highlighting some of the similarities and differences between ELF and Mach-O that I encountered while building memprof. The previous post in this series can be found here. What is a symbol table? [...]]]></description>
			<content:encoded><![CDATA[<p><center><img src="http://timetobleed.com/images/duel.jpg" alt="" width="300" height="400" /></center><br />
If you enjoy this article, <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<p>The intention of this post is to continue highlighting <b>some</b> of the similarities and differences between <code>ELF</code> and <code>Mach-O</code> that I encountered while building <a href="http://github.com/ice799/memprof">memprof</a>. The previous post in this series can be found <a href="http://timetobleed.com/dynamic-linking-elf-vs-mach-o/">here</a>.</p>
<h2>What is a symbol table?</h2>
<p>A <b>symbol table</b> is simply a list of names  in an object. The names in the list may be names of functions, initialized/uninitialized memory regions, or other things depending on the object format. The <b>symbol table</b> does <b>not</b> need to be mapped into a running process and is only useful for debugging. The <b>symbol table</b> (and other sections) may be removed from an object when you use <code>strip</code>.</p>
<h2>Symbol tables in <code>ELF</code> objects</h2>
<p>An entry in the symbol table in an <b>ELF</b> object can best be described by the following <code>struct</code> from <code>/usr/include/elf.h</code>:</p>
<pre class="prettyprint">
typedef struct
{
  Elf64_Word    st_name;                /* Symbol name (string tbl index) */
  unsigned char st_info;                /* Symbol type and binding */
  unsigned char st_other;               /* Symbol visibility */
  Elf64_Section st_shndx;               /* Section index */
  Elf64_Addr    st_value;               /* Symbol value */
  Elf64_Xword   st_size;                /* Symbol size */
} Elf64_Sym;
</pre>
<p></p>
<p>In most cases, this structure is used to find the mapping from a symbol name to the address where it lives. However, different symbol types (specified by <code>st_info</code>) provide mappings from symbols to other data.</p>
<p>The <code>st_name</code> field is an index into a section called <code>strtab</code> which is just a table of strings.</p>
<h2>Symbol tables in <code>Mach-O</code> objects</h2>
<p>Let&#8217;s take a look at the <code>struct</code> for a symbol table entry in a <b>Mach-O</b> object from <code>/usr/include/mach-o/nlist.h</code>:</p>
<pre class = "prettyprint">
struct nlist_64 {
    union {
        uint32_t  n_strx; /* index into the string table */
    } n_un;
    uint8_t n_type;        /* type flag */
    uint8_t n_sect;        /* section number or NO_SECT */
    uint16_t n_desc;       /* see &lt;mach-o/stab.h&gt; */
    uint64_t n_value;      /* value of this symbol (or stab offset) */
};
</pre>
<p></p>
<p>It looks very similar. The immediately noticeable difference with <code>ELF</code>:
<ul>
<li><b>lack of <code>size</code> field</b> &#8211; The only noticeable difference at first glance is the lack of a size field. The size field in <b>ELF</b> objects describes the number of bytes occupied by the symbol. This is actually pretty useful, especially for <a href="http://github.com/ice799/memprof">memprof</a>. The <i>lack</i> of this field in <b>Mach-O</b> was a source of frustration for <a href="http://twitter.com/jakedouglas">Jake</a> when he was implementing Mach-O support.</li>
</ul>
</p>
<h2>What is a <i>dynamic</i> symbol table?</h2>
<p>Shared objects in both <code>Mach-O</code> and <code>ELF</code> have a symbol table listing <i>only</i> functions that are exported by the object.</p>
<p>This table is used during dynamic linking and is mapped into the process&#8217; address space when the object is loaded, unlike the symbol table which is just used for debugging.</p>
<p>The <b>dynamic symbol table</b> is a <i>subset</i> of the <b>symbol table</b>. </p>
<h2>Dynamic symbol table in ELF objects</h2>
<p>The dynamic symbol table in ELF objects is stored in a section named <code>dynsym</code>. The indexes stored in the <code>st_name</code> field (from the structure listed above) are indexes into the string table in a section named <code>dynstr</code>. <code>dynstr</code> is a string table specifically for entries in the dynamic symbol table.</p>
<p>If you know the symbol you care about, you can simply calculate a hash of the symbol name to find the symbol table entry for that symbol. Unfortunately, there is not very much documentation about the hash function that is to be used.</p>
<p>Your two options are:
<ul>
<li>read the source for <a href="http://www.gnu.org/software/binutils/">binutils</a>, or</li>
<li>check out a useful post on a <a href="http://sourceware.org/ml/binutils/2006-10/msg00377.html">mailing list</a>.</li>
</ul>
<p>The sections storing the hash table data for an object are called <code>.hash</code> and <code>.gnu.hash</code>.</p>
<h2>Dynamic symbol table in Mach-O objects</h2>
<p>Finding the dynamic symbol table in a Mach-O object is a bit complicated. The pieces to the puzzle are found across different structures and the documentation on how it all works is sparse.</p>
<p><code>Mach-O</code> objects have a load command called <code>LC_DYSYMTAB</code> which describes information about the dynamic symbol table in <code>Mach-O</code> objects.</p>
<p>I&#8217;ve shortened the structure definition, as it is quite large and contains documentation about stuff that is not directly relevant to this post. From <code>/usr/include/mach-o/loader.h</code>:</p>
<pre class="prettyprint">
struct dysymtab_command {
    uint32_t cmd; /* LC_DYSYMTAB */
    uint32_t cmdsize; /* sizeof(struct dysymtab_command) */

    /* .... */

    /*
     * The sections that contain "symbol pointers" and "routine stubs" have
     * indexes and (implied counts based on the size of the section and fixed
     * size of the entry) into the "indirect symbol" table for each pointer
     * and stub.  For every section of these two types the index into the
     * indirect symbol table is stored in the section header in the field
     * reserved1.  An indirect symbol table entry is simply a 32bit index into
     * the symbol table to the symbol that the pointer or stub is referring to.
     * The indirect symbol table is ordered to match the entries in the section.
     */
    uint32_t indirectsymoff; /* file offset to the indirect symbol table */
    uint32_t nindirectsyms;  /* number of indirect symbol table entries */

    /* .... */
};
</pre>
<p></p>
<p>The <code>LC_DYSYMTAB</code> load command provides the fields <code>indirectsymoff</code> and <code>nindirectsyms</code> which describe the offset into the file where the indirect symbol table lives and the number of entries in the table, respectively.</p>
<p>The dynamic symbol table in <code>Mach-O</code> is surprisingly simple. Each entry in the table is just a 32bit index into the symbol table. The dynamic symbol table is just a list of indexes and nothing else. </p>
<p>It turns out there are a few more pieces to the puzzle.</p>
<p>Take a look at the definition for a <code>Mach-O</code> section:</p>
<pre class="prettyprint">
struct section_64 { /* for 64-bit architectures */
  char    sectname[16]; /* name of this section */
  char    segname[16];  /* segment this section goes in */
  uint64_t  addr;   /* memory address of this section */
  uint64_t  size;   /* size in bytes of this section */
  uint32_t  offset;   /* file offset of this section */
  uint32_t  align;    /* section alignment (power of 2) */
  uint32_t  reloff;   /* file offset of relocation entries */
  uint32_t  nreloc;   /* number of relocation entries */
  uint32_t  flags;    /* flags (section type and attributes)*/
  uint32_t  reserved1;  /* reserved (for offset or index) */
  uint32_t  reserved2;  /* reserved (for count or sizeof) */
  uint32_t  reserved3;  /* reserved */
};
</pre>
</p>
<p>It turns out that the fields <code>reserved1</code> and <code>reserved2</code> are useful too.</p>
<p>If a section_64 structure is describing a <code>symbol_stub</code> or <code>__la_symbol_ptr</code> section (read the <a href="http://timetobleed.com/dynamic-linking-elf-vs-mach-o/">previous post</a> to learn about these sections), then the <code>reserved1</code> field holds the <i>index into the dynamic symbol table</i> for the section&#8217;s entries in the table.</p>
<p><code>symbol_stub</code> sections also make use of the <code>reserved2</code> field: the size of a single stub entry is stored in <code>reserved2</code>. Otherwise, the field is set to 0.</p>
<h2>Two notable differences between the dynamic symbol tables</h2>
<ul>
<li>There is an explicit section in <code>ELF</code> that contains <code>Elf64_Sym</code> entries. On <code>Mach-O</code> it&#8217;s just a list of 32bit indexes into the symbol table.</li>
<li><code>ELF</code> provides a <code>.hash</code> section and/or <code>.gnu_hash</code> section to speed up symbol lookup. <code>Mach-O</code> does not.</li>
</ul>
<h2>What happens when you run <code>strip</code>?</h2>
<p>Let&#8217;s use <code>strip</code> with no options (other than the filename).</p>
<p>On <code>ELF</code>:</p>
<ul>
<li>All <code>.debug_*</code> sections are removed. These sections contain extra debugging information that helps debuggers figure out more precisely what went wrong.</li>
<li><code>.symtab</code> section is removed.</li>
<li><code>.strtab</code> section is removed.</li>
</ul>
<p>On <code>Mach-O</code>:</p>
<ul>
<li>Only undefined symbols and dynamic symbols are left in the symbol table. Everything else is removed.</li>
</ul>
<h2>How to <code>strip</code> so I can debug later (linux only)</h2>
<p>If you decide to <code>strip</code> your binary please be considerate to future hackers who may need to debug your app for some reason.</p>
<p>You can be considerate by following the directions in <code>strip(1)</code>:</p>
<blockquote><p>
           1. Link the executable as normal.  Assuming that is is called<br />
               &#8220;foo&#8221; then&#8230;</p>
<p>           2. Run &#8220;objcopy --only-keep-debug foo foo.dbg&#8221; to<br />
               create a file containing the debugging info.</p>
<p>           3. Run &#8220;objcopy --strip-debug foo&#8221; to create a<br />
               stripped executable.</p>
<p>           4. Run &#8220;objcopy --add-gnu-debuglink=foo.dbg foo&#8221;<br />
               to add a link to the debugging info into the stripped executable.
</p></blockquote>
<p>And don&#8217;t forget to put your debugging information somewhere easily accessible and googleable.</p>
<p>If you do this: <b>you are cool</b>. If you don&#8217;t&#8230;</p>
<h2>Conclusion</h2>
<ol>
<li>I like the way ELF does dynamic symbol tables, the <code>gnu_debuglink</code> section, and the lookup hash table for dynamic symbols. All of these pieces are really useful and I am glad they exist.</li>
<li>The indirect symbol table was a bit of a pain to track down on <code>Mach-O</code> as the information is hard to parse on the first pass. To be fair, it is all there if you google around a bit and put the pieces together.</li>
<li>On Linux, if you strip, please add a <code>gnu_debuglink</code> section and put the debug information somewhere I can find it.</li>
</ol>
<p>
Thanks for reading and don&#8217;t forget to <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<h2>References</h2>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xnbhIfjuGyc:ig_GXHHTQjo:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xnbhIfjuGyc:ig_GXHHTQjo:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xnbhIfjuGyc:ig_GXHHTQjo:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=xnbhIfjuGyc:ig_GXHHTQjo:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xnbhIfjuGyc:ig_GXHHTQjo:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=xnbhIfjuGyc:ig_GXHHTQjo:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xnbhIfjuGyc:ig_GXHHTQjo:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=xnbhIfjuGyc:ig_GXHHTQjo:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/xnbhIfjuGyc" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/dynamic-symbol-table-duel-elf-vs-mach-o-round-2/feed/</wfw:commentRss>
		<slash:comments>27</slash:comments>
		</item>
		<item>
		<title>WARNING: American Express fails miserably at basic security.</title>
		<link>http://timetobleed.com/warning-american-express-fails-miserably-at-basic-security/</link>
		<comments>http://timetobleed.com/warning-american-express-fails-miserably-at-basic-security/#comments</comments>
		<pubDate>Tue, 25 May 2010 18:54:55 +0000</pubDate>
		<dc:creator>Joe Damato</dc:creator>
				<category><![CDATA[security]]></category>
		<category><![CDATA[systems]]></category>
		<category><![CDATA[vulnerability]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1711</guid>
		<description><![CDATA[If you enjoy this article, subscribe (via RSS or e-mail) and follow me on twitter. As of 3:35pm PST on 5/25/2010 it seems to be fixed. wireshark shows only TLS traffic now, nothing in the clear. Pretty quick fix, since this was published at 11:54am. Good deal. This article is going to reveal a pretty [...]]]></description>
			<content:encoded><![CDATA[<p><center><img src="http://timetobleed.com/images/americanexpress.jpg" alt="" width="400" height="300" /></center><br />
If you enjoy this article, <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<h2>As of 3:35pm PST on 5/25/2010 it seems to be fixed. wireshark shows only TLS traffic now, nothing in the clear. Pretty quick fix, since this was published at 11:54am. Good deal.</h2>
<p>
<b>This article is going to reveal a pretty serious error in a web form on the American Express Network website. <u>I would strongly recommend NOT filling out the web form described below</u>.</b></p>
<h2>Daily Wish from the American Express Network</h2>
<p><a href="http://dailywish.amexnetwork.com">Daily wish</a> from the <a href="http://www1.amexnetwork.com/?issuerName=us_amexnetworkdefault">American Express Network</a> sent me an email this morning trying to get me to sign up for their deal of the day service where they offer a very limited quantity of products for a low price.</p>
<p>Sounds simple enough, right?</p>
<p>Well, the time of the sale is not released until the day the sale occurs, <b>unless</b> you are an American Express cardholder. If you are a card holder, you get a special landing page on their website telling you that if you sign up, you can get the sale times before the sale date.</p>
<p>The white arrow below points to the tab that only appears if you clicked through from an email from American Express. The red arrow below points to the sign up button. Take a look:<br />
<br />
<img src="http://timetobleed.com/images/amexlanding2.png" width="650"/>
</p>
<h2>Sign up page</h2>
<p>After clicking the sign up button (red arrow above), a lightbox appears asking for:</p>
<ul>
<li><b>First and last name</b></li>
<li><b>American Express credit card number</b></li>
<li><b>Security code</b></li>
<li><b>Expiration date</b></li>
<li><b>Billing zip</b></li>
</ul>
<p>Quite a bit of personal information, much of it sensitive. [sarcasm]<b>Don&#8217;t worry, the page is secure</b>[/sarcasm], see the form and the white arrow below:<br />
<br />
<img src="http://timetobleed.com/images/amexsignup.png">
</p>
<h2>The code from the form</h2>
<p>This form looked very suspicious to me, so I decided to take a look at the code to see if the <code>action</code> for this sign up form was over <code>HTTPS</code>. Check it:</p>
<pre>
&lt;form name="form1" method="post" action="preid2.aspx?ct=7" onsubmit="javascript:return WebForm_OnSubmit();" id="form1"&gt;
</pre>
<p>
<p>So the action is to a handler at <code>http://dailywish.amexnetwork.com/preid2.aspx?ct=7</code>. <u><b>The lack of <code>https</code> doesn&#8217;t make me feel very good.</b></u></p>
<p>Maybe the <code>WebForm_OnSubmit()</code> function is doing something that might make this secure?  Let&#8217;s take a look:<br />
</p>
<pre>
&lt;script type="text/javascript">
//&lt;![CDATA[
function WebForm_OnSubmit() {
if (typeof(ValidatorOnSubmit) == "function" &#038;&#038; ValidatorOnSubmit() == false) return false;
return true;
}
//]]&gt;
&lt;/script&gt;
</pre>
<p>
<br />
So it looks like that function is just a validator. It is really starting to feel like this form is insecure.
</p>
<p>Let&#8217;s bring out wireshark and see what it has to say.</p>
<h2>Wireshark packet sniff</h2>
<p>So I filled out the form with <u>fake information</u> and sniffed the <code>POST</code> to the server.</p>
<p><b>The Daily Wish sign up form from the American Express Network is sending credit card numbers, expiration dates, and all the other personal information on the sign up form <u><i>in the clear</i></u> back to their server.</b></p>
<p><i>Holy. Fuck.</i></p>
<p>
<img src="http://timetobleed.com/images/amexfail.png">
</p>
<h2>Conclusion</h2>
<ul>
<li><b>Do NOT fill out the form until American Express fixes this issue.</b></li>
</ul>
<p>Thanks for reading and don&#8217;t forget to <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=BMBoA9Ou5TI:pf4-ZD9IMHc:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=BMBoA9Ou5TI:pf4-ZD9IMHc:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=BMBoA9Ou5TI:pf4-ZD9IMHc:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=BMBoA9Ou5TI:pf4-ZD9IMHc:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=BMBoA9Ou5TI:pf4-ZD9IMHc:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=BMBoA9Ou5TI:pf4-ZD9IMHc:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=BMBoA9Ou5TI:pf4-ZD9IMHc:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=BMBoA9Ou5TI:pf4-ZD9IMHc:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/BMBoA9Ou5TI" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/warning-american-express-fails-miserably-at-basic-security/feed/</wfw:commentRss>
		<slash:comments>31</slash:comments>
		</item>
		<item>
		<title>Dynamic Linking: ELF vs. Mach-O</title>
		<link>http://timetobleed.com/dynamic-linking-elf-vs-mach-o/</link>
		<comments>http://timetobleed.com/dynamic-linking-elf-vs-mach-o/#comments</comments>
		<pubDate>Wed, 12 May 2010 14:00:09 +0000</pubDate>
		<dc:creator>Joe Damato</dc:creator>
				<category><![CDATA[debugging]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[osx]]></category>
		<category><![CDATA[systems]]></category>
		<category><![CDATA[x86]]></category>
		<category><![CDATA[dynamic linking]]></category>
		<category><![CDATA[elf]]></category>
		<category><![CDATA[mach-o]]></category>
		<category><![CDATA[x86_64]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1613</guid>
		<description><![CDATA[If you enjoy this article, subscribe (via RSS or e-mail) and follow me on twitter. The intention of this post is to highlight some of the similarities and differences between ELF and Mach-O dynamic linking that I encountered while building memprof. I hope to write more posts about similarities and differences in other aspects of [...]]]></description>
			<content:encoded><![CDATA[<p><center><img src="http://timetobleed.com/images/linking.jpg" alt="" width="400" height="300" /></center><br />
If you enjoy this article, <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<p>The intention of this post is to highlight <b>some</b> of the similarities and differences between <code>ELF</code> and <code>Mach-O</code> dynamic linking that I encountered while building <a href="http://github.com/ice799/memprof">memprof</a>.</p>
<p> I hope to write <b>more posts about similarities and differences in other aspects of Mach-O and ELF</b> that I stumbled across to shed some light on what goes on down there and provide (in some cases) the only documentation.</p>
<h2>Procedure Linkage Table</h2>
<p>The procedure linkage table (PLT) is used to determine the absolute address of a function at runtime. Both Mach-O and ELF objects have PLTs that are generated at compile time. The initial table simply invokes the dynamic linker which finds the symbol you want. The way this works is very similar at a high level in ELF and Mach-O, but there are some implementation differences that I thought were worth mentioning.</p>
<h2>Mach-O PLT arrangement</h2>
<p>Mach-O objects have several different sections across different <i>segments</i> that are all involved to create a PLT entry for a specific symbol.</p>
<p>Consider the following assembly stub which calls out to the PLT entry for <code>malloc</code>:</p>
<pre class="prettyprint">
# MACH-O calling a PLT entry (ELF is nearly identical)
0x000000010008c504 [str_new+52]:	callq  0x10009ebbc [dyld_stub_malloc]
</pre>
<p>
<p>The <code>dyld_stub</code> prefix is added by GDB to let the user know that the <code>callq</code> instruction is calling a PLT entry and not <code>malloc</code> itself. The address <code>0x10009ebbc</code> is the first instruction of <code>malloc</code>&#8217;s PLT entry in this Mach-O object. In Mach-O terminology, the instruction at <code>0x10009ebbc</code> is called a <b>symbol stub</b>. Symbol stubs in Mach-O objects are found in the <code>__TEXT</code> segment in the <code>__symbol_stub1</code> section.</p>
<p>Let&#8217;s examine some instructions at the symbol stub address above:</p>
<pre class="prettyprint">
# MACH-O "symbol stubs" for malloc and other functions
0x10009ebbc [dyld_stub_malloc]:	  jmpq   *0x3ae46(%rip)        # 0x1000d9a08
0x10009ebc2 [dyld_stub_realloc]:  jmpq   *0x3ae48(%rip)        # 0x1000d9a10
0x10009ebc8 [dyld_stub_seekdir$INODE64]:	jmpq   *0x3ae4c(%rip)  # 0x1000d9a20
. . . .
</pre>
<p></p>
<p>Each Mach-O <b>symbol stub</b> is just a single <code>jmpq</code> instruction. That <code>jmpq</code> instruction either:</p>
<ul>
<li>Invokes the dynamic linker to find the symbol and transfer execution there</li>
<p><b><u>OR</u></b></p>
<li>Transfers execution directly to the function.</li>
</ul>
<p><i>via</i> an entry in a table. </p>
<p>In the example above, GDB is telling us that the address of the table entry for <code>malloc</code> is <code>0x1000d9a08</code>. This table entry is stored in a section called the <code>__la_symbol_ptr</code> within the <code>__DATA</code> segment.</p>
<p>Before malloc has been resolved, the address in that table entry points to a helper function which (eventually) invokes the dynamic linker to find <code>malloc</code> and fill in its address in the table entry.</p>
<p>Let&#8217;s take a look at what a few entries of the helper functions look like:</p>
<pre class="prettyprint">
# MACH-O stub helpers
0x1000a08d4 [stub helpers+6986]:	pushq  $0x3b73
0x1000a08d9 [stub helpers+6991]:	jmpq   0x10009ed8a [stub helpers]
0x1000a08de [stub helpers+6996]:	pushq  $0x3b88
0x1000a08e3 [stub helpers+7001]:	jmpq   0x10009ed8a [stub helpers]
0x1000a08e8 [stub helpers+7006]:	pushq  $0x3b9e
0x1000a08ed [stub helpers+7011]:	jmpq   0x10009ed8a [stub helpers]
. . . .
</pre>
</p>
<p>Each symbol that has a PLT entry has 2 instructions above: a pair of <code>pushq</code> and <code>jmpq</code>. This instruction sequence sets an ID for the desired function and then invokes the dynamic linker. The dynamic linker looks up this ID so it knows which function it should be looking for.</p>
<h2>ELF PLT arrangement</h2>
<p>ELF objects have the same mechanism, but organize each PLT entry into chunks instead of splicing them out across different sections. Let&#8217;s take a look at a PLT entry for malloc in an ELF object:</p>
<pre class="prettyprint">
# ELF complete PLT entry for malloc
0x40f3d0 [malloc@plt]:	jmpq   *0x2c91fa(%rip)        # 0x6d85d0
0x40f3d6 [malloc@plt+6]:	pushq  $0x2f
0x40f3db [malloc@plt+11]:	jmpq   0x40f0d0
. . . .
</pre>
<p></p>
<p>Much like a Mach-O object, an ELF object uses a table entry to direct the flow of execution to either invoke the dynamic linker or transfer directly to the desired function if it has already been resolved.</p>
<p>Two differences to point out here: </p>
<ol>
<li>ELF puts the entire PLT entry together in a nicely named section called <code>.plt</code> instead of splicing it out across multiple sections.</li>
<li>The table entries indirected through with the initial <code>jmpq</code> instruction are stored in a section named: <code>.got.plt</code>.</li>
</ol>
<h2>Both invoke an assembly trampoline&#8230;</h2>
<p>Both Mach-O and ELF objects are set up to invoke the runtime dynamic linker. Both need an assembly trampoline to bridge the gap between the application and the linker. On 64bit Intel based systems, linkers in both systems must comply with the same Application Binary Interface (ABI).</p>
<p><b>Strangely enough</b>, the two linkers <b>have slightly different assembly trampolines even though they share the same calling convention<sup>1</sup>  <sup>2</sup>.</b></p>
<p>Both trampolines ensure that the program stack is 16-byte aligned to comply with the amd64 ABI&#8217;s calling convention. Both trampolines also take care to save the &#8220;general purpose&#8221; caller-saved registers prior to invoking the dynamic linker, but it turns out that the trampoline in Linux <b>does not save or restore the SSE registers.</b> It turns out that this &#8220;shouldn&#8217;t&#8221; matter, so long as glibc takes care not to use any of those registers in the dynamic linker. OSX takes a more conservative approach and saves and restores the SSE registers before and after calling out to the dynamic linker.</p>
<p>I&#8217;ve included a snippet from the two trampolines below and some comments so you can see the differences up close.</p>
<h2>Different trampolines for the same ABI</h2>
<p>The OSX trampoline:</p>
<pre class="prettyprint">
dyld_stub_binder:
  pushq   %rbp
  movq    %rsp,%rbp
  subq    $STACK_SIZE,%rsp  # at this point stack is 16-byte aligned because two meta-parameters where pushed
  movq    %rdi,RDI_SAVE(%rsp) # save registers that might be used as parameters
  movq    %rsi,RSI_SAVE(%rsp)
  movq    %rdx,RDX_SAVE(%rsp)
  movq    %rcx,RCX_SAVE(%rsp)
  movq    %r8,R8_SAVE(%rsp)
  movq    %r9,R9_SAVE(%rsp)
  movq    %rax,RAX_SAVE(%rsp)
  movdqa    %xmm0,XMMM0_SAVE(%rsp)
  movdqa    %xmm1,XMMM1_SAVE(%rsp)
  movdqa    %xmm2,XMMM2_SAVE(%rsp)
  movdqa    %xmm3,XMMM3_SAVE(%rsp)
  movdqa    %xmm4,XMMM4_SAVE(%rsp)
  movdqa    %xmm5,XMMM5_SAVE(%rsp)
  movdqa    %xmm6,XMMM6_SAVE(%rsp)
  movdqa    %xmm7,XMMM7_SAVE(%rsp)
  movq    MH_PARAM_BP(%rbp),%rdi  # call fastBindLazySymbol(loadercache, lazyinfo)
  movq    LP_PARAM_BP(%rbp),%rsi
  call    __Z21_dyld_fast_stub_entryPvl
</pre>
</p>
<p>The OSX trampoline saves all the caller saved registers <b>as well as</b> the <code>%xmm0 - %xmm7</code> registers prior to invoking the dynamic linker with that last call instruction. These registers are all restored after the call instruction, but I left that out for the sake of brevity.</p>
<p>The Linux trampoline:</p>
<pre class="prettyprint">
  subq $56,%rsp
  cfi_adjust_cfa_offset(72) # Incorporate PLT
  movq %rax,(%rsp)  # Preserve registers otherwise clobbered.
  movq %rcx, 8(%rsp)
  movq %rdx, 16(%rsp)
  movq %rsi, 24(%rsp)
  movq %rdi, 32(%rsp)
  movq %r8, 40(%rsp)
  movq %r9, 48(%rsp)
  movq 64(%rsp), %rsi # Copy args pushed by PLT in register.
  movq %rsi, %r11   # Multiply by 24
  addq %r11, %rsi
  addq %r11, %rsi
  shlq $3, %rsi
  movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_offset
  call _dl_fixup    # Call resolver.
</pre>
</p>
<p>The Linux trampoline doesn&#8217;t touch the SSE registers because it assumes that the dynamic linker will not modify them thus avoiding a save and restore.</p>
<h2>Conclusion</h2>
<ul>
<li>Tracing program execution from call site to the dynamic linker is pretty interesting and there is a lot to learn along the way.</li>
<li>glibc not saving and restoring <code>%xmm0-%xmm7</code> kind of scares me, but there is a unit test included that disassembles the built ld.so searching it to make sure that those registers are never touched. It is still a bit frightening.</li>
<li>Stay tuned for more posts explaining other interesting similarities and differences between Mach-O and ELF coming soon.</li>
</ul>
<p>Thanks for reading and don&#8217;t forget to <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<h2>References</h2>
<ol class="footnotes"><li id="footnote_0_1613" class="footnote"><a href="http://developer.apple.com/mac/library/documentation/DeveloperTools/Conceptual/LowLevelABI/140-x86-64_Function_Calling_Conventions/x86_64.html#//apple_ref/doc/uid/TP40005035-SW1">http://developer.apple.com/mac/library/documentation/DeveloperTools/Conceptual/LowLevelABI/140-x86-64_Function_Calling_Conventions/x86_64.html#//apple_ref/doc/uid/TP40005035-SW1</a></li><li id="footnote_1_1613" class="footnote"><a href="http://www.x86-64.org/documentation/abi.pdf">http://www.x86-64.org/documentation/abi.pdf</a></li></ol><div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xGRcw9hXGS8:1XU9ANsSFEo:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xGRcw9hXGS8:1XU9ANsSFEo:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xGRcw9hXGS8:1XU9ANsSFEo:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=xGRcw9hXGS8:1XU9ANsSFEo:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xGRcw9hXGS8:1XU9ANsSFEo:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=xGRcw9hXGS8:1XU9ANsSFEo:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=xGRcw9hXGS8:1XU9ANsSFEo:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=xGRcw9hXGS8:1XU9ANsSFEo:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/xGRcw9hXGS8" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/dynamic-linking-elf-vs-mach-o/feed/</wfw:commentRss>
		<slash:comments>28</slash:comments>
		</item>
		<item>
		<title>Descent into Darkness: Understanding your system’s binary interface is the only way out</title>
		<link>http://timetobleed.com/descent-into-darkness-understanding-your-systems-binary-interface-is-the-only-way-out/</link>
		<comments>http://timetobleed.com/descent-into-darkness-understanding-your-systems-binary-interface-is-the-only-way-out/#comments</comments>
		<pubDate>Mon, 15 Mar 2010 19:11:19 +0000</pubDate>
		<dc:creator>Joe Damato</dc:creator>
				<category><![CDATA[bugfix]]></category>
		<category><![CDATA[debugging]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[ruby]]></category>
		<category><![CDATA[scaling]]></category>
		<category><![CDATA[systems]]></category>
		<category><![CDATA[x86]]></category>
		<category><![CDATA[debug]]></category>
		<category><![CDATA[garbage collection]]></category>
		<category><![CDATA[GC]]></category>
		<category><![CDATA[memory]]></category>
		<category><![CDATA[performance]]></category>
		<category><![CDATA[syscall]]></category>
		<category><![CDATA[x86_64]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1602</guid>
		<description><![CDATA[Download as PDF (3mb) Descent into Darkness: Understanding your system&#8217;s binary interface is the only way out.]]></description>
			<content:encoded><![CDATA[<p><a style="float:right" href="http://dl.dropbox.com/u/1681973/abi.pdf">Download as PDF (3mb)</a><br />
<a title="View Descent into Darkness: Understanding your system's binary interface is the only way out. on Scribd" href="http://www.scribd.com/doc/28264000/Descent-into-Darkness-Understanding-your-system-s-binary-interface-is-the-only-way-out" style="margin: 12px auto 6px auto; font-family: Helvetica,Arial,Sans-serif; font-style: normal; font-variant: normal; font-weight: normal; font-size: 14px; line-height: normal; font-size-adjust: none; font-stretch: normal; -x-system-font: none; display: block; text-decoration: underline;">Descent into Darkness: Understanding your system&#8217;s binary interface is the only way out.</a> <object id="doc_50009547124029" name="doc_50009547124029" height="600" width="100%" type="application/x-shockwave-flash" data="http://d1.scribdassets.com/ScribdViewer.swf" style="outline:none;" ><param name="movie" value="http://d1.scribdassets.com/ScribdViewer.swf"><param name="wmode" value="opaque"><param name="bgcolor" value="#ffffff"><param name="allowFullScreen" value="true"><param name="allowScriptAccess" value="always"><param name="FlashVars" value="document_id=28264000&#038;access_key=key-nywmlzldrcxb47d7tv9&#038;page=1&#038;viewMode=slideshow"><embed id="doc_50009547124029" name="doc_50009547124029" src="http://d1.scribdassets.com/ScribdViewer.swf?document_id=28264000&#038;access_key=key-nywmlzldrcxb47d7tv9&#038;page=1&#038;viewMode=slideshow" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" height="600" width="100%" wmode="opaque" bgcolor="#ffffff"></embed></object>	</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=06sViW5znHU:S5urYIHYBq0:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=06sViW5znHU:S5urYIHYBq0:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=06sViW5znHU:S5urYIHYBq0:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=06sViW5znHU:S5urYIHYBq0:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=06sViW5znHU:S5urYIHYBq0:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=06sViW5znHU:S5urYIHYBq0:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=06sViW5znHU:S5urYIHYBq0:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=06sViW5znHU:S5urYIHYBq0:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/06sViW5znHU" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/descent-into-darkness-understanding-your-systems-binary-interface-is-the-only-way-out/feed/</wfw:commentRss>
		<slash:comments>7</slash:comments>
		</item>
		<item>
		<title>EventMachine: scalable non-blocking i/o in ruby</title>
		<link>http://timetobleed.com/eventmachine-scalable-non-blocking-io-in-ruby/</link>
		<comments>http://timetobleed.com/eventmachine-scalable-non-blocking-io-in-ruby/#comments</comments>
		<pubDate>Fri, 12 Mar 2010 20:07:39 +0000</pubDate>
		<dc:creator>Aman Gupta</dc:creator>
				<category><![CDATA[linux]]></category>
		<category><![CDATA[ruby]]></category>
		<category><![CDATA[scaling]]></category>
		<category><![CDATA[systems]]></category>
		<category><![CDATA[x86]]></category>
		<category><![CDATA[performance]]></category>
		<category><![CDATA[x86_64]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1574</guid>
		<description><![CDATA[Download as PDF (40mb) EventMachine: scalable non-blocking i/o in ruby]]></description>
			<content:encoded><![CDATA[<p><a style="float:right" href="http://dl.dropbox.com/u/635/em_export.pdf">Download as PDF (40mb)</a><br />
<a title="View EventMachine: scalable non-blocking i/o in ruby on Scribd" href="http://www.scribd.com/doc/28253878/EventMachine-scalable-non-blocking-i-o-in-ruby" style="margin: 12px auto 6px auto; font-family: Helvetica,Arial,Sans-serif; font-style: normal; font-variant: normal; font-weight: normal; font-size: 14px; line-height: normal; font-size-adjust: none; font-stretch: normal; -x-system-font: none; display: block; text-decoration: underline;">EventMachine: scalable non-blocking i/o in ruby</a> <object id="doc_298923438833050" name="doc_298923438833050" height="600" width="100%" type="application/x-shockwave-flash" data="http://d1.scribdassets.com/ScribdViewer.swf" style="outline:none;" ><param name="movie" value="http://d1.scribdassets.com/ScribdViewer.swf"><param name="wmode" value="opaque"><param name="bgcolor" value="#ffffff"><param name="allowFullScreen" value="true"><param name="allowScriptAccess" value="always"><param name="FlashVars" value="document_id=28253878&#038;access_key=key-1rb2iijpl7bew7i1f04i&#038;page=1&#038;viewMode=slideshow"><embed id="doc_298923438833050" name="doc_298923438833050" src="http://d1.scribdassets.com/ScribdViewer.swf?document_id=28253878&#038;access_key=key-1rb2iijpl7bew7i1f04i&#038;page=1&#038;viewMode=slideshow" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" height="600" width="100%" wmode="opaque" bgcolor="#ffffff"></embed></object></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=1IR37wpxT30:xirEQ_0rW-s:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=1IR37wpxT30:xirEQ_0rW-s:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=1IR37wpxT30:xirEQ_0rW-s:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=1IR37wpxT30:xirEQ_0rW-s:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=1IR37wpxT30:xirEQ_0rW-s:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=1IR37wpxT30:xirEQ_0rW-s:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=1IR37wpxT30:xirEQ_0rW-s:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=1IR37wpxT30:xirEQ_0rW-s:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/1IR37wpxT30" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/eventmachine-scalable-non-blocking-io-in-ruby/feed/</wfw:commentRss>
		<slash:comments>7</slash:comments>
		</item>
		<item>
		<title>Garbage Collection Slides from LA Ruby Conference</title>
		<link>http://timetobleed.com/garbage-collection-slides-from-la-ruby-conference/</link>
		<comments>http://timetobleed.com/garbage-collection-slides-from-la-ruby-conference/#comments</comments>
		<pubDate>Sat, 20 Feb 2010 22:03:14 +0000</pubDate>
		<dc:creator>Aman Gupta</dc:creator>
				<category><![CDATA[bugfix]]></category>
		<category><![CDATA[debugging]]></category>
		<category><![CDATA[ruby]]></category>
		<category><![CDATA[debug]]></category>
		<category><![CDATA[garbage collection]]></category>
		<category><![CDATA[GC]]></category>
		<category><![CDATA[memory]]></category>
		<category><![CDATA[performance]]></category>
		<category><![CDATA[profiling]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1569</guid>
		<description><![CDATA[Garbage Collection and the Ruby Heap]]></description>
			<content:encoded><![CDATA[<p><a title="View Garbage Collection and the Ruby Heap on Scribd" href="http://www.scribd.com/doc/27174770/Garbage-Collection-and-the-Ruby-Heap" style="margin: 12px auto 6px auto; font-family: Helvetica,Arial,Sans-serif; font-style: normal; font-variant: normal; font-weight: normal; font-size: 14px; line-height: normal; font-size-adjust: none; font-stretch: normal; -x-system-font: none; display: block; text-decoration: underline;">Garbage Collection and the Ruby Heap</a> <object id="doc_629766057039419" name="doc_629766057039419" height="600" width="100%" type="application/x-shockwave-flash" data="http://d1.scribdassets.com/ScribdViewer.swf" style="outline:none;" ><param name="movie" value="http://d1.scribdassets.com/ScribdViewer.swf"><param name="wmode" value="opaque"><param name="bgcolor" value="#ffffff"><param name="allowFullScreen" value="true"><param name="allowScriptAccess" value="always"><param name="FlashVars" value="document_id=27174770&#038;access_key=key-2g5x6qhwa28yz3ia1hih&#038;page=1&#038;viewMode=slideshow"><embed id="doc_629766057039419" name="doc_629766057039419" src="http://d1.scribdassets.com/ScribdViewer.swf?document_id=27174770&#038;access_key=key-2g5x6qhwa28yz3ia1hih&#038;page=1&#038;viewMode=slideshow" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" height="600" width="100%" wmode="opaque" bgcolor="#ffffff"></embed></object>	</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=c0NqcU8bidQ:byhEvlFBv8o:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=c0NqcU8bidQ:byhEvlFBv8o:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=c0NqcU8bidQ:byhEvlFBv8o:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=c0NqcU8bidQ:byhEvlFBv8o:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=c0NqcU8bidQ:byhEvlFBv8o:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=c0NqcU8bidQ:byhEvlFBv8o:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=c0NqcU8bidQ:byhEvlFBv8o:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=c0NqcU8bidQ:byhEvlFBv8o:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/c0NqcU8bidQ" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/garbage-collection-slides-from-la-ruby-conference/feed/</wfw:commentRss>
		<slash:comments>17</slash:comments>
		</item>
		<item>
		<title>String together global offset tables to build a Ruby memory profiler</title>
		<link>http://timetobleed.com/string-together-global-offset-tables-to-build-a-ruby-memory-profiler/</link>
		<comments>http://timetobleed.com/string-together-global-offset-tables-to-build-a-ruby-memory-profiler/#comments</comments>
		<pubDate>Mon, 25 Jan 2010 12:59:56 +0000</pubDate>
		<dc:creator>Joe Damato</dc:creator>
				<category><![CDATA[debugging]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[ruby]]></category>
		<category><![CDATA[systems]]></category>
		<category><![CDATA[x86]]></category>
		<category><![CDATA[debug]]></category>
		<category><![CDATA[memory]]></category>
		<category><![CDATA[profiling]]></category>
		<category><![CDATA[x86_64]]></category>

		<guid isPermaLink="false">http://timetobleed.com/?p=1539</guid>
		<description><![CDATA[If you enjoy this article, subscribe (via RSS or e-mail) and follow me on twitter. Disclaimer The tricks, techniques, and ugly hacks in this article are PLATFORM SPECIFIC, DANGEROUS, and NOT PORTABLE. This is the third article in a series of articles describing a set of low level hacks that I used to create memprof [...]]]></description>
			<content:encoded><![CDATA[<p><center><img src="http://timetobleed.com/images/got.jpg" alt="" width="400" height="300" /></center><br />
If you enjoy this article, <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<h2>Disclaimer</h2>
<p><i>The tricks, techniques, and ugly hacks in this article are  <b>PLATFORM SPECIFIC</b>, <b>DANGEROUS</b>, and <b>NOT PORTABLE</b>. </i></p>
<p>This is the third article in a series of articles describing a set of low-level hacks that I used to create <a href="http://github.com/ice799/memprof">memprof</a>, a Ruby-level memory profiler. <b>You should be able to survive without reading the other articles in this series</b>, but you can check them out <a href="http://timetobleed.com/rewrite-your-ruby-vm-at-runtime-to-hot-patch-useful-features/">here</a> and <a href="http://timetobleed.com/hot-patching-inlined-functions-with-x86_64-asm-metaprogramming/">here</a>.</p>
<h2>How is this different from the other hooking articles/techniques?</h2>
<p>The previous articles explained how to insert trampolines in the <code>.text</code> segment of a binary. This article explains a cool technique for hooking functions in the <code>.text</code> segment of <i>shared libraries</i>, allowing your handler to run, and then resuming execution. Hooking shared libraries turns out to be less work than hooking the binary (in the case of Ruby, that is), but making it all happen was a bit tricky. Read on to learn more.</p>
<h2>The &#8220;problem&#8221; with shared libraries</h2>
<p>The problem is that if a trampoline is inserted into the code of the shared library, the trampoline will need to invoke the dynamic linker to resolve the function that is being hooked, call the function, do whatever additional logic is desired, and then resume execution.</p>
<p><b>In other words you need to (somehow) insert a trampoline for a function that will call the function being trampolined without ending up in an infinite loop.</b></p>
<p>The additional complexity occurs because when shared libraries are loaded, the kernel decides at runtime where exactly in memory the library should be loaded. Since the exact location of symbols is not known at link time, a procedure linkage table (<code>.plt</code>) is created so that the program and the dynamic linker can work together to resolve symbol addresses.</p>
<p>I explained how <code>.plt</code>s work in a <a href="http://timetobleed.com/extending-ltrace-to-make-your-rubypythonperlphp-apps-faster/">previous article</a>, but looking at this again is worthwhile. I&#8217;ve simplified the explanation a bit<sup>1</sup>, but at a high level:</p>
<ol>
<li>The program calls a function in a shared object; the link editor makes sure that the program jumps to a stub function in the <code>.plt</code>.</li>
<li>The program sets some data up for the dynamic linker and then hands control over to it.</li>
<li>The dynamic linker looks at the info set up by the program and writes the absolute address of the function that was called through the <code>.plt</code> into the global offset table (<code>.got</code>).</li>
<li>Then the dynamic linker calls the function.</li>
<li>Subsequent calls to the same function jump to the same stub in the <code>.plt</code>, but every time after the first call the absolute address is already in the <code>.got</code> (because when the dynamic linker is invoked the first time, it fills in the absolute address in the <code>.got</code>).</li>
</ol>
<p>Disassembling a short Ruby VM function that calls <code>rb_newobj</code> (a memory allocation routine that we&#8217;d like to hook), shows the calls to the <code>.plt</code>:</p>
<p><pre class="prettyprint">
000000000001af10 &lt;ary_alloc&gt;:
   . . . .
   1af14:       e8 e7 c6 ff ff          callq  17600 [rb_newobj@plt]
   . . . .
</pre>
</p>
<p>
Let&#8217;s take a look at the corresponding <code>.plt</code> stub:</p>
<pre class="prettyprint">
0000000000017600 &lt;rb_newobj@plt&gt;:
   17600:       ff 25 6a 9c 2c 00       jmpq   *0x2c9c6a(%rip) # 2e1270 [_GLOBAL_OFFSET_TABLE_+0x288]
   17606:       68 4e 00 00 00          pushq  $0x4e
   1760b:       e9 00 fb ff ff          jmpq   17110 &lt;_init+0x18&gt;
</pre>
</p>
<p><b><u>Important fact:</u></b> The program and every shared library each have their own <code>.plt</code> and <code>.got</code> sections (amongst other sections). Keep this in mind as it&#8217;ll be handy very shortly.</p>
<p>That is a lot of stub code to reproduce in the trampoline. Reproducing that stuff in the trampoline shouldn&#8217;t be hard, but invites a large number of bugs over to play. <i>Is there a better way?</i></p>
<h2>What is a global offset table (<code>.got</code>)?</h2>
<p>The global offset table (<code>.got</code>) is a table of absolute addresses that can be filled in at runtime. In the assembly dump above, the <code>.got</code> entry for <code>rb_newobj</code> is referenced in the <code>.plt</code> stub code.</p>
<h2>Intercepting a function call</h2>
<p>It would be <b>awesome</b> if it were possible to overwrite the <code>.got</code> entry for <code>rb_newobj</code> and insert the address of a trampoline. But how would the intercepting function call <code>rb_newobj</code> itself without ending up in an infinite loop?</p>
<p>The <b>important fact</b> above comes in to save the day.</p>
<p>Since each shared object has its own <code>.plt</code> and <code>.got</code> sections, it is possible to overwrite the <code>.got</code> entry for <code>rb_newobj</code> in <i>every shared object except for the object where the trampoline lives</i>. Then, when <code>rb_newobj</code> is called, the <code>.plt</code> entry will redirect execution to the trampoline. The trampoline then calls out to its own <code>.plt</code> entry for <code>rb_newobj</code>, which is left untouched, allowing <code>rb_newobj</code> to be resolved and called successfully.</p>
<h2>Not as easy as it sounds, though</h2>
<p>This solution is less work than the other hooking methods, but it has its own particular details as well:</p>
<ol>
<li>You&#8217;ll need to walk the link map at runtime to determine the base address for the shared library you are hooking (it could be anywhere).</li>
<li>Next, you&#8217;ll need to parse the <code>.rela.plt</code> section which contains information on the location of each <code>.plt</code> stub, relative to the base address of the shared object.</li>
<li>Once you have the address of the <code>.plt</code> stub, you&#8217;ll need to determine the absolute address of the <code>.got</code> entry by parsing the first instruction of the <code>.plt</code> stub (a <code>jmp</code>) as seen in the disassembly above.</li>
<li>Finally, you can write to the <code>.got</code> entry the address of your trampoline, as long as the trampoline <b>lives in a different shared library</b>.</li>
</ol>
<p>You&#8217;ve now successfully managed to poison the <code>.got</code> entry of a symbol in one shared library to direct execution to your own function which can then call the intercepted function itself without getting stuck in an infinite loop.</p>
<h2>Conclusion</h2>
<ul>
<li>There are lots of sections in each ELF object. Each section is special and important.</li>
<li>ELF documentation can be difficult to obtain and understand.</li>
<li>Got pretty lucky this time around. I was getting a little worried that it would get complicated. Made it out alive, though.</li>
</ul>
<p>Thanks for reading and don&#8217;t forget to <a rel="alternate" type="application/rss+xml" href="http://feeds.feedburner.com/TimeToBleed">subscribe (via RSS or e-mail)</a> and <a href="http://twitter.com/joedamato">follow me on twitter.</a></p>
<h2>References</h2>
<ol class="footnotes"><li id="footnote_0_1539" class="footnote"><a href="http://www.x86-64.org/documentation/abi.pdf">System V Application Binary Interface AMD64 Architecture Processor Supplement, p 78</a></li></ol><div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=j81ThL_ZTtE:qO7ijgG8G6c:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=j81ThL_ZTtE:qO7ijgG8G6c:qj6IDK7rITs"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?d=qj6IDK7rITs" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=j81ThL_ZTtE:qO7ijgG8G6c:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=j81ThL_ZTtE:qO7ijgG8G6c:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=j81ThL_ZTtE:qO7ijgG8G6c:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=j81ThL_ZTtE:qO7ijgG8G6c:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/TimeToBleed?a=j81ThL_ZTtE:qO7ijgG8G6c:gIN9vFwOqvQ"><img src="http://feeds.feedburner.com/~ff/TimeToBleed?i=j81ThL_ZTtE:qO7ijgG8G6c:gIN9vFwOqvQ" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/TimeToBleed/~4/j81ThL_ZTtE" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://timetobleed.com/string-together-global-offset-tables-to-build-a-ruby-memory-profiler/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss><!-- Dynamic page generated in 0.644 seconds. --><!-- Cached page generated by WP-Super-Cache on 2010-08-01 11:25:39 -->
