<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:blogger="http://schemas.google.com/blogger/2008" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" gd:etag="W/&quot;DU8BSXg_fCp7ImA9WhFSFUU.&quot;"><id>tag:blogger.com,1999:blog-21557504</id><updated>2013-06-18T16:37:38.644-04:00</updated><category term="palindromic codons" /><category term="cancer" /><category term="Sobel" /><category term="superoxide" /><category term="Bordetella" /><category term="Amazon" /><category term="FeatView" /><category term="RAID" /><category term="Rothia" /><category term="apoptosis" /><category term="frameshift mutation" /><category term="genome" /><category term="Clostridium" /><category term="bioinformatics" /><category term="FDA" /><category term="bacteria" /><category term="Anaplasma" /><category term="Candidatus" /><category term="G+C" /><category term="Flash" /><category term="fractal" /><category term="bookmarklet" /><category term="catalase" /><category term="xpath" /><category term="Carsonella ruddii" /><category term="SynMap" /><category term="performance" /><category term="endosymbiosis" /><category term="Lateral Gene Transfer" /><category term="Salems" /><category term="fresh writing" /><category term="retrovirus" /><category term="deamination" /><category term="DFT" /><category term="diabetes" /><category term="Adobe" /><category term="multimorbid" /><category term="carcinogenic" /><category term="Darwin" /><category term="Ford Doolittle" /><category term="GC content" /><category term="polypharmacy" /><category term="comparative genomics" /><category term="genetics" /><category term="information theory" /><category term="DNA" /><category 
term="KH domain" /><category term="FASTA" /><category term="AT pressure" /><category term="synteny" /><category term="graphics" /><category term="transformation" /><category term="cigarettes" /><category term="anaerobes" /><category term="nitroreductase" /><category term="sling" /><category term="DCT" /><category term="Dreamweaver" /><category term="Proteus mirabilis" /><category term="purine loading" /><category term="amino acids" /><category term="health care" /><category term="GC:AT ratio" /><category term="ribosomes" /><category term="protein blast" /><category term="patent" /><category term="Actinomyces" /><category term="Europa" /><category term="JAI" /><category term="antisense translation" /><category term="convolution" /><category term="Chargaff" /><category term="scriptlet" /><category term="menthol" /><category term="CMS" /><category term="Halobacteriacea" /><category term="epidemiology" /><category term="DNA repair" /><category term="redundancy" /><category term="CQ" /><category term="original writing" /><category term="json" /><category term="endosymbiont" /><category term="cigarets" /><category term="Vista" /><category term="jcr" /><category term="nonsense proteins" /><category term="Clade 3 catalase" /><category term="workflow" /><category term="omics" /><category term="wobble codon" /><category term="tobacco" /><category term="health care costs" /><category term="Darwinism" /><category term="DSM-V" /><category term="S1" /><category term="symbiosis" /><category term="imaging" /><category term="3D JPEG" /><category term="evolution" /><category term="PNPase" /><category term="anaerobic catalase" /><category term="phage" /><category term="GenomeView" /><category term="E. 
coli" /><category term="mutations" /><category term="speciation" /><category term="Ralstonia" /><category term="Archaea" /><category term="2D" /><category term="ecommerce" /><category term="Chargaff's second rule" /><category term="proteins" /><category term="mosquito" /><category term="trite phrases" /><category term="purine-pyrimidine ratio" /><category term="genomics" /><category term="Shakespeare" /><category term="bioinformatics bookmarklet" /><category term="codon" /><category term="mitochondria" /><category term="Business Catalyst" /><category term="tblastn" /><category term="ribosomal S1" /><category term="Thiomicrospira" /><category term="WCM" /><category term="obesity" /><category term="lung cancer" /><category term="http://www.blogger.com/post-create.g?blogID=21557504" /><category term="phylogenetic tree" /><category term="G+C content" /><category term="Szybalski" /><category term="bacterial genetics" /><category term="Anaeromyxobacter" /><category term="Claude Shannon" /><category term="programming" /><category term="Adobe CQ" /><category term="crustacean" /><category term="ankyrin" /><category term="parasite" /><category term="Clostridium botulinum" /><category term="new gene creation" /><category term="Java" /><category term="bacterial ecology" /><category term="antisense DNA" /><category term="thermal vent" /><category term="Szybalski's rule" /><category term="DNA replication" /><category term="hydrogen peroxide" /><category term="cliche" /><category term="mutation" /><category term="polynucleotide phosphorylase" /><category term="comorbidity" /><category term="tree of life" /><category term="proteomics" /><category term="Chlamydia" /><category term="parity rule" /><category term="smoking" /><category term="microbial genetics" /><category term="Wolbachia" /><category term="kernel" /><category term="investment" /><category term="Adobe Muse" /><category term="FFT" /><category term="multifunctional catalase" /><category term="entropy" /><category 
term="erroneous translation" /><category term="BLAST query" /><category term="Polynucleobacter" /><category term="non-heme catalase" /><category term="writing" /><category term="genes" /><category term="replication" /><title>assertTrue( )</title><subtitle type="html">"I can explain."

&lt;br&gt;&lt;br&gt;&lt;br&gt;by Kas Thomas</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>554</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/assertTrue" /><feedburner:info xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" uri="asserttrue" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry gd:etag="W/&quot;CUMEQnw9fCp7ImA9WhFSFEk.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-2270099339352897733</id><published>2013-06-17T00:30:00.000-04:00</published><updated>2013-06-17T00:30:03.264-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-06-17T00:30:03.264-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="PNPase" /><category scheme="http://www.blogger.com/atom/ns#" term="KH domain" /><category scheme="http://www.blogger.com/atom/ns#" term="antisense translation" /><category scheme="http://www.blogger.com/atom/ns#" term="evolution" /><category scheme="http://www.blogger.com/atom/ns#" 
term="nonsense proteins" /><category scheme="http://www.blogger.com/atom/ns#" term="antisense DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="Darwinism" /><category scheme="http://www.blogger.com/atom/ns#" term="polynucleotide phosphorylase" /><category scheme="http://www.blogger.com/atom/ns#" term="new gene creation" /><category scheme="http://www.blogger.com/atom/ns#" term="Darwin" /><category scheme="http://www.blogger.com/atom/ns#" term="Rothia" /><category scheme="http://www.blogger.com/atom/ns#" term="ribosomal S1" /><category scheme="http://www.blogger.com/atom/ns#" term="S1" /><title>An Example of Antisense Proteogenesis?</title><content type="html">The question of how organisms develop entirely new genes is one of the most important open questions in biology. One possibility is that new genes often develop through accidental translation of antisense strands of DNA.&lt;br /&gt;
&lt;br /&gt;
An example of this can be seen with the S1 protein of the 30S bacterial &lt;a href="http://en.wikipedia.org/wiki/Ribosomal_protein" target="_blank"&gt;ribosome&lt;/a&gt;. If you take the amino-acid sequence for an S1 gene and use it as the query sequence in a &lt;a href="http://www.ncbi.nlm.nih.gov/books/NBK21106/def-item/app11/?report=objectonly" target="_blank"&gt;blast-p&lt;/a&gt; (protein blast), you'll mostly get back hits on other S1 proteins, but you'll also get minor (low-fidelity) hits on polynucleotide phosphorylase. Why? When you do a blast search, the search engine, by default, looks at &lt;i&gt;both &lt;/i&gt;DNA strands of target genes (sense and antisense strands) to see if there's a potential sequence match with the query. If there's a match on the antisense strand, it will be reported along with "sense" matches. In the case of the S1 protein, blast-p searches often report weak &lt;i&gt;antisense &lt;/i&gt;hits on polynucleotide phosphorylase in addition to strong &lt;i&gt;sense &lt;/i&gt;hits on ribosomal S1.&lt;br /&gt;
&lt;br /&gt;
Ribosomal proteins are, of course, among the most highly conserved proteins in nature. It turns out that polynucleotide phosphorylase (PNPase) is very highly conserved as well. It's an enzyme that occurs in every life form (bacteria, fungi, plants, animals), absent only in a scant handful of microbial endosymbionts that have lost the majority of their genes through deletions. While the chemical function of PNPase is well understood (it catalyzes the interconversion of nucleoside diphosphates to RNA), its physiologic purpose is not well understood, although &lt;a href="http://jb.asm.org/content/194/20/5613.abstract" target="_blank"&gt;recent research shows&lt;/a&gt; that PNPase-knockout mutants of &lt;i&gt;E. coli&lt;/i&gt; exhibit lower mutation rates. (Hence, PNPase may actually be involved in generating mutations.)&lt;br /&gt;
&lt;br /&gt;
The bacterium &lt;i&gt;Rothia mucilaginosa&lt;/i&gt;, strain DY18, has a (putative) PNPase gene at a genome offset of 1277514. When this gene is used as the query for a blast-p search, the hits that come back include many strong matches for the S1 ribosomal proteins of various organisms. By "strong match," I mean better than 80% sequence identity coupled with an E-value (expectation value) of zero. (Recall that the &lt;i&gt;E-value&lt;/i&gt; represents the approximate odds of the match in question happening due to random chance.) &lt;br /&gt;
&lt;br /&gt;
If we use the Genome Viewer at &lt;a href="http://genomevolution.org/"&gt;genomevolution.org&lt;/a&gt; to look at the PNPase gene of &lt;i&gt;Rothia mucilaginosa&lt;/i&gt;, we see something extraordinarily peculiar (look carefully at the graphic below). Click to enlarge the following image, or better yet, to see this genome view for yourself, go to &lt;a href="http://genomevolution.org/CoGe/GenomeView.pl?chr=1&amp;amp;ds=61353&amp;amp;x=1277514&amp;amp;z=4;gstid=1" target="_blank"&gt;this link&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-0xd7rk-UhK0/UbxzsQTLA-I/AAAAAAAABwk/2aJkcNwefFE/s1600/PNP.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="346" src="http://4.bp.blogspot.com/-0xd7rk-UhK0/UbxzsQTLA-I/AAAAAAAABwk/2aJkcNwefFE/s400/PNP.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Notice the presence of overlapping sense and antisense open reading frames on a portion of DNA from &lt;i&gt;Rothia mucilaginosa&lt;/i&gt;. The top reading frame contains the gene for polynucleotide phosphorylase. The lower (-1 strand) reading frame contains ribosomal S1. To see this in your own browser, go to &lt;a href="http://genomevolution.org/CoGe/GenomeView.pl?chr=1&amp;amp;ds=61353&amp;amp;x=1277514&amp;amp;z=4;gstid=1" target="_blank"&gt;this link&lt;/a&gt;.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
Notice that there are overlapping genes. On the top strand is the gene for PNPase; on the bottom strand, in the same location, is a gene for ribosomal S1. These are &lt;i&gt;bidirectionally overlapping open reading frames&lt;/i&gt;, something occasionally encountered in virus nucleic acids but rarely seen in bacterial or other genomes.&lt;br /&gt;
&lt;br /&gt;
How do we explain this anomaly? It could be just that: an anomaly, two open reading frames that happen to overlap (but that aren't necessarily translated &lt;i&gt;in vivo&lt;/i&gt;). Or it could be that at some point, many millions of years ago, the ribosomal S1 gene of a &lt;i&gt;Rothia&lt;/i&gt; ancestor was erroneously translated via the antisense strand, producing a protein with PNPase characteristics. We don't know why PNPase confers survival value (its physiologic purpose is not fully understood), but we do know, with a fair degree of certainty, that PNPase does, in fact, confer survival value—because every organism, at every level of the tree of life, has at least one copy of PNPase. Once &lt;i&gt;Rothia&lt;/i&gt;'s ancestor, through whatever process, opened up a reading frame on the antisense strand of ribosomal S1, the reading frame &lt;i&gt;stayed &lt;/i&gt;open, because it conferred survival value. In this way, the first &lt;i&gt;Rothia &lt;/i&gt;PNPase was born. (Arguably.)&lt;br /&gt;
&lt;br /&gt;
At some point in its history, &lt;i&gt;Rothia &lt;/i&gt;duplicated its PNPase gene and placed a new copy at genome offset 1650959. Over time, this second copy diverged from the original copy, becoming more like &lt;i&gt;E. coli &lt;/i&gt;PNPase (which is also to say, less S1-like). &lt;i&gt;Rothia&lt;/i&gt;'s second PNPase shows a blast-p similarity of 45% (in terms of AA identities) to &lt;i&gt;E. coli&lt;/i&gt; PNPase, with E-value 4.0e-147. It shows a blast-p similarity of 26% (AA identities) with &lt;i&gt;E. coli&lt;/i&gt; ribosomal S1 (E-value: 4.0e-17). Neither &lt;i&gt;E. coli&lt;/i&gt; PNPase nor &lt;i&gt;Rothia &lt;/i&gt;PNPase-2 overlaps an S1 gene. However, both are colocated with the ribosomal S15 protein gene. And you'll find (if you look at lots of bacterial genomes) that PNPase is almost always located immediately next to an S15 ribosomal gene.&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;Rothia &lt;/i&gt;PNPase is an example of an enzyme that may very well have started out as an antisense copy of another protein (the S1 ribosomal protein). Of course, the mere presence of bidirectionally overlapping open reading frames doesn't prove that both frames are actually transcribed and translated &lt;i&gt;in vivo&lt;/i&gt;. But the fact that blast-p searches using PNPase as the query almost always turn up faint S1 echoes (in a wide variety of organisms) is highly suggestive of an ancestral relationship between the two proteins.&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/2270099339352897733/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/06/an-example-of-antisense-proteogenesis.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2270099339352897733?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2270099339352897733?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/06/an-example-of-antisense-proteogenesis.html" title="An Example of Antisense Proteogenesis?" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-0xd7rk-UhK0/UbxzsQTLA-I/AAAAAAAABwk/2aJkcNwefFE/s72-c/PNP.png" height="72" width="72" /><thr:total>1</thr:total></entry><entry gd:etag="W/&quot;DEMEQX48cSp7ImA9WhFSE0g.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-5372934505641497647</id><published>2013-06-16T00:20:00.000-04:00</published><updated>2013-06-16T00:20:00.079-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-06-16T00:20:00.079-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="palindromic codons" /><category scheme="http://www.blogger.com/atom/ns#" term="new gene creation" /><category scheme="http://www.blogger.com/atom/ns#" term="nonsense proteins" /><category 
scheme="http://www.blogger.com/atom/ns#" term="antisense translation" /><category scheme="http://www.blogger.com/atom/ns#" term="antisense DNA" /><title>Evolution and Antisense Translation of DNA</title><content type="html">Yesterday I offered a theory for new gene creation which might be called the Erroneous Translation Theory. Basically, I proposed that new proteins arise through frameshifted and/or reversed translation of nucleic acids (translation of antisense strands of DNA).&lt;br /&gt;
&lt;br /&gt;
Erroneous translation of DNA offers interesting possibilities for gain of function. (Recall that most point mutations result in loss of function, and one of the &lt;a href="http://asserttrue.blogspot.com/2013/06/the-trouble-with-darwin.html" target="_blank"&gt;major criticisms&lt;/a&gt; of Darwinian theory is that evolution based on accumulation of point mutations cannot account for gain-of-function events.) Wholesale mistranslation via frameshift errors and/or wrong-strand transcription allows for the sudden emergence of entirely new classes of proteins. The unit of change is no longer the single base-pair polymorphism but the functional domain or motif. &lt;br /&gt;
&lt;br /&gt;
An important aspect of antisense-strand translation has to do with stop codons. In DNA, the sequences TCA, TTA, and CTA specify amino acids serine, leucine, and leucine, respectively. But when these three codons are complemented, then read in 5'-to-3' direction—in other words, when they're antisense-translated—they form the &lt;i&gt;stop codons&lt;/i&gt; TGA, TAA, and TAG, which tell the cell's protein-making machinery to terminate the production of the current polypeptide. Thus, if a typical gene containing codons TCA, TTA, and CTA  is translated "backwards," translation will end prematurely: It will end as soon as a stop codon is encountered.&lt;br /&gt;
&lt;br /&gt;
How important a consideration is this in the real world? Consider the following DNA sequence, which represents the gene for the cytidine deaminase enzyme of &lt;i&gt;Clostridium botulinum&lt;/i&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-size: x-small;"&gt;&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;&lt;span style="color: red;"&gt;&amp;gt;Clostridium botulinum A strain ATCC 19397(v1, unmasked), Name: ABS32549.1, CLB_0040, Type: CDS, Feature Location: (Chr: 1, 37028..37465) Genomic Location: 37028-37465&lt;/span&gt;
&lt;span style="color: red;"&gt;ATGAATGATTATATAGAATATGCAATAATTGAAGCAAAAAAAGCATTAGCAATAGGAGAAGTACCTGTTGGAGCTATTATAGTTAAAGAAAATAAAATTATAGCAAAAAGTCATAATTTAAAAGAGTCATTGAAGGATCCAACAGCTCATGCAGAGATATTAGCTATAAAAGAAGCTTGCAATACAATACATAATTGGAGATTAAAAGGATGTAAGATGTATGTAACATTAGAACCATGTGCTATGTGTGCTAGTGCAATAATTCAATCTAGAATAAGTGAATTGCATATAGGAACCTTTGATCCAGTGGGAGGGGCTTGTGGATCAGTAGTAAATATAACAAATAATAGTTATTTAAAAAATAATTTAAATATTAAATGGTTATATGATGATGAATGTAGTAGAATAATAACAAATTTTTTTAAAAATATTAGATAA&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;br /&gt;
The above sequence is the "sense" strand of the DNA, in 5'-to-3' direction. The sequence below is the corresponding 3'-to-5' complementary sequence (in other words, what's on the antisense strand of DNA):&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-size: x-small;"&gt;&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;&lt;span style="color: red;"&gt;TACTTACTAATATATCTTATACGTTATTAACTTCGTTTTTTTCGTAATCGTTATCCTCTTCATGGACAACCTCGATAATATCAATTTCTTTTATTTTAATATCGTTTTTCAGTATTAAATTTTCTCAGTAACTTCCTAGGTTGTCGAGTACGTCTCTATAATCGATATTTTCTTCGAACGTTATGTTATGTATTAACCTCTAATTTTCCTACATTCTACATACATTGTAATCTTGGTACACGATACACACGATCACGTTATTAAGTTAGATCTTATTCACTTAACGTATATCCTTGGAAACTAGGTCACCCTCCCCGAACACCTAGTCATCATTTATATTGTTTATTATCAATAAATTTTTTATTAAATTTATAATTTACCAATATACTACTACTTACATCATCTTATTATTGTTTAAAAAAATTTTTATAATCTATT&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
When the antisense sequence is translated in the normal 5'-to-3' direction, the following amino acid sequence results:&lt;br /&gt;
&lt;br /&gt;
&lt;span style="color: red;"&gt;&lt;span class="Apple-style-span" style="border-collapse: separate; font-family: 'Times New Roman'; font-size: xx-small; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: normal; orphans: 2; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Consolas,'Lucida Console',monospace; font-size: 12px; white-space: pre-wrap;"&gt;LSNIFKKICYYSTTFIII*PFNI*IIF*ITIICYIYY*STSPSHWIKGSYMQFTYSRLNYCTSTHSTWF*CYIHLTSF*SPIMYCIASFFYS*YLCMSCWILQ*LF*IMTFCYNFIFFNYNSSNRYFSYC*CFFCFNYCIFYIIIH&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
This sequence of 146 amino acids (shown here using &lt;a href="http://www.bio.davidson.edu/biology/aatable.html" target="_blank"&gt;standard one-letter amino-acid abbreviations&lt;/a&gt;) contains 10 stop codons (depicted as asterisks). Any attempt to translate the antisense strand of the &lt;i&gt;C. botulinum&lt;/i&gt; cytidine deaminase gene will result in (at best) a series of short oligopeptides.&lt;br /&gt;
&lt;br /&gt;
It's tempting to conclude that this is nature's ingenious way of preventing the occurrence of nonsense proteins. Translate the wrong strand of DNA by mistake, and translation quickly terminates. (In the above example, a stop codon occurs every 14 amino acids, on average.) But before you jump to that conclusion, consider the cytidine deaminase gene of &lt;i&gt;Anaeromyxobacter dehalogenans strain 2CP-C&lt;/i&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;&lt;span style="color: red; font-size: x-small;"&gt;GTGGACGAGCGCGAGGCGATGCAGGAGGCGCTGGGGCTGGCGCGCGAGGCGGCGGCCCGCGGCGAGGTGCCGGTCGGCGCGGTGGCGCTGTTCGAGGGCCGCGTGGTCGGCCGCGGCGCGAACGCCCGCGAGGCGGCGCGCGATCCCACCGCGCACGCGGAGCTCCTCGCGATCCAGGAGGCGGCGCGCACCCTCGGGCGCTGGCGCCTCACCGGCGTCACGCTGGTGGTGACGCTCGAGCCCTGCGCCATGTGCGCCGGCGCCATGGTGCTCGCCCGCATCGACCGGCTCGTCTACGGGGCGAGCGATCCCAAGGCCGGCTGCACCGGCTCCCTCCAGGACCTGTCGGCGGACCCCCGGCTGAACCACCGGTTCCCGGTGGAGCGCGGCCTGCTGGCCGAGGAGTCCGGCGAGCTCCTCCGGGCCTTCTTCCGGGCCCGCCGGGGCGCCGGGAACGGAAACGGCAACGGCGGCGAGGGTTAG&lt;/span&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;br /&gt;
The translation of the antisense version of this gene is:&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;&lt;span style="color: red; font-size: x-small;"&gt;LTLAAVAVSVPGAPAGPEEGPEELAGLLGQQAALHREPVVQPGVRRQVLEGAGAAGLGIARPVDEPVDAGEHHGAGAHGAGLERHHQRDAGEAPAPEGARRLLDREELRVRGGIARRLAGVRAAADHAALEQRHRADRHLAAGRRLARQPQRLLHRLALVH&lt;/span&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;br /&gt;
Which contains no stop codons! Why does one version of the gene give ten stop codons when anti-translated, whereas the other version gives zero stop codons? &lt;i&gt;Clostridium botulinum&lt;/i&gt; has a genome G+C content of 28% whereas the DNA of &lt;i&gt;Anaeromyxobacter dehalogenans&lt;/i&gt; has a G+C content of 74%. The two organisms favor entirely different codons. &lt;i&gt;Anaeromyxobacter &lt;/i&gt;uses codons TCA, TTA, and CTA only 0.03%, 0%, and 0.02% of the time, respectively. &lt;i&gt;Clostridium &lt;/i&gt;uses the same codons 1.72%, 5.62%, and 4.67% of the time&lt;i&gt;—over 200 times more often&lt;/i&gt; &lt;i&gt;than Anaeromyxobacter&lt;/i&gt;.&lt;br /&gt;
&lt;br /&gt;
Bottom line: Almost any gene in &lt;i&gt;Anaeromyxobacter &lt;/i&gt;(or any high-GC organism, it turns out) can be antisense-translated without generating stop codons. Stop codons occur in antisense genes in inverse proportion to the amount of G+C in the gene. &amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
If it's true that antisense-strand translation is (or has been) an important source of new proteins in nature, the foregoing observation is tremendously relevant, because it means successful reverse translation has likely occurred far more often in high-GC organisms than in low-GC organisms. It suggests that bacteria with high G+C content in their genomes may, in fact, have been the incubators of early proteins. It implies a "GC Eden" scenario in which early life forms had predominantly high-GC genomes. Low-GC organisms then arose through continuous "AT pressure," from large numbers of accumulated GC-to-AT transition mutations. (We know that GC-to-AT transition mutations occur at a much higher rate than AT-to-GC transitions; this fact is not in dispute.)&lt;br /&gt;
&lt;br /&gt;
Even so, we have to ask: What is the evidence for reverse (antisense-strand) translation having occurred in nature? &lt;i&gt;Is&lt;/i&gt; there any such evidence?&lt;br /&gt;
&lt;br /&gt;
More on this subject tomorrow.&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/5372934505641497647/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/06/evolution-and-antisense-translation-of.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/5372934505641497647?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/5372934505641497647?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/06/evolution-and-antisense-translation-of.html" title="Evolution and Antisense Translation of DNA" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><thr:total>1</thr:total></entry><entry gd:etag="W/&quot;CEYFSXczeCp7ImA9WhFSEko.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-585423794019158984</id><published>2013-06-15T00:30:00.000-04:00</published><updated>2013-06-15T00:55:18.980-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-06-15T00:55:18.980-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="palindromic codons" /><category scheme="http://www.blogger.com/atom/ns#" term="new gene creation" /><category scheme="http://www.blogger.com/atom/ns#" term="frameshift mutation" /><category scheme="http://www.blogger.com/atom/ns#" term="evolution" /><category scheme="http://www.blogger.com/atom/ns#" term="erroneous translation" /><title>Thoughts on New 
Gene Origination</title><content type="html">The other day, I wrote a &lt;a href="http://asserttrue.blogspot.com/2013/06/the-trouble-with-darwin.html" target="_blank"&gt;damning critique of Darwin's theory&lt;/a&gt; and offered nothing in the way of a positive alternative to the traditional view of accumulated-point-mutations as a driving force for evolution. It's easy to take potshots at someone else's theory and walk away. As a rule, I don't like naysayers who criticize something, then offer nothing in return. So I'd like to take a moment to try to offer a different perspective on evolution. In particular, I'd like to offer my own theory as to how new genes arise.&lt;br /&gt;
&lt;br /&gt;
The question of where new genes come from is, of course, one of the foremost open problems in biology. Current theory revolves mostly around &lt;i&gt;gene duplication&lt;/i&gt; followed by modification of the duplicated gene (via mutations and deletions) under survival pressure [reference 4 below]. Gene fusion and fission have also been proposed as mechanisms for gene origination [3]. In addition, genes derived from noncoding DNA have recently been described in &lt;i&gt;Drosophila &lt;/i&gt;[1]. Likewise, &lt;a href="http://en.wikipedia.org/wiki/Transposable_element" target="_blank"&gt;transposons&lt;/a&gt; (genes that jump from one location to another) have been implicated in gene biogenesis [3]. &lt;br /&gt;
&lt;br /&gt;
The problem with these theories is that various enzymes are required in order for duplication, transposition, fusion, fission, etc., to occur (to say nothing of transcription, translation initiation, translation elongation, and so on), and existing theories don't explain how these participating enzymes appeared, themselves, in the first place. A fully general theory has to start from the assumption that in pre-cellular, pre-chromosomal, pre-organismic times, genes (if they existed) may have occurred singly, with multiple copies arising through &lt;i&gt;non-enzymatic&lt;/i&gt; replication. Likewise, we should assume that early protein-making machinery was probably non-enzymatic, which is to say entirely RNA-based (i.e., &lt;a href="http://en.wikipedia.org/wiki/Ribozyme" target="_blank"&gt;ribozymal&lt;/a&gt;). If the idea of catalytic RNA is new to you or sounds unreasonably farfetched, please review the &lt;a href="http://www.nobelprize.org/nobel_prizes/chemistry/laureates/1989/press.html" target="_blank"&gt;1989 Nobel Prize research by Altman and Cech&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
The fundamental mechanisms of &lt;i&gt;de novo&lt;/i&gt; gene creation available in pre-enzymatic times might well have been nothing more than ribozymal &lt;i&gt;duplication &lt;/i&gt;of nucleic acid sequences followed by &lt;i&gt;erroneous translation&lt;/i&gt;. "Erroneous translation" can be of two fundamental types: &lt;a href="http://en.wikipedia.org/wiki/Frameshift_mutation" target="_blank"&gt;frameshifted translation&lt;/a&gt;, and reverse translation. (Reverse translation here means transcription of the &lt;i&gt;antisense &lt;/i&gt;strand of DNA and subsequent translation to a polypeptide.)&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-XnanJ6In9tI/Ubur-3JZu3I/AAAAAAAABwA/EB57KGJ7ImU/s1600/frameshift.gif" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="293" src="http://2.bp.blogspot.com/-XnanJ6In9tI/Ubur-3JZu3I/AAAAAAAABwA/EB57KGJ7ImU/s400/frameshift.gif" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;DNA is parsed 3 bases at a time (the 3-base combinations are called &lt;i&gt;codons&lt;/i&gt;; each codon corresponds to an amino acid). If a single base is spuriously added to, or deleted from, a gene, the reading frame is disrupted and a hugely different amino-acid sequence results. This is called a &lt;i&gt;frameshift error&lt;/i&gt; or &lt;i&gt;frameshift mutation&lt;/i&gt;. &lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
Spurious addition or deletion of a single base to a free-floating piece of single-stranded genetic material (RNA or DNA) is all that's needed in order to cause frameshifted translation. The protein that results from a frameshift error is, of course, in general, vastly different from the original protein.&lt;br /&gt;
&lt;br /&gt;
If pre-organismic nucleic acids were single-stranded, then &lt;i&gt;reverse &lt;/i&gt;translation would require 3'-to-5' reading of the nucleic acid as well as 5'-to-3' reading. If, on the other hand, early nucleic acids were double-stranded, then 5'-to-3' (normal direction) translation of each strand would suffice to give one normal and one reverse translation product. (Note for non-biologists: In all known current organisms, reading of DNA and RNA takes place in the 5'-to-3' direction only.)&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-gzTWkr-ebR0/Ubu9l1hBLrI/AAAAAAAABwQ/3Oqg7r-a1q0/s1600/RNA_chemical_structure.GIF" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="314" src="http://2.bp.blogspot.com/-gzTWkr-ebR0/Ubu9l1hBLrI/AAAAAAAABwQ/3Oqg7r-a1q0/s320/RNA_chemical_structure.GIF" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Nucleic acids (RNA and DNA) have &lt;i&gt;directionality&lt;/i&gt;, defined by the orientation of sugar backbone molecules in terms of their 5' and 3' carbons.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
It's interesting to speculate on the role of reverse translation in production of novel proteins, especially as it applies to early biological systems. We don't know if early systems relied on triplet codons (or even if all four bases—guanine, cytosine, adenine, thymine—existed from the beginning). We also don't know if there were 20 amino acids in the beginning. There may have been fewer (or more).&lt;br /&gt;
&lt;br /&gt;
A novel possibility is that early triplet codons were &lt;i&gt;palindromic &lt;/i&gt;(giving identical semantics when read in either direction). There are 16 palindromic codons in the codon lexicon (AGA, GAG, CAC, ACA, ATA, TAT, AAA, and so on) which today encode 15 amino acids out of the 20 commonly used. In a palindromic-codon world, the distinction between "sense" and "antisense" nucleic acid sequences vanishes, because a single-stranded gene made up of palindromic codons could be translated in either direction to give a polypeptide with the same sequence, the only chirality arising from N- to C-terminal polarity. For example, the sequence GGG-CAC-GCG-AAA would give a polypeptide of &lt;i&gt;glycine-histidine-alanine-lysine&lt;/i&gt; whether translated forward or backward, the only difference being that the forward version would have glycine at the N-terminus whereas the reverse version would have glycine at the C-terminus. The secondary and tertiary structures of the two versions would be the same. As long as catalytic function didn't directly depend on an amino or carboxy terminus of an end-acid, the two proteins would also be &lt;i&gt;functionally &lt;/i&gt;indistinguishable.&lt;br /&gt;
&lt;br /&gt;
Codon palindromicity is potentially important in any system in which single-stranded genes are bidirectionally translated, because in the case where a gene &lt;i&gt;does &lt;/i&gt;happen to rely heavily on palindromic codons, the reverse-translated product will (for the reasons just explained) have the potential to be &lt;i&gt;functionally paralogous&lt;/i&gt; to the forward-translated product (to an extent matching the extent of palindromic-codon usage). But this assumes that in early organisms (or pre-organismic soups), single-stranded genes could be translated in the 5'-to-3' direction &lt;i&gt;or &lt;/i&gt;the 3'-to-5' direction. &lt;br /&gt;
&lt;br /&gt;
It turns out modern organisms differ markedly in the degree to which they use palindromic codons, and there are (remarkably) some prokaryotes whose genes use an &lt;i&gt;average &lt;/i&gt;of ~40% palindromic codons. The complementary strand of DNA would, of course, contain palindromic complements: AGA opposite TCT, CCC opposite GGG, etc.&lt;br /&gt;
&lt;br /&gt;
All of this makes for interesting conjecture, but does any of it really apply to the natural world? For example: Do organisms actually employ strategies of "erroneous translation" in creating new proteins? Did today's microbial meta-proteome arise through mechanisms involving frameshifted and/or reverse translation? Is there any evidence of such processes, one way or the other? Tomorrow I want to continue on this theme, presenting a little data to back up some of these strange ideas. Please join me; and bring a biologist-friend with you!&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;References&lt;/b&gt;
&lt;br /&gt;
1. Begun, D., et al. Evidence for de novo evolution of testis-expressed genes in the &lt;i&gt;Drosophila yakuba/Drosophila erecta&lt;/i&gt; clade. &lt;i&gt;Genetics &lt;/i&gt;176, 1131–1137 (2007).
&lt;br /&gt;
2. Feschotte, C., &amp;amp; Pritham, E. DNA transposons and the evolution of eukaryotic genomes. &lt;i&gt;Annual Review of Genetics&lt;/i&gt; 41, 331–368 (2007).
&lt;br /&gt;
3. Jones, C. D., &amp;amp; Begun, D. J. Parallel evolution of chimeric fusion genes. &lt;i&gt;Proceedings of the National Academy of Sciences&lt;/i&gt; 102, 11373–11378 (2005).
&lt;br /&gt;
4. Ohno, S. &lt;i&gt;Evolution by Gene Duplication &lt;/i&gt;(Springer-Verlag, Berlin, 1970).
&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/585423794019158984/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/06/thoughts-on-new-gene-origination.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/585423794019158984?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/585423794019158984?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/06/thoughts-on-new-gene-origination.html" title="Thoughts on New Gene Origination" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-XnanJ6In9tI/Ubur-3JZu3I/AAAAAAAABwA/EB57KGJ7ImU/s72-c/frameshift.gif" height="72" width="72" /><thr:total>1</thr:total></entry><entry gd:etag="W/&quot;C0MEQXw7cSp7ImA9WhFSEEw.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-2307755645069810387</id><published>2013-06-12T00:30:00.000-04:00</published><updated>2013-06-12T00:30:00.209-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-06-12T00:30:00.209-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="Darwin" /><category scheme="http://www.blogger.com/atom/ns#" term="evolution" /><category scheme="http://www.blogger.com/atom/ns#" term="mutations" /><category 
scheme="http://www.blogger.com/atom/ns#" term="genomics" /><category scheme="http://www.blogger.com/atom/ns#" term="speciation" /><category scheme="http://www.blogger.com/atom/ns#" term="Darwinism" /><title>The Trouble with Darwin</title><content type="html">As a biologist, I find Darwin's theory hugely disappointing. It's better than the alternative (which is to believe in magic, basically), but not by much, sadly.&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-TIlO49OJDrA/UbfTXyggmvI/AAAAAAAABvg/HY_s69QoOaI/s1600/charles-darwin.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="200" src="http://4.bp.blogspot.com/-TIlO49OJDrA/UbfTXyggmvI/AAAAAAAABvg/HY_s69QoOaI/s200/charles-darwin.jpg" width="190" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;&lt;i&gt;Charles Darwin died before Mendel &lt;br /&gt;proved the existence of genes&lt;/i&gt;.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
As scientific theories go, the theory of evolution is easily the weakest of all major scientific theories. It's a commendable piece of work in its ability to stir discussion, but terrible in most other ways.&lt;br /&gt;
&lt;br /&gt;
To be useful, a scientific theory has to do a minimum of two things: explain what can be observed, and provide testable predictions. Darwin's theory is weak on the first count and useless on the second.&lt;br /&gt;
&lt;br /&gt;
Evolutionary theory explains practically nothing, because every explanation of the theory is rooted in "survival of the fittest," which is a circular notion, utterly content-free. "Fittest" means most able to survive. Survival of the fittest means survival of those who survive.&lt;br /&gt;
&lt;br /&gt;
Ironically, Darwin's landmark work was called &lt;a href="http://www.literature.org/authors/darwin-charles/the-origin-of-species/" target="_blank"&gt;On the Origin of Species&lt;/a&gt;. Yet it doesn't actually explain speciation, except in the most vacuous and speculative of terms. Of course, we can't set too high an expectation for Darwin, since he didn't live to see the publication of Mendel's work (the word "genetics" wouldn't exist until more than 20 years after Darwin's death), but still. Speciation is portrayed by Darwin as the outcome of the accumulation of small, gradual changes. That's all the explanation he offers.&lt;br /&gt;
&lt;br /&gt;
But the explanation is wrong. Or at least it doesn't accord well with the facts. It doesn't explain &lt;a href="http://en.wikipedia.org/wiki/Cambrian_explosion" target="_blank"&gt;the Cambrian Explosion&lt;/a&gt;, for example, or the sudden appearance of intelligence in hominids, or the rapid recovery (and net expansion!) of the biosphere in the wake of at least &lt;a href="https://en.wikipedia.org/wiki/Extinction_event#Major_extinction_events" target="_blank"&gt;five super-massive extinction events&lt;/a&gt; in the most recent 15% of Earth's existence.&lt;br /&gt;
&lt;br /&gt;
One of the most frustrating aspects of evolutionary theory (this is no fault of the theory's, though) is that it is so hard to test in the laboratory. The fact is, no one has ever seen speciation happen in the laboratory, under repeatable conditions, and until that happens we're at a distinct disadvantage for understanding speciation. (Incidentally, I don't count plant hybridization or breeding anomalies in fruit flies whose sexuality is under the control of microbial endosymbionts as examples of speciation.) &lt;br /&gt;
&lt;br /&gt;
When I was in school, we were taught that mutations in DNA were the driving force behind evolution, an idea that is now thoroughly discredited. The overwhelming majority of non-neutral mutations are deleterious (they reduce, not increase, survival). Most mutations lead to &lt;i&gt;loss of function&lt;/i&gt; (this is easily demonstrated in the lab), not gain of function. Evolutionary theory is great at explaining things like the loss of eyesight by cave-dwelling creatures (e.g., bats). It's terrible at explaining gain of function.&lt;br /&gt;
&lt;br /&gt;
Even if mutations were capable of driving evolution, they simply don't happen fast enough to account for observed rates of speciation. In bacteria, the &lt;a href="http://www.pnas.org/content/96/22/12638.full.pdf" target="_blank"&gt;measured rate&lt;/a&gt; of 16S rRNA divergence due to point mutations is only 1% per 50 million years. And yet, there were &lt;i&gt;no flowering plants on earth&lt;/i&gt; as recently as 150 million years ago! Does it take a biologist to see the disconnect?&lt;br /&gt;
&lt;br /&gt;
I bring all this up because I've spent some time recently doing genomics research aimed at exploring mechanisms for new-protein creation/differentiation (mechanisms not relying wholly nor even &lt;i&gt;mainly&lt;/i&gt; on point mutations), and I wanted to set the stage for discussing that research here. Over the next week or so, I'll be presenting some new ideas and findings. Hopefully, we can put some much-needed flesh on Darwin by exploring testable notions of how new protein motifs can arise quickly (without reliance on magic). &lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/2307755645069810387/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/06/the-trouble-with-darwin.html#comment-form" title="12 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2307755645069810387?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2307755645069810387?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/06/the-trouble-with-darwin.html" title="The Trouble with Darwin" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-TIlO49OJDrA/UbfTXyggmvI/AAAAAAAABvg/HY_s69QoOaI/s72-c/charles-darwin.jpg" height="72" width="72" /><thr:total>12</thr:total></entry><entry gd:etag="W/&quot;AkEEQnY-cSp7ImA9WhFTGE4.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-7960713977638599450</id><published>2013-06-10T00:30:00.000-04:00</published><updated>2013-06-10T00:30:03.859-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-06-10T00:30:03.859-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Clade 3 catalase" /><category scheme="http://www.blogger.com/atom/ns#" term="anaerobic catalase" /><category scheme="http://www.blogger.com/atom/ns#" term="catalase" /><category scheme="http://www.blogger.com/atom/ns#" 
term="non-heme catalase" /><category scheme="http://www.blogger.com/atom/ns#" term="multifunctional catalase" /><category scheme="http://www.blogger.com/atom/ns#" term="Archaea" /><category scheme="http://www.blogger.com/atom/ns#" term="Halobacteriacea" /><title>A Catalase Conundrum</title><content type="html">When I was in grad school (U.C. Davis) in the late 1970s, the bacterial world was simply the prokaryotic world, and vice versa. There hadn't yet come a distinction between eubacteria and &lt;a href="http://en.wikipedia.org/wiki/Archaea" target="_blank"&gt;Archaea&lt;/a&gt;. But now we know, or think we know, that prokaryota come in two fundamental flavors: the true bacteria (eubacteria), and the Archaea (primitive extremophiles). If you were to want to count organelles (mitochondria, chloroplasts, others) as a third fundamental grouping, I suppose you could, with some justification.&lt;br /&gt;
&lt;br /&gt;
At this writing, about 400 distinct Archaeal isolates, belonging to 
around 75 genera, have been DNA-sequenced. You can see a list of them by going to &lt;a href="http://genomevolution.org/CoGe/OrganismView.pl?org_desc=Archaea"&gt;http://genomevolution.org/CoGe/OrganismView.pl?org_desc=Archaea&lt;/a&gt; and looking in the Organisms box. You'll see over 200 organisms listed, but bear in mind they belong to only about 75 genera. (Most genera are represented by more than one species and/or more than one isolate per species, in other words.)&lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-F3hxQRmirvE/UbNwQ4RkhmI/AAAAAAAABvQ/YP3-F2Kqkio/s1600/borax.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-F3hxQRmirvE/UbNwQ4RkhmI/AAAAAAAABvQ/YP3-F2Kqkio/s320/borax.jpg" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Salt-loving Archaea species have been found growing in borax-saturated&lt;br /&gt;
desert ponds. The species growing in this small lake produce a&lt;br /&gt;
carotenoid pigment that gives the water a pink appearance.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
The Archaea were once thought to be exclusively anaerobes, but it turns out there are a couple dozen aerobic (or facultatively anaerobic) genera in the group. In my own spare-time research, I've found that about 20% of the 75 sequenced Archaeons (all of them obligate anaerobes) have a &lt;a href="http://en.wikipedia.org/wiki/Catalase" target="_blank"&gt;catalase&lt;/a&gt; gene. (Catalase is the enzyme that breaks hydrogen peroxide down to water and oxygen.) Oddly, very few of the aerobic Archaea (except for the Halobacteriaceae group) show any evidence of having catalase. This is exactly the reverse of what's expected. In the rest of the living kingdom (from bacteria to higher plants and animals), aerobes universally have catalase; strict anaerobes don't have catalase (or at least, they aren't supposed to; but see &lt;a href="http://asserttrue.blogspot.com/2013/06/strict-anaerobes-that-produce-catalase.html" target="_blank"&gt;this post&lt;/a&gt; for some surprising exceptions).&lt;br /&gt;
&lt;br /&gt;
This is a hugely unexpected finding: &lt;i&gt;Many anaerobic Archaeons have catalase, but not all aerobic ones do.&lt;/i&gt; Some enterprising grad student should tackle this and make a thesis project out of it.&lt;br /&gt;
&lt;br /&gt;
In case you're that student, here are some additional clues.&lt;br /&gt;
&lt;br /&gt;
Let's back up for a second and look at the Big Picture. No matter where on the Tree of Life you go, catalases come in only a few major types. (See the excellent &lt;a href="http://www.loewenlabs.com/peter/wp-content/themes/atahualpa/Manuscripts/CMLS_61_192.pdf" target="_blank"&gt;2003 review paper by Chelikani, Fita, and Loewen&lt;/a&gt; for details.) For example, there are heme-containing and non-heme catalases. Most of the time, what we think of as "catalase" is &lt;i&gt;heme-containing&lt;/i&gt; catalase (and yes, that means it contains iron). In the heme-containing group, you have &lt;i&gt;monofunctional &lt;/i&gt;catalase as well as &lt;i&gt;bifunctional &lt;/i&gt;catalase-peroxidases or hydroperoxidases (katG). The monofunctionals come in big- and small-subunit varieties. (The biggies have subunits of 75 kDa or more and comprise just over 2100 base-pairs of DNA. The smalls have subunits under 60 kDa and typically top out at 1500 base-pairs.)&lt;br /&gt;
&lt;br /&gt;
Here's what you really need to know: Within the monofunctionals, there are three &lt;i&gt;clades &lt;/i&gt;(major subgroupings) of catalase. Clades 1 and 3 are &lt;i&gt;small-subunit&lt;/i&gt; enzymes. Clade 1 is primarily of plant origin and is relatively rare in bacteria (the best-known examples probably being katX of &lt;i&gt;Bacillus subtilis&lt;/i&gt; and catF of &lt;i&gt;Pseudomonas syringae&lt;/i&gt;). Clade 3 takes in a huge number of catalases from bacteria, fungi, and various eukaryotes. (For Clade 3, think &lt;i&gt;Staphylococcus &lt;/i&gt;catalase.)&amp;nbsp; Clade 2 is the &lt;i&gt;large&lt;/i&gt;-subunit enzyme (think &lt;i&gt;E. coli&lt;/i&gt; katE catalase).&lt;br /&gt;
&lt;br /&gt;
The multifunctionals tend to be large (over 2100 base-pairs of DNA). &lt;br /&gt;
&lt;br /&gt;
The non-heme catalases contain manganese instead of iron and are not your typical catalases. Let's leave it at that.&lt;br /&gt;
&lt;br /&gt;
What do the Archaeons produce? From what little probing I've done, it seems the anaerobic Archaeons that have catalase use a modified Clade 3 type of enzyme that has little in common with other Clade 3 catalases. A few of the methane producers show good sequence agreement with&lt;i&gt; Bacteroides fragilis &lt;/i&gt;catalase, but &lt;i&gt;most &lt;/i&gt;anaerobic Archaeal catalases do &lt;i&gt;not &lt;/i&gt;show good sequence concordance with &lt;i&gt;any &lt;/i&gt;known eubacterial catalases. So it's entirely possible that a fourth clade of purely Archaeal small-subunit catalases (unlike anything else in the plant or animal worlds) awaits characterization. &lt;br /&gt;
&lt;br /&gt;
The aerobic Archaeons that have catalase are all halophiles (members of the &lt;i&gt;Halobacteriaceae&lt;/i&gt;), and all have large-subunit multifunctional peroxidases similar to those of the Cyanobacteria.&lt;br /&gt;
&lt;br /&gt;
Mysteries waiting to be solved:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Why is it that the aerobes &lt;i&gt;Sulfolobus, Pyrobaculum,&lt;/i&gt; and &lt;i&gt;Aeropyrum &lt;/i&gt;do not appear to have catalase? Is it that they don't have catalase, or do they have some as-yet-undiscovered new type of catalase?&lt;/li&gt;
&lt;li&gt;Why is it that certain methane-generating anaerobes (e.g., &lt;i&gt;Methanosarcina&lt;/i&gt;) have Clade 3 catalases but the rest of the methane-producing Archaea have catalases that don't match anything else in the living world? Did the former group get their catalase(s) by way of horizontal gene transfer from anaerobic eubacteria?&lt;/li&gt;
&lt;li&gt;Did the multifunctional catalases of the &lt;i&gt;Halobacteriaceae &lt;/i&gt;originally come from cyanobacteria (perhaps by way of plasmids)?&lt;/li&gt;
&lt;li&gt;What overlap, if any, exists between Archaeal catalases and the catalases of algal chloroplasts?&lt;/li&gt;
&lt;/ul&gt;
If you find the answers to any of these, let me know!&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/7960713977638599450/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/06/a-catalase-conundrum.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7960713977638599450?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7960713977638599450?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/06/a-catalase-conundrum.html" title="A Catalase Conundrum" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-F3hxQRmirvE/UbNwQ4RkhmI/AAAAAAAABvQ/YP3-F2Kqkio/s72-c/borax.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;D0QNRn8-eSp7ImA9WhFTFks.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-2170243598752833146</id><published>2013-06-08T00:18:00.000-04:00</published><updated>2013-06-08T00:23:17.151-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-06-08T00:23:17.151-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="proteomics" /><category scheme="http://www.blogger.com/atom/ns#" term="bacteria" /><category scheme="http://www.blogger.com/atom/ns#" term="protein blast" /><category scheme="http://www.blogger.com/atom/ns#" term="Proteus mirabilis" /><category 
scheme="http://www.blogger.com/atom/ns#" term="anaerobes" /><category scheme="http://www.blogger.com/atom/ns#" term="tblastn" /><category scheme="http://www.blogger.com/atom/ns#" term="catalase" /><category scheme="http://www.blogger.com/atom/ns#" term="BLAST query" /><title>Strict Anaerobes that Produce Catalase</title><content type="html">One thing every new bacteriology student learns on Day One is that some microbes are strict anaerobes (completely unable to use oxygen), and a universal characteristic of strict anaerobes is that they lack an important enzyme called &lt;i&gt;catalase&lt;/i&gt; that breaks down hydrogen peroxide to oxygen and water. The idea is that anaerobes don't &lt;i&gt;need &lt;/i&gt;to have catalase, because they don't live in the kind of highly oxidized environments where hydrogen peroxide forms. Lack of catalase is supposedly why many anaerobes are killed upon exposure to air. According to legend, once oxygen gets into the cells, hydrogen peroxide starts to build up, and with no catalase to break it down, anaerobes choke on toxic peroxides.&lt;br /&gt;
&lt;br /&gt;
I'll let you in on a little secret, though. This nice-sounding story (about peroxide buildup killing anaerobes upon exposure to air) &lt;a href="http://asserttrue.blogspot.com/2013/04/hydrogen-peroxide-and-scientific-dogma.html" target="_blank"&gt;turns out to be mostly conjecture&lt;/a&gt;, not well supported by science. Even the bit about anaerobes lacking catalase &lt;i&gt;isn't completely true.&lt;/i&gt; Many anaerobes do make catalase.&lt;br /&gt;
&lt;br /&gt;
For today's post, I did a protein-sequence &lt;a href="http://www.clarkfrancis.com/blast/Blast_what_and_how.html" target="_blank"&gt;BLAST&lt;/a&gt; search against several families of obligate anaerobes using the &lt;i&gt;katA &lt;/i&gt;gene of &lt;i&gt;Proteus mirabilis&lt;/i&gt; as a reference, and I was quickly able to identify two dozen strict anaerobes that do, in fact, have a catalase gene (see table below).&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;b&gt;&lt;u&gt;Table 1: Strict Anaerobes that Produce Catalase&lt;/u&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;u&gt;(tblastn query: Proteus mirabilis katA gene)&lt;/u&gt;&lt;b&gt;&lt;u&gt; &lt;/u&gt;&lt;/b&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div align="center"&gt;
&lt;table border="1" cellpadding="4" cellspacing="0" style="width: 100%;"&gt;
 &lt;colgroup&gt;&lt;col width="162*"&gt;&lt;/col&gt;
 &lt;col width="29*"&gt;&lt;/col&gt;
 &lt;col width="33*"&gt;&lt;/col&gt;
 &lt;col width="32*"&gt;&lt;/col&gt;
 &lt;/colgroup&gt;&lt;tbody&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Organism&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Length (AA)&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;E-value&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Percent
   identities&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Alkaliphilus metalliredigens
   strain QYMF &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;475&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;4.0E-97&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;40.0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Anaerococcus prevotii strain
   DSM 20548 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;473&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;2.0E-162&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;59.6&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Anaerococcus vaginalis strain
   ATCC 51170 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;482&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;3.0E-171&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;61.4&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Bacteroides coprocola strain
   DSM 17136 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;479&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;68.6&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Bacteroides coprophilus strain
   DSM 18228 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;477&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;68.3&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Bacteroides eggerthii strain
   1_2_48FAA &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;478&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;69.6&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Bacteroides intestinalis strain
   DSM 17393 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;478&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;70.0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Bacteroides ovatus strain
   3_8_47FAA &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;478&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;69.0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Bacteroides plebeius strain DSM
   17135&lt;/span&gt;&lt;/span&gt;&lt;/i&gt;&lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;479&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;68.2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Bacteroides thetaiotaomicron
   strain VPI-5482 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;480&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;68.7&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Clostridium botulinum A3 strain
   Loch Maree &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;341&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;4.0E-67&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;38.1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Clostridium botulinum B1 strain
   Okra &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;463&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;1.0E-67&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;33.9&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Clostridium hathewayi strain
   WAL-18680 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;474&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;7.0E-167&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;58.6&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Clostridium lentocellum strain
   DSM 5427 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;476&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;2.0E-168&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;59.4&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Clostridium phytofermentans
   strain ISDg &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;472&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;3.0E-107&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;43.6&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Desulfitobacterium
   dichloroeliminans strain LMG P-21439 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;477&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;72.3&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Desulfitobacterium hafniense
   DCB-2 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;493&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;1.0E-100&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;39.9&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Desulfosporosinus youngiae
   strain DSM 17734 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;491&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;7.0E-103&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;41.1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Desulfotomaculum ruminis strain
   DSM 2154 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;477&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;2.0E-142&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;52.2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Dethiobacter alkaliphilus
   strain AHT 1 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;468&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;5.0E-102&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;40.3&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Lachnospiraceae bacterium
   strain 3_1_57FAA_CT1 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;470&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;1.0E-165&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;59.5&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Propionibacterium acnes strain
   266 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;444&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;3.0E-114&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;47.2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Syntrophobotulus glycolicus
   strain DSM 8271 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;484&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;2.0E-102&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;40.7&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="63%"&gt;&lt;i&gt;&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;Veillonella sp. strain 3_1_44 &lt;/span&gt;&lt;/span&gt;&lt;/i&gt;
  &lt;/td&gt;
  &lt;td width="11%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;474&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;div align="CENTER"&gt;
&lt;span style="font-family: Arial;"&gt;&lt;span style="font-size: x-small;"&gt;66.0&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;br /&gt;
Each entry in this table represents a protein-sequence (not DNA sequence) match between a gene in the organism listed and the catalase gene of &lt;i&gt;Proteus mirabilis&lt;/i&gt;. (&lt;i&gt;Proteus &lt;/i&gt;is a facultative anaerobe related to &lt;i&gt;E. coli&lt;/i&gt; and &lt;i&gt;Salmonella&lt;/i&gt;.) The length of each organism's catalase enzyme, in amino acids, is shown under Length. (By way of reference, the &lt;i&gt;Proteus &lt;/i&gt;catalase is 484 amino acids long.) E-value is the so-called &lt;i&gt;expectation value&lt;/i&gt;, a measure of how likely the sequence match would be by chance. All of the values shown are extraordinarily low. "Percent identities" is the percentage of amino-acid matches between the &lt;i&gt;Proteus &lt;/i&gt;enzyme and the target organism's enzyme. Values in the 30% to 40% range are not unusual for functionally related enzymes in otherwise distantly related organisms. Values above 60% tend to suggest a phylogenetic relationship, whereas in two organisms that are known to be unrelated, a value above 70% would (in many cases) be considered evidence of possible horizontal gene transfer.&amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
Here's the protein-blast query sequence I used, in case you want to verify these results (or go looking for more catalase-producing anaerobes):&lt;br /&gt;
&lt;br /&gt;
&lt;dl&gt;
&lt;dt&gt;&lt;span style="font-family: Lucida Sans Typewriter;"&gt;&lt;span style="font-size: xx-small;"&gt;&amp;gt;Proteus
 mirabilis strain HI4320(v1, unmasked), Name: PMI1740,
 YP_002151471.1, katA, Type: CDS, Feature Location: (Chr: 1,
 1861974..1863428) Genomic Location: 1861974-1863428&lt;/span&gt;&lt;/span&gt;&lt;/dt&gt;
&lt;dt&gt;
 &lt;span style="font-family: Lucida Sans Typewriter;"&gt;&lt;span style="font-size: xx-small;"&gt;MEKKKLTTAAGAPVVDNNNVITAGPRGPMLLQDVWFLEKLAHFDREVIPERRMHAKGSGAFGTFTVTHDITKYTRAKIFSEVGKKTEMFARFSTVAGER&lt;/span&gt;&lt;/span&gt;&lt;/dt&gt;
&lt;dt&gt;
 &lt;span style="font-family: Lucida Sans Typewriter;"&gt;&lt;span style="font-size: xx-small;"&gt;GAADAERDIRGFALKFYTEEGNWDMVGNNTPVFYLRDPLKFPDLNHIVKRDPRTNMRNMAYKWDFFSHLPESLHQLTIDMSDRGLPLSYRFVHGFGSHT&lt;/span&gt;&lt;/span&gt;&lt;/dt&gt;
&lt;dt&gt;
 &lt;span style="font-family: Lucida Sans Typewriter;"&gt;&lt;span style="font-size: xx-small;"&gt;YSFINKDNERFWVKFHFRCQQGIKNLMDDEAEALVGKDRESSQRDLFEAIERGDYPRWKLQIQIMPEKEASTVPYNPFDLTKVWPHADYPLMDVGYFEL&lt;/span&gt;&lt;/span&gt;&lt;/dt&gt;
&lt;dt&gt;
 &lt;span style="font-family: Lucida Sans Typewriter;"&gt;&lt;span style="font-size: xx-small;"&gt;NRNPDNYFSDVEQAAFSPANIVPGISFSPDKMLQGRLFSYGDAHRYRLGVNHHQIPVNAPKCPFHNYHRDGAMRVDGNSGNGITYEPNSGGVFQEQPDF&lt;/span&gt;&lt;/span&gt;&lt;/dt&gt;
&lt;dt&gt;
 &lt;span style="font-family: Lucida Sans Typewriter;"&gt;&lt;span style="font-size: xx-small;"&gt;KEPPLSIEGAADHWNHREDEDYFSQPRALYELLSDDEHQRMFARIAGELSQASKETQQRQIDLFTKVHPEYGAGVEKAIKVLEGKDAK&lt;/span&gt;&lt;/span&gt;&lt;/dt&gt;
&lt;/dl&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/2170243598752833146/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/06/strict-anaerobes-that-produce-catalase.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2170243598752833146?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2170243598752833146?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/06/strict-anaerobes-that-produce-catalase.html" title="Strict Anaerobes that Produce Catalase" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><thr:total>1</thr:total></entry><entry gd:etag="W/&quot;C0cEQH4-cSp7ImA9WhFTEEs.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-8872564714009652150</id><published>2013-06-01T00:30:00.000-04:00</published><updated>2013-06-01T00:30:01.059-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-06-01T00:30:01.059-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="proteomics" /><category scheme="http://www.blogger.com/atom/ns#" term="Szybalski's rule" /><category scheme="http://www.blogger.com/atom/ns#" term="Szybalski" /><category scheme="http://www.blogger.com/atom/ns#" term="genome" /><category scheme="http://www.blogger.com/atom/ns#" term="Chargaff" /><category scheme="http://www.blogger.com/atom/ns#" term="G+C content" 
/><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="purine loading" /><category scheme="http://www.blogger.com/atom/ns#" term="genomics" /><category scheme="http://www.blogger.com/atom/ns#" term="mitochondria" /><title>A New Biological Constant?</title><content type="html">Earlier, I gave &lt;a href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is-broadly.html" target="_blank"&gt;evidence&lt;/a&gt; for a surprising relationship between the amount of G+C (guanine plus cytosine) in DNA and the amount of "purine loading" on the message strand in coding regions. The fact that message strands are often purine-rich is not new, of course; it's called Szybalski's Rule. What's new and unexpected is that the amount of G+C in the genome lets you &lt;i&gt;predict &lt;/i&gt;the amount of purine loading. Also, Szybalski's rule is not always right.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-qZqvIBPqEag/UakSbGwbYZI/AAAAAAAABtQ/7UJ2We6MXRM/s1600/BACTERIA+260.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="293" src="http://1.bp.blogspot.com/-qZqvIBPqEag/UakSbGwbYZI/AAAAAAAABtQ/7UJ2We6MXRM/s400/BACTERIA+260.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Genome A+T content versus message-strand purine content (A+G) for 260 bacterial genera. Chargaff's second parity rule predicts a horizontal line at Y = 0.50. (Szybalski's rule says that all points should lie at or above 0.50.) Surprisingly, as A+T approaches 1.0, A/T approaches the Golden Ratio.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
When you look at coding regions from many different bacterial species, you find that if a species has DNA with a G+C content below about 68%, it tends to have more purines than pyrimidines on the message strand (thus purine-rich mRNA). On the other hand, if an organism has extremely GC-rich DNA (G+C &amp;gt; 68%), a gene's message strand tends to have &lt;i&gt;more pyrimidines than purines&lt;/i&gt;. What it means is that Szybalski's Rule is correct &lt;i&gt;only for organisms with genome G+C content less than 68%&lt;/i&gt;. And Chargaff's second parity rule (which says that A=T and G=C even within a &lt;i&gt;single strand&lt;/i&gt; of DNA) is flat-out wrong all the time, except at the 68% G+C point, where Chargaff is right now and then by chance.&lt;br /&gt;
&lt;br /&gt;
Since the last time I wrote on this subject, I've had the chance to look at more than 1,000 additional genomes. What I've found is that the relationship between purine loading and G+C content applies not only to bacteria (and archaea) and eukaryotes, but to mitochondrial DNA, chloroplast DNA, and virus genomes (plant, animal, phage), as well.&lt;br /&gt;
&lt;br /&gt;
The accompanying graphs tell the story, but I should explain a change in the way these graphs are prepared versus the graphs in my earlier posts. &lt;a href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is-broadly.html" target="_blank"&gt;Earlier&lt;/a&gt;, I plotted G+C along the X-axis and purine/pyrimidine ratio on the Y-axis. I now plot A+T on the X-axis instead of G+C, in order to convert an inverse relationship to a &lt;i&gt;direct &lt;/i&gt;relationship. Also, I now plot A+G (purines, as a mole fraction) on the Y-axis. Thus, X- and Y-axes are now &lt;i&gt;both &lt;/i&gt;expressed in mole fractions, hence both are normalized to the unit interval (i.e., all values range from 0..1).&lt;br /&gt;
&lt;br /&gt;
The graph above shows the relationship between genome A+T content and purine content of message strands in genomes for 260 bacterial genera. The straight line is regression-fitted to minimize the sum of squared absolute error. (Software by &lt;a href="http://zunzun.com/"&gt;http://zunzun.com&lt;/a&gt;.) The line conforms to:&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;y = a + bx&lt;/pre&gt;
&lt;pre&gt;&amp;nbsp;&lt;/pre&gt;
&lt;pre&gt;where:&lt;/pre&gt;
&lt;pre&gt;a =  0.45544384965539358
b =  0.14454244707261443&lt;/pre&gt;
&lt;br /&gt;
&lt;br /&gt;
The line predicts that if a genome were to consist entirely of G+C (guanine and cytosine), it would be 45.54% guanine, whereas if (in some mythical creature) the genome were to consist entirely of A+T (adenine and thymine), adenine would comprise 59.99% of the DNA. Interestingly, the 95% confidence interval permits a value of 0.61803 at X = 1.0, which would mean that as guanine and cytosine diminish to zero, A/T approaches the &lt;a href="http://www.mathsisfun.com/numbers/golden-ratio.html" target="_blank"&gt;Golden Ratio&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
Do the most primitive bacteria (Archaea) also obey this relationship? Yes, they do. In preparing the graph below, I analyzed codon usage in 122 Archaeal genera to obtain A, G, T, and C relative proportions in coding regions of genes. As you can see, the same basic relationship exists between purine content and A+T in Archaea as in Eubacteria. Regression analysis yielded a line with a slope of 0.16911 and a vertical offset of 0.45865. So again, it's possible (or maybe it's just a &lt;i&gt;very &lt;/i&gt;strange coincidence) that A/T approaches the Golden Ratio as A+T approaches unity.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-2sc9t6QeaWs/UakjNFOZBWI/AAAAAAAABts/lBrbzRV8u4I/s1600/ARCHAEA+(125).png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="300" src="http://3.bp.blogspot.com/-2sc9t6QeaWs/UakjNFOZBWI/AAAAAAAABts/lBrbzRV8u4I/s400/ARCHAEA+(125).png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Analysis of coding regions in 122 Archaea reveals that the same relationship exists between A+T content and purine mole-fraction (A+G) as exists in eubacteria.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
For the graph below, I analyzed 114 eukaryotic genomes (everything from fungi and protists to insects, fish, worms, flowering and non-flowering plants, mosses, algae, and sundry warm- and cold-blooded animals). The slope of the generated regression line is 0.11567 and the vertical offset is 0.46116.&lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-_CahWJUaJn4/UakihM18D1I/AAAAAAAABtc/pDbpkh-v7qg/s1600/EUKARYOTES+114.png" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="300" src="http://1.bp.blogspot.com/-_CahWJUaJn4/UakihM18D1I/AAAAAAAABtc/pDbpkh-v7qg/s400/EUKARYOTES+114.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Eukaryotic organisms (N=114).&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
Mitochondria and chloroplasts (see the two graphs below) show a good bit more scatter in the data, but regression analysis still comes back with positive slopes (0.06702 and 0.13188, respectively) for the line of least squared absolute error.&lt;br /&gt;
&lt;br /&gt;
&lt;table&gt;
&lt;tbody&gt;
&lt;tr&gt;&lt;td&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-wbOkSK51kzo/UakxSt4WPnI/AAAAAAAABuo/FzaerPvyR4Y/s1600/MITOCHONDRIA+(203).png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-wbOkSK51kzo/UakxSt4WPnI/AAAAAAAABuo/FzaerPvyR4Y/s320/MITOCHONDRIA+(203).png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Mitochondrial DNA (N=203).&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/td&gt;
&lt;td&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-xZHYw1S2lgE/Uakxacr80oI/AAAAAAAABuw/WxyopBesS8s/s1600/CHLOROPLASTS+(227).png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-xZHYw1S2lgE/Uakxacr80oI/AAAAAAAABuw/WxyopBesS8s/s320/CHLOROPLASTS+(227).png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Chloroplast DNA (N=227).&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
To see if this same fundamental relationship might hold even for &lt;i&gt;viral &lt;/i&gt;genetic material, I looked at codon usage in 229 varieties of bacteriophage and 536 plant and animal viruses ranging in size from 3 kilobases to over 200 kilobases. Interestingly enough, the relationship between A+T and message-strand purine loading does indeed apply to viruses, despite the absence of dedicated protein-making machinery in a virion.&lt;br /&gt;
&lt;br /&gt;
&lt;table&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-KjoQQ6X53vE/UakxkRLmWhI/AAAAAAAABu4/EecHEbBVGOA/s1600/VIRUSES+536.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-KjoQQ6X53vE/UakxkRLmWhI/AAAAAAAABu4/EecHEbBVGOA/s320/VIRUSES+536.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Plant and animal viruses (N=536).&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/td&gt;
&lt;td&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-l0oHQ_hT2rU/UakxrcD72wI/AAAAAAAABvA/xd_dbZ-A1Nc/s1600/PHAGE+229.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-l0oHQ_hT2rU/UakxrcD72wI/AAAAAAAABvA/xd_dbZ-A1Nc/s320/PHAGE+229.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Bacteriophage (N=229).&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
For the 536 plant and animal viruses (above left), the regression line has a slope of 0.23707 and reaches Y = 0.62337 at X = 1.0. For bacteriophage (above right), the line's slope is 0.13733 and the vertical offset is 0.46395. (When inspecting the graphs, take note that the vertical-axis scaling is not the same for each graph. Hence the slopes are deceptive.) The bacteriophage line reaches Y = 0.60128 at X = 1.0 (that is, 0.46395 + 0.13733). So again, it's possible A/T approaches the golden ratio as A+T approaches 100%.&lt;br /&gt;
&lt;br /&gt;
The fact that viral nucleic acids follow the same purine trajectories as their hosts perhaps shouldn't come as a surprise, because viral genetic material is (in general) highly adapted to host machinery. Purine loading appropriate to the A+T milieu is just another adaptation.&lt;br /&gt;
&lt;br /&gt;
It's striking that so many genomes, from so many diverse organisms (eubacteria, archaea, eukaryotes, viruses, bacteriophages, plus organelles), follow the same basic law of approximately&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;span style="font-size: large;"&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;A+G = 0.46 + 0.14 * (A+T)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;br /&gt;
The above law is as universal a law of biology as I've ever seen. The only question is what to call the slope term. It's clearly a biological constant of considerable significance. Its physical interpretation is clear: It's the rate at which purines are accumulated in mRNA as genome A+T content increases. It says that a 1% increase in A+T content (or a 1% &lt;i&gt;decrease &lt;/i&gt;in genome G+C content) is worth a 0.14% increase in purine content in message strands. Maybe it should be called the purine rise rate? The purine amelioration rate?&lt;br /&gt;
&lt;br /&gt;
Biologists, please feel free to get in touch to discuss. I'm interested in hearing your ideas. Reach out to me on LinkedIn, or simply leave a comment below.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/8872564714009652150/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/06/a-new-biological-constant.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/8872564714009652150?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/8872564714009652150?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/06/a-new-biological-constant.html" title="A New Biological Constant?" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-qZqvIBPqEag/UakSbGwbYZI/AAAAAAAABtQ/7UJ2We6MXRM/s72-c/BACTERIA+260.png" height="72" width="72" /><thr:total>3</thr:total></entry><entry gd:etag="W/&quot;DkEEQHc_cSp7ImA9WhBaGUo.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-3553011181881743649</id><published>2013-05-31T00:30:00.000-04:00</published><updated>2013-05-31T00:30:01.949-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-31T00:30:01.949-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="bookmarklet" /><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics" /><category scheme="http://www.blogger.com/atom/ns#" term="omics" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" 
/><category scheme="http://www.blogger.com/atom/ns#" term="genomics" /><category scheme="http://www.blogger.com/atom/ns#" term="scriptlet" /><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics bookmarklet" /><title>A Bioinformatics Bookmarklet</title><content type="html">Sometimes you want to scrape some screen data and analyze it on the spot without copying it to another program. It turns out there's an easy way to do just that. Just highlight the information (by click-dragging the mouse to Select a section of screen data), then run a piece of JavaScript against the selection.&lt;br /&gt;
&lt;br /&gt;
Example: I do a lot of peeking and poking at DNA sequences on the web. Often, I'm interested in knowing various summary statistics for the DNA I'm looking at. For example, I might see a long sequence that looks like &lt;b&gt;&lt;span style="color: #274e13;"&gt;AGTTAGAAAACCTCAGCTACTAG&lt;/span&gt; &lt;/b&gt;(etc.) and wonder what the G+C content of that stream is. So I'll select the text by click-dragging across it. Then I'll obtain the text in JavaScript by calling &lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;getSelection().toString()&lt;/span&gt;. Then I parse the text and display the results in an alert dialog.&lt;br /&gt;
&lt;br /&gt;
Suppose I've selected a run of DNA on-screen and I want to know the base content (the amounts of G, C, T, and A).&lt;br /&gt;
&lt;span style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;text = getSelection().toString(); // get the data as a string
text = text.toUpperCase(); // optionally convert it to upper case

bases = new Object;&amp;nbsp; // create a place to store the base counts
bases.G = bases.C = bases.T = bases.A = 0; // initialize

// now loop over the string contents:
for (var i = 0; i &amp;lt; text.length; i++)
   bases[ text[i] ]++;  // bump the count for that base&lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;// format the data for viewing&lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;msg = "G: " + bases.G/text.length + "\n";&lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;msg += "C: " + bases.C/text.length + "\n";&lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;msg += "A: " + bases.A/text.length + "\n";&lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;msg += "T: " + bases.T/text.length + "\n";&lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;msg += "GC Content: " + (bases.G + bases.C)/text.length; &lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;// view it:&lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&lt;span style="color: #274e13; font-size: small;"&gt;alert( msg ); &lt;/span&gt;&lt;/pre&gt;
&lt;pre&gt;&amp;nbsp;&lt;/pre&gt;
If I run this script against a web page where I've highlighted some DNA text, I get:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-Ojou5Fcu5tQ/UadiOrH4MMI/AAAAAAAABtA/ENqACvh8nfU/s1600/demo.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img alt="bookmarklet showing results" border="0" src="http://2.bp.blogspot.com/-Ojou5Fcu5tQ/UadiOrH4MMI/AAAAAAAABtA/ENqACvh8nfU/s320/demo.jpg" title="bookmarklet showing results" /&gt;&amp;nbsp;&lt;/a&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;br /&gt;
The nice part is, you can put the above code in a &lt;a href="http://en.wikipedia.org/wiki/Bookmarklet" target="_blank"&gt;bookmarklet&lt;/a&gt;, associate the bookmarklet with a button, and keep it in your bookmark bar so that whenever you want to run the code, you can just point and click. To do the packaging, reformat the above code (or your modified version of it) as a single line of code preceded by "javascript:" (don't forget the colon), then set that code as the URL of a bookmark. Instead of going to a regular URL, the browser will see "javascript:" as the URL scheme and execute the code directly.&lt;br /&gt;
&lt;br /&gt;
Bookmarklets of this sort have proven to be a major productivity boon for me in various situations as I cruise the web. When I see data I want to analyze, I don't have to copy and paste it to Excel (or whatever). With a bookmarklet, I can analyze it instantly, &lt;i&gt;sur la vitre&lt;/i&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/3553011181881743649/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/a-bioinformatics-bookmarklet.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/3553011181881743649?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/3553011181881743649?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/a-bioinformatics-bookmarklet.html" title="A Bioinformatics Bookmarklet" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-Ojou5Fcu5tQ/UadiOrH4MMI/AAAAAAAABtA/ENqACvh8nfU/s72-c/demo.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DEEBQnk9fip7ImA9WhBaGE8.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-8818874643144404303</id><published>2013-05-29T07:15:00.002-04:00</published><updated>2013-05-29T07:24:13.766-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-29T07:24:13.766-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Szybalski" /><category scheme="http://www.blogger.com/atom/ns#" term="Chargaff" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="Chargaff's second rule" 
/><category scheme="http://www.blogger.com/atom/ns#" term="Clostridium botulinum" /><category scheme="http://www.blogger.com/atom/ns#" term="genomics" /><category scheme="http://www.blogger.com/atom/ns#" term="parity rule" /><title>A Very Simple Test of Chargaff's Second Rule</title><content type="html">We know that for double-stranded DNA, the number of purines (A, G) will always equal the number of pyrimidines (T, C), because complementarity depends on A:T and G:C pairings. But do purines have to equal pyrimidines in single-stranded DNA? &lt;a href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is-broadly.html" target="_blank"&gt;Chargaff's second parity rule says yes&lt;/a&gt;. Simple observation says no.&lt;br /&gt;
&lt;br /&gt;
Suppose you have a couple thousand single-stranded DNA samples. All you have to do to see if Chargaff's second rule is correct is create a graph of A versus T, where each point represents the A and T (adenine and thymine) amounts in a particular DNA sample. If A = T (as predicted by Chargaff), the graph should look like a straight line with a slope of 1:1.&lt;br /&gt;
&lt;br /&gt;
For fun, I grabbed the sequenced DNA genome of &lt;i&gt;Clostridium botulinum&lt;/i&gt; &lt;i&gt;A&lt;/i&gt; strain ATCC 19397 (available from the FASTA link on &lt;a href="http://www.ncbi.nlm.nih.gov/nuccore/NC_009697" target="_blank"&gt;this page&lt;/a&gt;; be ready for a several-megabyte text dump), which contains coding sequences for 3552 genes of average length 442 bases each, and for each gene, I plotted the A content versus the T content.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-8Si6gp25Vx0/UaXapoF6X-I/AAAAAAAABsg/9ETkXZn6Y4s/s1600/A+vs+T-2.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="300" src="http://4.bp.blogspot.com/-8Si6gp25Vx0/UaXapoF6X-I/AAAAAAAABsg/9ETkXZn6Y4s/s400/A+vs+T-2.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;A plot of thymine (T) versus adenine (A) content for all 3552 genes in &lt;i&gt;C. botulinum&lt;/i&gt; coding regions. The greyed area represents areas where T/A &amp;gt; 1. Most genes fall in the white area where A/T &amp;gt; 1. &lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
As you can see, the resulting cloud of points not only doesn't form a straight line of slope 1:1, it doesn't even cluster on the 45-degree line at all. The center of the cluster is well below the 45-degree line, and (this is the amazing part) the major axis of the cluster is almost at 90 degrees to the 45-degree line, indicating that the quantity A+T tends to be conserved.&lt;br /&gt;
&lt;br /&gt;
A similar plot of G versus C (below) shows a somewhat different scatter pattern, but again notice that the centroid of the cluster is well off the 45-degree centerline. This means Chargaff's second rule doesn't hold (except for the few genes that randomly fell on the centerline).&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-gmXcR38WSpY/UaXiv1J20XI/AAAAAAAABsw/V0NAQVlp9rk/s1600/G+vs+C+2.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="300" src="http://1.bp.blogspot.com/-gmXcR38WSpY/UaXiv1J20XI/AAAAAAAABsw/V0NAQVlp9rk/s400/G+vs+C+2.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;A plot of cytosine (C) versus guanine (G) for all genes in all coding regions of &lt;i&gt;C. botulinum&lt;/i&gt;. Again, notice that the points cluster well away from the 45-degree line (where they would have been expected to cluster, according to Chargaff).&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
The numbers of bases of each type in the botulinum genome are:&lt;br /&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;/span&gt;

&lt;pre&gt;G: 577108
C: 358170
T: 977095
A: 1274032
&lt;/pre&gt;
&lt;br /&gt;
Amazingly, there are 296,937 more adenines than thymines in the genome (here, I'm somewhat sloppily equating "genome" with combined coding regions). Likewise, excess guanines number 218,938. On average, each gene contains 73 excess purines (42 adenine and 31 guanine).&lt;br /&gt;
&lt;br /&gt;
The above graphs are in no way unique to &lt;i&gt;C. botulinum&lt;/i&gt;. If you do similar plots for other organisms, you'll see similar results, with excess purines being most numerous in organisms that have low G+C content. As explained in my &lt;a href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is-broadly.html" target="_blank"&gt;earlier posts on this subject&lt;/a&gt;, the purine/pyrimidine ratio (for coding regions) tends to be high in low-GC organisms and low in high-GC organisms, a relationship that holds across all bacterial and eukaryotic domains.&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/8818874643144404303/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/a-very-simple-test-of-chargaffs-second.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/8818874643144404303?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/8818874643144404303?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/a-very-simple-test-of-chargaffs-second.html" title="A Very Simple Test of Chargaff's Second Rule" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-8Si6gp25Vx0/UaXapoF6X-I/AAAAAAAABsg/9ETkXZn6Y4s/s72-c/A+vs+T-2.png" height="72" width="72" 
/><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CkMBR3szeCp7ImA9WhBaGE8.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-3037951910145319393</id><published>2013-05-28T09:17:00.000-04:00</published><updated>2013-05-29T05:40:56.580-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-29T05:40:56.580-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Szybalski's rule" /><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics" /><category scheme="http://www.blogger.com/atom/ns#" term="Chargaff" /><category scheme="http://www.blogger.com/atom/ns#" term="G+C content" /><category scheme="http://www.blogger.com/atom/ns#" term="purine-pyrimidine ratio" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="purine loading" /><category scheme="http://www.blogger.com/atom/ns#" term="genomics" /><title>Chargaff's Second Parity Rule is Broadly Violated</title><content type="html">Erwin Chargaff, working with sea-urchin sperm in the 1950s, observed that within double-stranded DNA, the amount of adenine equals the amount of thymine (A = T) and guanine equals cytosine (G = C), which we now know is the basis of "complementarity" in DNA. But Chargaff later went on to observe the same thing in studies of &lt;i&gt;single-stranded&lt;/i&gt; DNA, causing him to postulate that A = T and G = C more generally (within as well as across strands of DNA). The more general postulation is known as Chargaff's second parity rule. It says that A = T and G = C within a &lt;i&gt;single strand &lt;/i&gt;of DNA.&lt;br /&gt;
&lt;br /&gt;
The second parity rule seemed to make sense, because there was and is no &lt;i&gt;a priori&lt;/i&gt; reason to think that DNA or RNA, whether single-stranded or double-stranded, should contain more purines than pyrimidines (or vice versa). All other factors being equal, nature should not "favor" one class of nucleotide over another. Therefore, across evolutionary time frames, one would expect purine and pyrimidine prevalences in nucleic acids to equalize.&lt;br /&gt;
&lt;br /&gt;
What we instead find, if we look at real-world DNA and RNA, is that individual strands seldom contain equal amounts of purines and pyrimidines. &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed/4966069" target="_blank"&gt;Szybalski&lt;/a&gt; was the first to note that viruses (which usually contain single-stranded nucleic acids) often contain more purines than pyrimidines. &lt;a href="http://www.ncbi.nlm.nih.gov/pmc/articles/PMC310832/" target="_blank"&gt;Others&lt;/a&gt; have since verified what Szybalski found, namely that in many organisms, DNA is purine-heavy on the "sense" strand of coding regions, such that messenger RNA ends up richer in purines than pyrimidines. This is called Szybalski's rule.&lt;br /&gt;
&lt;br /&gt;
In a &lt;a href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is.html" target="_blank"&gt;previous post&lt;/a&gt;, I presented evidence (from analysis of the sequenced genomes of 93 bacterial genera) that Szybalski's rule not only is more often true than Chargaff's second parity rule, but in fact purine-loading of coding region "message" strands occurs in direct proportion to the amount of A+T (or in inverse proportion to the amount of G+C) in the genome. At G+C contents below about 68%, DNA becomes heavier and heavier with purines on the message strand. At G+C contents above 68%, we find organisms in which the message strand is actually &lt;i&gt;pyrimidine&lt;/i&gt;-heavy instead of purine-heavy. &lt;br /&gt;
&lt;br /&gt;
I now present evidence that purine loading of message strands in proportion to A+T content is a universal phenomenon, applying to a wide variety of eukaryotic ("higher") life forms as well as bacteria.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-KNI6-wH13vA/UaSU32JfGPI/AAAAAAAABsQ/vZX8IGN5Ows/s1600/LAYERED.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="323" src="http://1.bp.blogspot.com/-KNI6-wH13vA/UaSU32JfGPI/AAAAAAAABsQ/vZX8IGN5Ows/s400/LAYERED.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;According to Chargaff's second parity rule, all points on this graph should fall on a horizontal line at y = 1. Instead, we see that Chargaff's rule is violated for all but a statistically insignificant subset of organisms. Pink/orange points represent eukaryotic species. Dark green data points represent bacterial genera. See text for discussion. Permission to reproduce this graph (with attribution) is granted.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
To create the accompanying graph, I did frequency analysis of codons for 58 eukaryotic life forms (pink data points) and 93 prokaryotes (dark green data points) in order to derive prevalences of the four bases (A, G, C, T) in coding regions of DNA. Eukaryotes that were studied included yeast, molds, protists, warm- and cold-blooded animals, flowering and non-flowering plants, algae, and insects and crustaceans. The complete list of organisms is shown in a table further below.&lt;br /&gt;
&lt;br /&gt;
It can now be stated definitively that Chargaff's second parity rule is, in general, violated across all major forms of life. Not only that, it is violated in a &lt;i&gt;regular &lt;/i&gt;fashion, such that purine loading of mRNA increases with genome A+T content. Significantly, some organisms with very low A+T content (high G+C content) actually have &lt;i&gt;pyrimidine&lt;/i&gt;-loaded mRNA, but they are in a small minority.&lt;br /&gt;
&lt;br /&gt;
Purine loading is both common and extreme. For about 20% of organisms, the purine-pyrimidine ratio is above 1.2. For some organisms, the purine excess is more than 40%, which is striking indeed.&lt;br /&gt;
&lt;br /&gt;
Why should purines migrate to one strand of DNA while pyrimidines line up on the other strand? One possibility is that it minimizes spontaneous self-annealing of separated strands into secondary structures. Unrestrained "kissing" of intrastrand regions during transcription might lead to deleterious excisions, inversions, or other events. Poly-purine runs would allow the formation of many loops but few stems; in general, secondary structures would be rare.&lt;br /&gt;
&lt;br /&gt;
The significance of purine loading remains to be elucidated. But in the meantime, there can be no doubt that purine enrichment of message strands is indeed widespread and strongly correlates to genome A+T content. Chargaff's second parity rule is invalid, except in a trivial minority of cases.&lt;br /&gt;
&lt;br /&gt;
The prokaryotic organisms used in this study were &lt;a href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is.html" target="_blank"&gt;presented&lt;/a&gt; in a table previously. The eukaryotic organisms are shown in the following table:&lt;br /&gt;
&lt;br /&gt;
&lt;table border="1" cellpadding="4" cellspacing="0" style="width: 100%;"&gt;
 &lt;colgroup&gt;&lt;col width="85*"&gt;&lt;/col&gt;
 &lt;col width="39*"&gt;&lt;/col&gt;
 &lt;col width="27*"&gt;&lt;/col&gt;
 &lt;col width="105*"&gt;&lt;/col&gt;
 &lt;/colgroup&gt;&lt;tbody&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Organism&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Comment&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;G+C%&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Purine ratio&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Chlorella
   variabilis strain NC64A&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;endosymbiont of Paramecium&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;68.76&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1055181128896376&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Chlamydomonas
   reinhardtii strain CC-503 cw92 mt+&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;unicellular alga&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;67.96&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0818749999999997&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Micromonas
   pusilla strain CCMP1545&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;unicellular alga&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;67.41&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1873268193087356&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Ectocarpus
   siliculosus strain Ec 32&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;alga&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;62.74&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.2090728330510347&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Sporisorium
   reilianum SRZ2&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;smut fungus&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;62.5&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;0.9776547360094916&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Leishmania
   major strain Friedlin&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;protozoan&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;62.47&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0325&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Oryza
   sativa Japonica Group&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;rice&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;54.77&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0668412348401317&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Takifugu
   rubripes (torafugu)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;fish&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;54.08&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0655094027691674&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Aspergillus
   fumigatus strain A1163&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;fungus&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;53.89&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.013091641490433&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Sus
   scrofa (pig)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;pig&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;53.77&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0680595779892428&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Drosophila
   melanogaster (fruit fly)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;fruit fly&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;53.69&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0986989367655287&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Brachypodium
   distachyon line Bd21&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;grass&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;53.32&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0764746703677999&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Selaginella
   moellendorffii (Spikemoss)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;spikemoss&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;52.83&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1014492753623195&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Equus
   caballus (horse)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;horse&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;52.29&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0844453711426192&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Pongo
   abelii (Sumatran orangutan)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;orangutan&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;52&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0929015146227405&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Homo sapiens&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;human&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;51.97&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0939049081896255&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Mus
   musculus (house mouse) strain mixed&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;mouse&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;51.91&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0827720297201582&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Tuber
   melanosporum (Perigord truffle) strain Mel28&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;truffle&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;51.4&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0836820083682006&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Phaeodactylum
   tricornutum strain CCAP 1055/1&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;diatom&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;51.06&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0418452745458253&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Arthroderma
   benhamiae strain CBS 112371&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;fungus&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;50.99&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0360268674944024&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Ornithorhynchus
   anatinus (platypus)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;platypus&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;50.97&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1121909993661525&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Taeniopygia
   guttata (Zebra finch)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;bird&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;50.81&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1344717182497328&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Trypanosoma
   brucei TREU927&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;sleeping sickness protozoan&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;50.78&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.106974784013486&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Danio
   rerio (zebrafish) strain Tuebingen&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;fish&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;49.68&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1195053003533566&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Gallus
   gallus&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;chicken&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;49.54&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1265418970650787&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Monodelphis
   domestica (gray short-tailed opossum)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;opossum&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;49.07&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0768110918544194&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Sorghum
   bicolor (sorghum)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;sorghum&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;48.93&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.046422719825232&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Thalassiosira
   pseudonana strain CCMP1335&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;diatom&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;47.91&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1403183213189638&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Hyaloperonospora
   arabidopsidis&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;mildew&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;47.75&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.053039546400631&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Daphnia
   pulex (common water flea)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;water flea&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;47.57&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.058036633052068&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Physcomitrella
   patens subsp. patens&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;moss&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;47.33&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1727134477514667&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Anolis
   carolinensis (green anole)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;lizard&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;46.72&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.113765477057538&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Brassica
   rapa&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;flowering plant&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;46.29&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1056659411640803&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Fragaria
   vesca (woodland strawberry)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;strawberry&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;46.02&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1052853232259425&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Amborella
   trichopoda&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;flowering shrub&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;45.88&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0992441209406494&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Citrullus
   lanatus var. lanatus (watermelon)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;watermelon&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;44.5&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0855134984692458&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Capsella
   rubella&lt;/span&gt;&lt;/span&gt; &lt;/span&gt;
  &lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;mustard-family plant&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;44.37&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1041257367387034&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Arabidopsis
   thaliana (thale cress)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;cress&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;44.15&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.109853013573388&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Lotus
   japonicus&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;lotus&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;44.11&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0773228019122847&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Populus
   trichocarpa (Populus balsamifera subsp. trichocarpa)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;tree&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;43.7&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1097672456226706&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Cucumis
   sativus (cucumber)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;cucumber&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;43.56&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0823847862298719&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Caenorhabditis
   elegans strain Bristol N2&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;worm&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;42.96&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.106320224719101&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Vitis
   vinifera (grape)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;grape&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;42.75&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0859833393697935&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Ciona
   intestinalis&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;tunicate&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;42.68&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.158652461848546&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Solanum
   lycopersicum (tomato)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;tomato&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;41.7&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1177&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Theobroma
   cacao (chocolate)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;chocolate&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;41.31&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1297481860862142&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Medicago
   truncatula (barrel medic) strain A17&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;flowering plant&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;40.78&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.093754366354618&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Apis
   mellifera (honey bee) strain DH4&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;honey bee&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;39.76&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.216042543762464&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Saccharomyces
   cerevisiae (baker's yeast) strain S288C&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;yeast&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;39.63&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1387641650630744&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Acyrthosiphon
   pisum (pea aphid) strain LSR1&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;aphid&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;39.35&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1651853457619772&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Debaryomyces
   hansenii strain CBS767&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;yeast&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;37.32&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1477345930856775&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Pediculus
   humanus corporis (human body louse) strain USDA&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;louse&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;36.57&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.2365791828213537&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Schistosoma
   mansoni strain Puerto Rico&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;trematode&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;35.94&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.0586902800658977&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Candida
   albicans strain WO-1&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;yeast&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;35.03&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1490291609944834&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Tetrapisispora
   phaffii CBS 4417 strain type CBS 4417&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;yeast&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;34.69&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.17503805175038&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Paramecium
   tetraurelia strain d4-2&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;protist&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;30.03&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.2494922903347117&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;nucleomorph
   Guillardia theta&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;endosymbiont&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;23.87&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.1529462427330803&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="33%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Plasmodium
   falciparum 3D7&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="15%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;malaria parasite&lt;/span&gt;&lt;/td&gt;
  &lt;td width="10%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;23.76&lt;/span&gt;&lt;/td&gt;
  &lt;td width="41%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: #1c00cf;"&gt;&lt;span style="font-family: Consolas, Lucida Console, monospace;"&gt;1.4471365638766511&lt;/span&gt;&lt;/span&gt;
   &lt;/span&gt;
  &lt;/td&gt;
 &lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/3037951910145319393/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is-broadly.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/3037951910145319393?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/3037951910145319393?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is-broadly.html" title="Chargaff's Second Parity Rule is Broadly Violated" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-KNI6-wH13vA/UaSU32JfGPI/AAAAAAAABsQ/vZX8IGN5Ows/s72-c/LAYERED.png" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CEEEQn0yfip7ImA9WhBaFUk.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-2201269747607174240</id><published>2013-05-26T00:30:00.000-04:00</published><updated>2013-05-26T00:30:03.396-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-26T00:30:03.396-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Szybalski's rule" /><category scheme="http://www.blogger.com/atom/ns#" term="G+C content" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category 
scheme="http://www.blogger.com/atom/ns#" term="wobble codon" /><category scheme="http://www.blogger.com/atom/ns#" term="codon" /><category scheme="http://www.blogger.com/atom/ns#" term="Chargaff's second rule" /><category scheme="http://www.blogger.com/atom/ns#" term="genomics" /><title>Chargaff's Second Parity Rule is Violated in Proportion to Genome A+T Content</title><content type="html">&lt;a href="http://en.wikipedia.org/wiki/Erwin_Chargaff" target="_blank"&gt;Erwin Chargaff&lt;/a&gt; was the first to notice, in the early 1950s, before Watson and Crick deduced the structure of DNA, that the quantity of purines in DNA equals the quantity of pyrimidines (specifically, the amount of adenine equals the amount of thymine; and the amount of guanine equals the amount of cytosine). This observation was key to establishing the structure of DNA, and it is often cited as Chargaff's first parity rule. But Chargaff also made another observation (the second parity rule), namely that even within a &lt;i&gt;single strand&lt;/i&gt; of DNA, the amount of adenine tends to equal the amount of thymine and the amount of guanine tends to equal the amount of cytosine. &lt;br /&gt;
&lt;br /&gt;
It's easy to understand why the first parity rule holds true, because complementarity of DNA strands depends on A pairing with T and G pairing with C; these pairings give rise to the "rungs" of the DNA ladder and ensure that copying of strands occurs with total fidelity during cell division. But there doesn't seem to be any &lt;i&gt;a priori&lt;/i&gt; reason why the second parity rule should hold true. And in fact, it often &lt;i&gt;doesn't&lt;/i&gt; hold true, as &lt;a href="http://en.wikipedia.org/wiki/Wac%C5%82aw_Szybalski" target="_blank"&gt;Wacław Szybalski&lt;/a&gt; noted in 1966 when he &lt;a href="http://symposium.cshlp.org/content/31/123" target="_blank"&gt;reported&lt;/a&gt; finding imbalances of purines and pyrimidines in bacteriophage and other DNA samples. Szybalski observed that in most cases, protein-coding regions of DNA tend to have slightly more purines than pyrimidines on one strand and slightly more pyrimidines than purines on the other strand, such that messenger RNA ends up purine-heavy. &lt;br /&gt;
&lt;br /&gt;
If you're having trouble visualizing the situation, imagine a very short (12-base) "chromosome" containing 50% G+C content. One possibility is that one strand looks like GGGGGGTTTTTT and the other strand is CCCCCCAAAAAA. In this case half the purines (all the G's) are on one strand and half (A's) are on the other. But you could just as easily have strands be GGGGGGAAAAAA and CCCCCCTTTTTT. In this case, one strand is all-purines, the other all-pyrimidines. Both examples violate Chargaff's second rule, which requires that G = C and A = T within each strand (e.g., GGGCCCTTTAAA + CCCGGGAAATTT would obey the rule).&lt;br /&gt;
&lt;br /&gt;
To my knowledge, no one has yet reported the fact (which I'll now report) that the degree to which Chargaff's second parity rule is violated depends on the G+C content of the source genome (at least for bacteria). Simply put, organisms with a G+C content of around 68% obey Chargaff's rules. Organisms with more than 68% G+C content violate Chargaff's second rule in the direction of &lt;i&gt;pyrimidine &lt;/i&gt;loading of mRNA. Organisms with &lt;i&gt;less &lt;/i&gt;than 68% G+C content (which of course includes the overwhelming majority of organisms) have purine-heavy DNA, to a degree that depends on the amount of A+T in the DNA.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-y8AW2ZUQXI0/UaEktA1pJHI/AAAAAAAABrk/mSdoNwPrdgM/s1600/PurinePyrimidine+ratio+OVERALL+Zun+Zun.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="320" src="http://4.bp.blogspot.com/-y8AW2ZUQXI0/UaEktA1pJHI/AAAAAAAABrk/mSdoNwPrdgM/s400/PurinePyrimidine+ratio+OVERALL+Zun+Zun.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Purine/pyrimidine ratio (in coding regions) as a function of genome G+C content based on codon analysis of 93 organisms. As genomes become more A+T rich, mRNA becomes more heavily purine-loaded.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
The above graph shows how this relationship works. To create the graph, I did a statistical analysis of codon usage in 93 bacterial species. Organisms were chosen so as to obtain representatives across the AT/GC spectrum. No genus is represented more than once. In order to get as broad a sampling as possible, I included 14 intracellular symbionts with ultra-low G+C content (plus one such creature—&lt;i&gt;Candidatus Hodgkinia cicadicola&lt;/i&gt;—with a 58% G+C content); many extremophiles; heterotrophs and autotrophs; pathogens and non-pathogens; and organisms with large and small genomes. The complete organism list is presented in a table further below.&lt;br /&gt;
&lt;br /&gt;
Codon usage statistics for each organism were obtained using tools at&amp;nbsp;&lt;a href="http://genomevolution.org/"&gt;http://genomevolution.org&lt;/a&gt;. Relative prevalences of A, T, G, and C in the genomes' coding regions were determined by codon frequency analysis. The purine:pyrimidine ratio was simply calculated as (A+G)/(C+T) based on the codon-wise frequency of usage of each base.&lt;br /&gt;
&lt;br /&gt;
What we see is that while there is a good deal of noise in the data, nevertheless it's quite clear that purine/pyrimidine ratios increase sharply as genome G+C decreases. Organisms for which Chargaff's second rule holds true (points falling at y = 1.0) are in a small minority. Most organisms have purine-rich coding regions, resulting in purine-rich mRNA.&lt;br /&gt;
&lt;br /&gt;
Purine enrichment occurs for both adenine and guanine. For example, in &lt;i&gt;Clostridium botulinum&lt;/i&gt; (genome G+C = 28.21%), codon analysis reveals G/C/A/T relative abundances (on the coding strand) of 18.3/10.8/40.3/30.6.&lt;br /&gt;
&lt;br /&gt;
Intra-codon base position analysis reveals that purine enrichment is far more concentrated in position one of the codon than other positions. The graphs below show the purine balance on a position-by-position basis, for each base in a codon.&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/--xX4AeuljXA/UaFXs4G9DGI/AAAAAAAABrw/M2bGMWg7CQM/s1600/B1+purine+ratio.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="320" src="http://1.bp.blogspot.com/--xX4AeuljXA/UaFXs4G9DGI/AAAAAAAABrw/M2bGMWg7CQM/s400/B1+purine+ratio.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
Most of the variation in purine/pyrimidine ratio happens in position 1 of the codon (the 'A' in ATG, for example). Notice that the purine/pyrimidine ratio in this position is well above 1.0 for all organisms.&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-SbpPEfAVFGE/UaFXzShPJEI/AAAAAAAABr4/6d9b5_dgLEQ/s1600/B2+purine+ratio.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="320" src="http://2.bp.blogspot.com/-SbpPEfAVFGE/UaFXzShPJEI/AAAAAAAABr4/6d9b5_dgLEQ/s400/B2+purine+ratio.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
Variation in purine loading at the second position of the codon is more carefully controlled (notice that there is less "scatter" in this graph). The y-axis scale is different here than in the previous graph, hence the slope is quite a bit less pronounced than it looks. Also, notice that most of the points in this plot are below parity (i.e., below 1.0 on the y-axis), indicating that this codon position is relatively pyrimidine-rich. &lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-65FFQbnBAMw/UaFX4pO-5BI/AAAAAAAABsA/CfOwa-zwtvM/s1600/B3+purine+ratio.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="320" src="http://1.bp.blogspot.com/-65FFQbnBAMw/UaFX4pO-5BI/AAAAAAAABsA/CfOwa-zwtvM/s400/B3+purine+ratio.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
The third (so-called "wobble") position of the codon shows considerable variation in values, but the slope of the curve is less than in the previous two graphs, and this position is pyrimidine-rich for about two-thirds of the organisms. &lt;br /&gt;
&lt;br /&gt;
It's well known that GC-skew tends to be exaggerated in position 3 of the codon. For example, if the overall genome G+C is 70%, the position-wise G+C for the wobble base may be 90%. Surprisingly, we find that purine loading is most exaggerated in position 1 of the codon, not position 3. Not only is the slope of the purine-ratio curve shallower in position 3 than for the other two base positions, but only position 1 is actually purine-heavy: positions 2 and 3 tend to be net pyrimidine-rich. This fact (that purine loading is primarily localized to codon position 1, whereas GC-skew is exaggerated in position 3) might indicate that the forces responsible for purine loading are entirely different from the forces responsible for GC-skew.&lt;br /&gt;
&lt;br /&gt;
What might those forces be? What kinds of selection pressure might cause organisms to purine-load one strand of their DNA? One possibility is that purine loading of the coding strand is a strategy for protecting the "weaker" or more vulnerable strand from damage or mutations. Cytosine is thought to be particularly vulnerable to deamination (and later substitution with thymine, during repair). It's possible that the transcription process (which is &lt;i&gt;asymmetric&lt;/i&gt;, in that RNA polymerase operates against just one strand of DNA, leaving the other strand free) is protective of the antisense strand of DNA. That is, in transcription, RNA polymerase cloaks the antisense strand and in so doing renders that strand less vulnerable to deamination events, rogue methylations, etc., while transcription is taking place.&lt;br /&gt;
&lt;br /&gt;
An entirely different possibility is envisioned by an RNA World hypothesis. In this hypothesis, the genetic material of early ancestor organisms was single-stranded RNA. Since single-stranded RNA is not "complementary" to anything, there is no need for it to obey Chargaff symmetries. Thus, purine loading could have occurred prior to the advent of double-stranded DNA, and early organisms could have been uniformly AT-rich. In this model of the world, GC-rich genomes are a late development, and the processes responsible for creating GC-rich DNA led to genetic material with full Chargaff base parity.&lt;br /&gt;
&lt;br /&gt;
We may not know for a long time (if ever) what the mechanisms of purine enrichment are. But we know for sure that purine accumulation is a widespread phenomenon in the bacterial world (operating across diverse clades) and happens in a way that encourages purine-rich mRNA in organisms with low G+C content in their genomes. &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
Organisms used in this study:&lt;br /&gt;
&lt;br /&gt;
&lt;table border="1" cellpadding="4" cellspacing="0" style="width: 100%;"&gt;
 &lt;colgroup&gt;&lt;col width="179*"&gt;&lt;/col&gt;
 &lt;col width="33*"&gt;&lt;/col&gt;
 &lt;col width="44*"&gt;&lt;/col&gt;
 &lt;/colgroup&gt;&lt;tbody&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;b&gt;Organism&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;b&gt;GC%&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;b&gt;genome size&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Anaeromyxobacter dehalogenans
   2CP-1&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;74.67&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5009007&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Cellulomonas flavigena strain
   DSM 20109&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;74.29&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4123179&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Xylanimonas cellulosilytica
   strain DSM 15894&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;72.47&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3831380&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Streptomyces
   bingchenggensis strain BCW-1&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;70.75&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;11936683&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Myxococcus fulvus strain HW-1&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;70.63&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;9003593&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Rubrobacter
   xylanophilus strain DSM 9941&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;70.48&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3225748&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Rhodospirillum centenum ATCC
   51521&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;70.46&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4355543&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Actinomyces sp. oral taxon 175
   strain F0384&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;68.73&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3133330&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Rhodococcus
   equi strain ATCC 33707&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;68.72&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5259057&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Acidovorax
   avenae subsp. citrulli strain AAC00-1&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;68.53&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5352772&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Bordetella bronchiseptica
   strain RB50&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;68.08&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5339179&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Alicycliphilus denitrificans
   strain K601&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;67.81&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5070751&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Stenotrophomonas
   maltophilia strain JV3&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;66.89&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4544477&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Rhodobacter
   capsulatus strain SB 1003&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;66.56&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3871920&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Pseudomonas
   aeruginosa strain PA7&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;66.45&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;6588339&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Ralstonia
   eutropha strain H16&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;66.29&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;7416678&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Xanthomonas campestris pv.
   raphani strain 756C&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;65.29&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4941214&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Thioalkalivibrio
   sp. strain HL-EbGR7&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;65.06&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3470516&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Rhodopseudomonas
   palustris strain BisB18&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;64.96&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5513844&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Brevundimonas
   diminuta strain ATCC 11568&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;64.51&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3369316&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Rhodothermus
   marinus strain DSM 4252&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;64.09&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3386737&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Bradyrhizobium
   japonicum strain USDA 110&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;64.06&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;9105828&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Mycobacterium
   tuberculosis strain C&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;63.82&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4379118&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Thermanaerovibrio
   acidaminovorans strain DSM 6589&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;63.79&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1848474&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Halomonas elongata DSM 2581
   strain type DSM 2581&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;63.61&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4061296&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Novosphingobium nitrogenifigens
   strain DSM 19370&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;63.43&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4182647&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Polaromonas sp. strain JS666 &lt;/span&gt;
  &lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;62.24&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5898676&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Desulfovibrio africanus strain
   Walvis Bay&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;61.42&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4200534&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Candidatus Desulforudis
   audaxviator strain MP104C&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;60.85&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2349476&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Burkholderia rhizoxinica strain
   HKI 454&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;60.68&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3750138&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Slackia heliotrinireducens
   strain DSM 20476&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;60.21&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3165038&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Candidatus Nitrospira defluvii&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;59.03&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4317083&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Halogeometricum borinquense DSM
   11551&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;58.43&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3944467&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Candidatus Hodgkinia cicadicola
   strain Dsem&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;58.39&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;143795&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Sideroxydans lithotrophicus
   strain ES-1&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;57.54&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3003656&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Cenarchaeum symbiosum A&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;57.37&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2045086&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Serratia sp. strain AS12&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;55.96&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5443009&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Acidaminococcus
   fermentans strain DSM 20731&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;55.84&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2329769&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Hyperthermus butylicus strain
   DSM 5456&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;53.74&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1667163&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Methanosaeta thermophila
   (Methanothrix thermophila PT) strain PT&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;53.55&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1879471&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Neisseria gonorrhoeae strain
   NCCP11945&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;53.37&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2236178&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Treponema paraluiscuniculi
   strain Cuniculi A&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;52.74&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1133390&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Pseudovibrio sp. strain FO-BEG1&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;52.38&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5916782&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Nitrosococcus halophilus strain
   Nc4&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;51.60&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4145260&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Herpetosiphon
   aurantiacus DSM 785&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;50.84&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;6785430&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Escherichia coli B strain
   REL606&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;50.77&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4629812&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;
&lt;dl&gt;
&lt;dt&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Bdellovibrio
    bacteriovorus strain ATCC15356&lt;/span&gt;&lt;/span&gt;&lt;/dt&gt;
&lt;/dl&gt;
&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;50.65&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3782950&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Pectobacterium
   wasabiae strain WPP163&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;50.48&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5063892&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Anaplasma
   centrale (Anaplasma marginale subsp. centrale str. Israel) strain
   Israel&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;49.98&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1206806&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Actinomyces
   coleocanis strain DSM 15436&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;49.47&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1723843&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Desulfotalea
   psychrophila strain LSv54&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;46.72&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3659634&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Polynucleobacter
   necessarius strain STIR1&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;45.56&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1560469&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Nitrosomonas sp. strain Is79A3&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;45.44&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3783444&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Coprothermobacter
   proteolyticus strain DSM 5265&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;44.77&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1424912&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Vibrio
   sp. Ex25 strain EX25&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;44.57&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5160431&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Geobacillus
   thermoglucosidans strain TNO-09.020&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;43.82&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3740238&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Waddlia chondrophila strain
   2032/99&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;43.59&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2139757&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Bacteroides fragilis strain
   638R&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;43.42&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5373121&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Thiomicrospira
   crunogena strain XCL-2&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;43.13&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2427734&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Coxiella
   burnetii strain CbuG_Q212&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;42.63&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2008870&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Chlamydia muridarum Nigg strain
   MoPn&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;40.27&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1080451&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Psychromonas ingrahamii strain
   37&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;40.09&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;4559598&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Nitratiruptor sp. strain
   SB155-2&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;39.69&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1877931&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Lactobacillus reuteri strain
   DSM 20016&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;38.87&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1999618&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Thermotoga lettingae strain TM&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;38.70&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2135342&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Streptococcus
   pyogenes strain Alab49&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;38.63&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1841271&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Bartonella bacilliformis strain
   ATCC 35685; KC583&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;38.24&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1445021&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Halothermothrix
   orenii strain DSM 9562; H 168&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;37.78&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2463968&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Staphylothermus marinus strain
   F1&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;35.73&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1570485&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Calditerrivibrio nitroreducens
   strain DSM 19672&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;35.69&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2216552&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Bacillus
   thuringiensis serovar andalousiensis strain BGSC 4AW1&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;34.96&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;5488844&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Desulfurobacterium
   thermolithotrophum&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;34.95&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1541968&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Wolbachia
   pipientis strain wPip&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;34.19&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1482455&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Nitrosopumilus maritimus strain
   SCM1&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;34.17&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1645259&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Staphylococcus aureus strain
   04-02981&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;32.90&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2821452&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Methanobrevibacter ruminantium
   strain M1&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;32.64&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2937203&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Rickettsia
   japonica strain YH&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;32.35&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1283087&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Methanocaldococcus fervens
   strain AG86 (v1) &lt;/span&gt;
  &lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;32.21&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1507251&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Mycoplasma
   genitalium G37 strain G-37&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;31.69&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;580076&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Nanoarchaeum equitans strain
   Kin4-M&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;31.56&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;490885&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Orientia tsutsugamushi strain
   Boryong&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;30.53&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;2127051&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Methanococcus aeolicus strain
   Nankai-3&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;30.04&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1569500&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Candidatus
   Pelagibacter ubique strain HTCC1062&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;29.68&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1308759&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Ehrlichia
   canis strain Jake&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;28.96&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1315030&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Arcobacter nitrofigilis strain
   DSM 7299&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;28.36&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3192235&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Clostridium
   botulinum A strain ATCC 19397&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;28.21&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;3863450&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="color: black; font-size: x-small;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Parvimonas
   sp. oral taxon 393 strain F0440&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;28.17&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1483165&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Candidatus Arthromitus sp.
   strain SFB-mouse-NYU&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;27.94&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;1569870&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Candidatus Blochmannia
   floridanus&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;27.38&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;705557&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Buchnera aphidicola
   (Acyrthosiphon pisum) strain 5A&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;25.69&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;653223&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Wigglesworthia glossinidia
   endosymbiont of Glossina brevipalpis&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;22.48&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;703004&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;&lt;span style="color: black;"&gt;&lt;span style="font-family: Arial, Helvetica, fixed;"&gt;Candidatus
   Sulcia muelleri strain CARI (v1)&lt;/span&gt;&lt;/span&gt; &lt;/span&gt;
  &lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;21.13&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;276511&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr valign="TOP"&gt;
  &lt;td width="70%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;Candidatus Carsonella ruddii
   strain PV (v1)&lt;/span&gt;&lt;/td&gt;
  &lt;td width="13%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;16.56&lt;/span&gt;&lt;/td&gt;
  &lt;td width="17%"&gt;&lt;span style="font-family: Arial; font-size: x-small;"&gt;159662&lt;/span&gt;&lt;/td&gt;
 &lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/2201269747607174240/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2201269747607174240?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2201269747607174240?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/chargaffs-second-parity-rule-is.html" title="Chargaff's Second Parity Rule is Violated in Proportion to Genome A+T Content" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-y8AW2ZUQXI0/UaEktA1pJHI/AAAAAAAABrk/mSdoNwPrdgM/s72-c/PurinePyrimidine+ratio+OVERALL+Zun+Zun.png" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DEUEQX48eCp7ImA9WhBaFEg.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-1466756830194091704</id><published>2013-05-25T00:30:00.000-04:00</published><updated>2013-05-25T00:30:00.070-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-25T00:30:00.070-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="entropy" /><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics" /><category scheme="http://www.blogger.com/atom/ns#" 
term="information theory" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="codon" /><category scheme="http://www.blogger.com/atom/ns#" term="Clostridium botulinum" /><category scheme="http://www.blogger.com/atom/ns#" term="genomics" /><title>DNA: Full of Surprises</title><content type="html">DNA is full of surprises, one of them being the radically different ways in which it can be used to express information. We think of DNA as a four-letter language (A,T,G,C), but some organisms choose to "speak" mostly G and C. Others avoid G and C, preferring instead to "speak" A and T. The question is, if DNA is fundamentally a four-letter language, why would some organisms want to limit themselves to dialects that use mostly just two letters?&lt;br /&gt;
&lt;br /&gt;
The DNA of &lt;i&gt;Clostridium botulinum&lt;/i&gt; (the botulism bug; a common soil inhabitant) is extraordinarily deficient in G and C: over 70% of its DNA is A and T. The soil bacterium &lt;i&gt;Anaeromyxobacter dehalogenans&lt;/i&gt;, on the other hand, has DNA that's 74% G and C. Think of the constraints this puts on a coding system. Imagine that you want to store data using a four-letter alphabet, but you are required to use two of the four letters 74% of the time! Suddenly a two-bit-per-symbol encoding scheme (a four-letter code) starts to look and feel a lot more like a one-bit-per-symbol (two-letter) scheme.&lt;br /&gt;
&lt;br /&gt;
What kinds of information are actually stored in DNA? Several kinds, but bottom line, DNA is &lt;i&gt;primarily &lt;/i&gt;a system for specifying sequences of amino acids. The information is stored as three-letter "words" (GCA, ATG, TCG, etc.) called &lt;i&gt;codons&lt;/i&gt;. There are 64 possible length-3 words in a system that uses a 4-letter alphabet. Fortunately, there are only 20 amino acids. I say "fortunately," because imagine if there were 64 different amino acids (as there might be in extra-terrestrial life, say) and they had to occur in roughly equal amounts in all proteins. Every possible codon would &lt;i&gt;have &lt;/i&gt;to be used (in roughly equal numbers) and there would be no possibility of an organism like &lt;i&gt;C. botulinum&lt;/i&gt; developing a "preference" for A or T in its DNA. It is precisely because only 20 codons out of a possible 64 need be used that organisms like&lt;i&gt; C. botulinum &lt;/i&gt;(with a huge imbalance of AT vs. GC in its DNA) can exist.&lt;br /&gt;
&lt;br /&gt;
As it happens, all organisms do tend to use all 64 possible codons, but they use them with vastly varying frequencies, giving rise to codon "dialects." (Note that the mapping of 64 codons onto 20 amino acids means some codons are necessarily synonymous. For example, there are four different codons for glycine and six for leucine.) You might expect that an organism like &lt;i&gt;C. botulinum&lt;/i&gt; with mostly A and T in its DNA would "speak" in A- and T-rich codons. And you'd be right. Here's a chart showing which codons &lt;i&gt;C. botulinum&lt;/i&gt; actually uses, and at what frequencies:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-GZmXqVwPdik/UaAIHjYPqZI/AAAAAAAABrE/oZRtE66DCZM/s1600/C+botulinum+codon+chart.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/-GZmXqVwPdik/UaAIHjYPqZI/AAAAAAAABrE/oZRtE66DCZM/s400/C+botulinum+codon+chart.png" width="288" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
The green-highlighted codons are the ones &lt;i&gt;C. botulinum&lt;/i&gt; uses preferentially (with the usage frequencies shown as percentages). As you can see, the most-often-used codons tend to contain a lot of A and/or T. Which is exactly what you'd expect, given that the organism's DNA is 72% A and T.&lt;br /&gt;
&lt;br /&gt;
In theory, a 3-letter word in a 4-letter language can store six bits of information. But &lt;a href="http://asserttrue.blogspot.com/2013/05/information-theory-in-three-minutes.html" target="_blank"&gt;we know from information theory&lt;/a&gt; that the &lt;i&gt;actual &lt;/i&gt;information content of a word depends on how often it's used. If I send you a 100-word e-mail that contains the question "Why?" repeated 100 times, you're not really receiving the same amount of information as would be in a 100-word e-mail that contains text in which no word appears twice. &lt;br /&gt;
&lt;br /&gt;
The average information content of a &lt;i&gt;C. botulinum&lt;/i&gt; codon is easily calculated using the usage-frequencies shown above. (All you do is calculate &lt;i&gt;-F * log&lt;sub&gt;2&lt;/sub&gt;(F) &lt;/i&gt;for each codon and add up the results.) If you do the math, you find that &lt;i&gt;C. botulinum&lt;/i&gt; uses an average of 5.217 bits per codon, about 13% short of the theoretical six bits available. &lt;br /&gt;
&lt;br /&gt;
One might imagine that the more GC/AT-imbalanced an organism's DNA is, the more biased its codon preferences will be. This is exactly what we find if we plot codon entropy against genome G+C content for a range of organisms having DNA of various G+C contents.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-yfiuDFb9sfU/UaANCsuXjQI/AAAAAAAABrU/Vrni6xznTfs/s1600/400x400-EntropyFinal.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-yfiuDFb9sfU/UaANCsuXjQI/AAAAAAAABrU/Vrni6xznTfs/s320/400x400-EntropyFinal.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Average codon entropy versus genome G+C content for 90 microorganisms.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
In the above graph, you can see that when an organism's DNA is composed of equal amounts of the bases (G+C = 50%, A+T = 50%), the organism tends to use all codons more or less equally, and entropy approaches the theoretical limit of six bits per codon. But when an organism develops a particular "dialect" (of GC-rich DNA, or AT-rich DNA), it starts using a smaller and smaller codon vocabulary more and more intensively. This is what causes the curve to fall off sharply on either side of the graph.&lt;br /&gt;
&lt;br /&gt;
If you have an observant eye, you may have noticed that the two halves of the graph are not symmetrical, even though they look symmetrical at first glance. (Organisms on the high-GC side are using slightly less entropy per codon than low-GC organisms, for a given amount of genome GC/AT skew.) If you're a biologist, you might want to think about why this is so. I'll return to the subject in a future post. &lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/1466756830194091704/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/dna-full-of-surprises.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/1466756830194091704?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/1466756830194091704?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/dna-full-of-surprises.html" title="DNA: Full of Surprises" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-GZmXqVwPdik/UaAIHjYPqZI/AAAAAAAABrE/oZRtE66DCZM/s72-c/C+botulinum+codon+chart.png" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;A0cMRnY_cCp7ImA9WhBaE0Q.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-2876746432746894116</id><published>2013-05-24T07:30:00.000-04:00</published><updated>2013-05-24T08:38:07.848-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-24T08:38:07.848-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="proteomics" /><category scheme="http://www.blogger.com/atom/ns#" term="genetics" /><category scheme="http://www.blogger.com/atom/ns#" term="amino acids" /><category scheme="http://www.blogger.com/atom/ns#" term="entropy" 
/><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics" /><category scheme="http://www.blogger.com/atom/ns#" term="redundancy" /><category scheme="http://www.blogger.com/atom/ns#" term="genome" /><category scheme="http://www.blogger.com/atom/ns#" term="information theory" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="codon" /><category scheme="http://www.blogger.com/atom/ns#" term="proteins" /><title>Decrypting DNA</title><content type="html">In a previous post (&lt;a href="http://asserttrue.blogspot.com/2013/05/information-theory-in-three-minutes.html" target="_blank"&gt;"Information Theory in Three Minutes"&lt;/a&gt;), I hinted at the power of information theory to gage redundancy in a language. A fundamental finding of information theory is that when a language uses symbols in such a way that some symbols appear more often than others (for example when vowels turn up more often than consonants, in English), it's a tipoff to redundancy. &lt;br /&gt;
&lt;br /&gt;
DNA is a language with many hidden redundancies. It's a four-letter language, with symbol choices of A, G, C, and T (adenine, guanine, cytosine, and thymine), which means any given symbol should be able to convey two bits' worth of information, since log&lt;sub&gt;2&lt;/sub&gt;(4) is two. But it turns out, different organisms speak different "dialects" of this language. Some organisms use G and C twice as often as A and T, which (if you do the math) means each symbol is actually carrying a maximum of 1.837 bits (not 2 bits) of information.&lt;br /&gt;
&lt;br /&gt;
Consider how an alien visitor to earth might be able to use information theory to figure out terrestrial molecular biology.&lt;br /&gt;
&lt;br /&gt;
The first thing an alien visitor might notice is that there are four "symbols" in DNA (A, G, C, T).&lt;br /&gt;
&lt;br /&gt;
By analyzing the frequencies of various naturally occurring combinations of these letters, the alien would quickly determine that the natural "word length" of DNA is three.&lt;br /&gt;
&lt;br /&gt;
There are 64 possible 3-letter words that can be spelled with a 4-letter alphabet. So in theory, a 3-letter "word" in DNA should convey 6 bits' worth of information (since 2 to the 6th power is 64). But an alien would look at many samples of earthly DNA, from many creatures, and do a summation of -F * log&lt;sub&gt;2&lt;/sub&gt;(F) for every 3-letter "word" used by a given creature's DNA (where F is simply the frequency of usage of the 3-letter combo). From this sort of analysis, the alien would find that even though 64 different codons (3-letter words) are, in fact, being used in earthly DNA, in actuality the &lt;i&gt;entropy per codon &lt;/i&gt;in some cases is as little as 4.524 bits. (Or at least, it approaches that value asymptotically.)&lt;br /&gt;
&lt;br /&gt;
Since 2 to the 4.524 power is 23, and since proteins (the predominant macromolecule in earthly biology) are made of amino acids, a canny alien would surmise that there must be around 23 different amino acids; and earthly DNA is a language for mapping 3-letter words to those 23 amino acids.&lt;br /&gt;
&lt;br /&gt;
As it turns out, the genetic code does use 3-letter "words" (codons) to specify amino acids, but there are 20 amino acids (not 23), with 3 "stop codons" reserved for telling the cell's protein-making machinery "this is the end of this protein; stop here."&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-zfAZTaEA758/UZ9Ko7KXrbI/AAAAAAAABq0/LyF5SyJ-s-8/s1600/E+coli+codon+chart.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-zfAZTaEA758/UZ9Ko7KXrbI/AAAAAAAABq0/LyF5SyJ-s-8/s1600/E+coli+codon+chart.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;&lt;i&gt;E. coli &lt;/i&gt;codon usage. &lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
The above chart shows the actual codon usage pattern for &lt;i&gt;E. coli&lt;/i&gt;. Note that all organisms use the same 3-letter codes for the same amino acids, and most organisms use all 64 possible codons, but the codons are used with vastly unequal frequencies. If you look in the upper right corner of the above chart, for example, you'll see that &lt;i&gt;E. coli &lt;/i&gt;uses CTG (one of the six codons for Leucine) far more often than CTA (another codon for Leucine). One of the open questions in biology is why organisms favor certain synonymous codons over others (a phenomenon called &lt;i&gt;codon usage bias&lt;/i&gt;). &lt;br /&gt;
&lt;br /&gt;
While DNA's 6-bit codon bandwidth permits 64 different codons, and while organisms do generally make use of all 64 codons, the uneven usage pattern means fewer than 6 bits of information are used per codon. To get the actual codon entropy, all you have to do is take each usage frequency and calculate -F *  log&lt;sub&gt;2&lt;/sub&gt;(F) for each codon, then sum. If you do that for &lt;i&gt;E. coli&lt;/i&gt;, you get 5.679 bits per codon. As it happens, &lt;i&gt;E. coli&lt;/i&gt; actually does make use of almost all the available bandwidth (of 6 bits) in its codons. This turns out not to be true for all organisms, however.&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/2876746432746894116/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/decrypting-dna.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2876746432746894116?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2876746432746894116?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/decrypting-dna.html" title="Decrypting DNA" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-zfAZTaEA758/UZ9Ko7KXrbI/AAAAAAAABq0/LyF5SyJ-s-8/s72-c/E+coli+codon+chart.png" height="72" width="72" /><thr:total>2</thr:total></entry><entry 
gd:etag="W/&quot;DkUEQ38_cSp7ImA9WhBaEE8.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-2499585145312271639</id><published>2013-05-20T00:30:00.000-04:00</published><updated>2013-05-20T00:30:02.149-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-20T00:30:02.149-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="DNA repair" /><category scheme="http://www.blogger.com/atom/ns#" term="Clostridium" /><category scheme="http://www.blogger.com/atom/ns#" term="genome" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA replication" /><category scheme="http://www.blogger.com/atom/ns#" term="Actinomyces" /><category scheme="http://www.blogger.com/atom/ns#" term="G+C content" /><category scheme="http://www.blogger.com/atom/ns#" term="AT pressure" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="mutations" /><title>Parsing the DNA Crazy Quilt</title><content type="html">A measure of how little we know about the real-world workings of evolution is that science still can't explain why some organisms have huge imbalances in the chemical composition of their DNA. If you look at the genome of &lt;i&gt;Clostridium botulinum&lt;/i&gt; (the botulism germ), 72% of the bases in its DNA are either 'A' or 'T': adenine or thymine. (The four possibilities are, of course, adenine, thymine, guanine, and cytosine.) Conversely, you can find many examples of organisms in which the DNA is mostly 'G' or 'C.' The question is why A, T, G, and C don't occur in roughly equal proportions (which is what you'd expect after millions of years of genetic averaging; you'd expect some sort of regression to the mean). &lt;br /&gt;
&lt;br /&gt;
Just to give you an idea of what GC/AT imbalance really looks like, here's the gene for the enzyme &lt;a href="http://en.wikipedia.org/wiki/Adenosine_deaminase" target="_blank"&gt;adenine deaminase&lt;/a&gt; from &lt;i&gt;Clostridium botulinum&lt;/i&gt;, with all the A and T values in red:&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-size: x-small;"&gt;&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;TATAAAAATATA&lt;/span&gt;C&lt;span style="color: red;"&gt;AAA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AAAT&lt;/span&gt;C&lt;span style="color: red;"&gt;TATAAAAATA&lt;/span&gt;C&lt;span style="color: red;"&gt;AAAA&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;CGGGG&lt;span style="color: red;"&gt;ATAT&lt;/span&gt;G&lt;span style="color: red;"&gt;TTTAATAAATTT&lt;/span&gt;G&lt;span style="color: red;"&gt;ATA&lt;/span&gt;C&lt;span style="color: red;"&gt;AAA&lt;/span&gt;GCC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;TTT&lt;/span&gt;GGG&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;TAA&lt;/span&gt;G&lt;span style="color: red;"&gt;TAAA&lt;/span&gt;
&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;TTTAT&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;GGGGC&lt;span style="color: red;"&gt;TT&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;CCGGC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;ATAT&lt;/span&gt;GG&lt;span style="color: red;"&gt;TTATTATAAATT&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;GGC&lt;span style="color: red;"&gt;TTATAAAT&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;ATA&lt;/span&gt;G&lt;span style="color: red;"&gt;AAAA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;
G&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;GC&lt;span style="color: red;"&gt;AATTA&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;AATT&lt;/span&gt;GC&lt;span style="color: red;"&gt;TTTA&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GC&lt;span style="color: red;"&gt;AAAA&lt;/span&gt;C&lt;span style="color: red;"&gt;ATT&lt;/span&gt;GC&lt;span style="color: red;"&gt;ATA&lt;/span&gt;GGGG&lt;span style="color: red;"&gt;AAAA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;TAATT&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GC&lt;span style="color: red;"&gt;AAAA&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AATATATT&lt;/span&gt;
GC&lt;span style="color: red;"&gt;A&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;TTTTTTA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;ATATT&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;G&lt;span style="color: red;"&gt;AAT&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;C&lt;span style="color: red;"&gt;AAT&lt;/span&gt;G&lt;span style="color: red;"&gt;TTAA&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;TAA&lt;/span&gt;GCG&lt;span style="color: red;"&gt;AATAT&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;CG&lt;span style="color: red;"&gt;TT&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;GG&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;CGG&lt;span style="color: red;"&gt;AATA&lt;/span&gt;
&lt;span style="color: red;"&gt;TATAT&lt;/span&gt;GG&lt;span style="color: red;"&gt;AT&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;AAATTT&lt;/span&gt;G&lt;span style="color: red;"&gt;TAAT&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;CGG&lt;span style="color: red;"&gt;ATTAAAT&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;CG&lt;span style="color: red;"&gt;TTATAT&lt;/span&gt;G&lt;span style="color: red;"&gt;ATT&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GGC&lt;span style="color: red;"&gt;AA&lt;/span&gt;GGG&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;TTAAAAATAT&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;CC
&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CC&lt;span style="color: red;"&gt;AT&lt;/span&gt;CC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;TTTT&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;ATA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;GCGGC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;TTA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AAA&lt;/span&gt;C&lt;span style="color: red;"&gt;AAT&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GGG&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;AAATA&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;
GG&lt;span style="color: red;"&gt;ATTA&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AAAT&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;TT&lt;/span&gt;CCC&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;TATA&lt;/span&gt;C&lt;span style="color: red;"&gt;TTTATT&lt;/span&gt;C&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AAA&lt;/span&gt;C&lt;span style="color: red;"&gt;TTTAAAA&lt;/span&gt;GC&lt;span style="color: red;"&gt;TA&lt;/span&gt;G&lt;span style="color: red;"&gt;TAAAA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TAA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;
GG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;ATTATT&lt;/span&gt;C&lt;span style="color: red;"&gt;TTTA&lt;/span&gt;CC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;AAA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;AAAA&lt;/span&gt;GG&lt;span style="color: red;"&gt;ATTAAAT&lt;/span&gt;GG&lt;span style="color: red;"&gt;ATATATT&lt;/span&gt;GC&lt;span style="color: red;"&gt;AT&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;TAA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;AAT&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;GCGG&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;TT&lt;/span&gt;
GC&lt;span style="color: red;"&gt;TAAAAT&lt;/span&gt;GCGCC&lt;span style="color: red;"&gt;TT&lt;/span&gt;GG&lt;span style="color: red;"&gt;AAT&lt;/span&gt;G&lt;span style="color: red;"&gt;TAT&lt;/span&gt;GC&lt;span style="color: red;"&gt;AAT&lt;/span&gt;G&lt;span style="color: red;"&gt;TTTA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;GG&lt;span style="color: red;"&gt;AT&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;AT&lt;/span&gt;GGC&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;TTAAA&lt;/span&gt;GG&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;TAA&lt;/span&gt;G&lt;span style="color: red;"&gt;TAAA&lt;/span&gt;GCC&lt;span style="color: red;"&gt;ATTA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AAAATAA&lt;/span&gt;GG&lt;span style="color: red;"&gt;TA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;
&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;G&lt;span style="color: red;"&gt;ATTT&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;TTTTAATAT&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;ATA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CCC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;ATT&lt;/span&gt;GC&lt;span style="color: red;"&gt;TTAA&lt;/span&gt;GG&lt;span style="color: red;"&gt;AT&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;ATTTA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;C&lt;span style="color: red;"&gt;ATATTATAAAA&lt;/span&gt;CG&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;TATA&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;GGG
&lt;span style="color: red;"&gt;ATA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;GCC&lt;span style="color: red;"&gt;ATTAA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;AATT&lt;/span&gt;C&lt;span style="color: red;"&gt;AAAT&lt;/span&gt;GG&lt;span style="color: red;"&gt;TAA&lt;/span&gt;C&lt;span style="color: red;"&gt;AATAAATT&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AAT&lt;/span&gt;G&lt;span style="color: red;"&gt;TTT&lt;/span&gt;CC&lt;span style="color: red;"&gt;AAAT&lt;/span&gt;GG&lt;span style="color: red;"&gt;AT&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;AATTA&lt;/span&gt;GG&lt;span style="color: red;"&gt;TT&lt;/span&gt;C&lt;span style="color: red;"&gt;TATAA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;AAAAT&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;
G&lt;span style="color: red;"&gt;ATATT&lt;/span&gt;G&lt;span style="color: red;"&gt;TATTTATA&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;ATTTAAAA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;TAAAAATAA&lt;/span&gt;C&lt;span style="color: red;"&gt;AAA&lt;/span&gt;GG&lt;span style="color: red;"&gt;TTATTATA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GG&lt;span style="color: red;"&gt;AAATTTA&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;GC&lt;span style="color: red;"&gt;AAA&lt;/span&gt;GGG&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;TATTAA&lt;/span&gt;C&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;TT&lt;/span&gt;C&lt;span style="color: red;"&gt;AATA&lt;/span&gt;
GC&lt;span style="color: red;"&gt;TAAATAT&lt;/span&gt;G&lt;span style="color: red;"&gt;ATTAT&lt;/span&gt;CC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GC&lt;span style="color: red;"&gt;TAT&lt;/span&gt;G&lt;span style="color: red;"&gt;AATT&lt;/span&gt;C&lt;span style="color: red;"&gt;AAT&lt;/span&gt;GC&lt;span style="color: red;"&gt;ATATTAA&lt;/span&gt;G&lt;span style="color: red;"&gt;AATAAAATAA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;ATT&lt;/span&gt;CC&lt;span style="color: red;"&gt;TTTAATATTAT&lt;/span&gt;GGC&lt;span style="color: red;"&gt;T&lt;/span&gt;CC&lt;span style="color: red;"&gt;TAATAAA&lt;/span&gt;G&lt;span style="color: red;"&gt;AAAAA&lt;/span&gt;
&lt;span style="color: red;"&gt;ATAA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;AA&lt;/span&gt;GGG&lt;span style="color: red;"&gt;TTATT&lt;/span&gt;G&lt;span style="color: red;"&gt;AAATTATA&lt;/span&gt;CC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;AAA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;GG&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;ATAT&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;TTAAT&lt;/span&gt;G&lt;span style="color: red;"&gt;TTAAA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;ATAAA&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;C&lt;span style="color: red;"&gt;AAT&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;
&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TAAA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;TTTTAAAA&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;G&lt;span style="color: red;"&gt;TATTT&lt;/span&gt;G&lt;span style="color: red;"&gt;AAA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CC&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;AAA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;ATAT&lt;/span&gt;GG&lt;span style="color: red;"&gt;TTTT&lt;/span&gt;G&lt;span style="color: red;"&gt;TTAAA&lt;/span&gt;GG&lt;span style="color: red;"&gt;TTTT&lt;/span&gt;GG&lt;span style="color: red;"&gt;TATTAA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;
&lt;span style="color: red;"&gt;AT&lt;/span&gt;GGC&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;GCCC&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;TTATTA&lt;/span&gt;G&lt;span style="color: red;"&gt;TTATA&lt;/span&gt;GG&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;AAAT&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;ATAT&lt;/span&gt;GGC&lt;span style="color: red;"&gt;ATTA&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;TAATA&lt;/span&gt;C&lt;span style="color: red;"&gt;ATTAATA&lt;/span&gt;G&lt;span style="color: red;"&gt;AAT&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;
GG&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;AAT&lt;/span&gt;GG&lt;span style="color: red;"&gt;TA&lt;/span&gt;GCCG&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GG&lt;span style="color: red;"&gt;TAAA&lt;/span&gt;G&lt;span style="color: red;"&gt;TATTA&lt;/span&gt;GGC&lt;span style="color: red;"&gt;TTA&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;CC&lt;span style="color: red;"&gt;ATTA&lt;/span&gt;CC&lt;span style="color: red;"&gt;AATA&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;TTAT&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;TAATAA&lt;/span&gt;GCC&lt;span style="color: red;"&gt;TTTA&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;AAAT&lt;/span&gt;GGC&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;
&lt;span style="color: red;"&gt;AT&lt;/span&gt;GG&lt;span style="color: red;"&gt;TA&lt;/span&gt;G&lt;span style="color: red;"&gt;AAAAA&lt;/span&gt;C&lt;span style="color: red;"&gt;TA&lt;/span&gt;G&lt;span style="color: red;"&gt;ATA&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;AT&lt;/span&gt;GG&lt;span style="color: red;"&gt;AAA&lt;/span&gt;G&lt;span style="color: red;"&gt;AAATA&lt;/span&gt;GG&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;ATATA&lt;/span&gt;G&lt;span style="color: red;"&gt;TTT&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CC&lt;span style="color: red;"&gt;ATTTAT&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AAT&lt;/span&gt;GGC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;TTATT&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;TT&lt;/span&gt;GCC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCC&lt;span style="color: red;"&gt;TA&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;
&lt;span style="color: red;"&gt;TTAA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;TAA&lt;/span&gt;C&lt;span style="color: red;"&gt;TAATA&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;GGG&lt;span style="color: red;"&gt;TTA&lt;/span&gt;G&lt;span style="color: red;"&gt;TT&lt;/span&gt;G&lt;span style="color: red;"&gt;ATT&lt;/span&gt;G&lt;span style="color: red;"&gt;TAATAA&lt;/span&gt;G&lt;span style="color: red;"&gt;TTT&lt;/span&gt;G&lt;span style="color: red;"&gt;AATTT&lt;/span&gt;G&lt;span style="color: red;"&gt;TAT&lt;/span&gt;C&lt;span style="color: red;"&gt;ATTATTT&lt;/span&gt;G&lt;span style="color: red;"&gt;TA&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;G&lt;span style="color: red;"&gt;AATAA&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;

&lt;br /&gt;
&lt;span style="font-size: x-small;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="font-size: x-small;"&gt;&lt;a href="http://genomevolution.org/CoGe/FastaView.pl?fid=24036547" target="_blank"&gt;View gene at FastaView&lt;/a&gt;.&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
The organism &lt;i&gt;Actinomyces oris&lt;/i&gt; (which occurs in the film that builds up on teeth) has an adenine deaminase gene that looks like this:&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-size: x-small;"&gt;&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;&lt;span style="color: red;"&gt;AT&lt;/span&gt;GGCCG&lt;span style="color: red;"&gt;AT&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;CCG&lt;span style="color: red;"&gt;T&lt;/span&gt;CCGC&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;CC&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;TTAT&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;CGCGCGC&lt;span style="color: red;"&gt;AT&lt;/span&gt;CG&lt;span style="color: red;"&gt;T&lt;/span&gt;CCC&lt;span style="color: red;"&gt;TTT&lt;/span&gt;CCGG&lt;span style="color: red;"&gt;T&lt;/span&gt;CCCG&lt;span style="color: red;"&gt;TA&lt;/span&gt;CCG&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;GGG&lt;span style="color: red;"&gt;T&lt;/span&gt;GCGC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCGCCG&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;CCC&lt;span style="color: red;"&gt;T&lt;/span&gt;
C&lt;span style="color: red;"&gt;A&lt;/span&gt;CCCCGGCGCC&lt;span style="color: red;"&gt;TT&lt;/span&gt;GGCCGCGCCGCCGCCCCCGGG&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;GCCCG&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GCG&lt;span style="color: red;"&gt;TAT&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;GGCGGGCCGGG&lt;span style="color: red;"&gt;T&lt;/span&gt;CG&lt;span style="color: red;"&gt;T&lt;/span&gt;CG&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;GGG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;GGGGC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;
CCCGGG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CGGG&lt;span style="color: red;"&gt;T&lt;/span&gt;CC&lt;span style="color: red;"&gt;TT&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;GGCCG&lt;span style="color: red;"&gt;A&lt;/span&gt;GGGC&lt;span style="color: red;"&gt;T&lt;/span&gt;CC&lt;span style="color: red;"&gt;TT&lt;/span&gt;CC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;ATT&lt;/span&gt;CCCGGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GGG&lt;span style="color: red;"&gt;A&lt;/span&gt;CGC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CGCCC&lt;span style="color: red;"&gt;A&lt;/span&gt;CC&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;GGCGGCGCGC&lt;span style="color: red;"&gt;T&lt;/span&gt;CGGC&lt;span style="color: red;"&gt;A&lt;/span&gt;CGC
&lt;span style="color: red;"&gt;AT&lt;/span&gt;CG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CGC&lt;span style="color: red;"&gt;T&lt;/span&gt;GGCC&lt;span style="color: red;"&gt;A&lt;/span&gt;CCCGC&lt;span style="color: red;"&gt;A&lt;/span&gt;GCGCGG&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;GGCCC&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;GGC&lt;span style="color: red;"&gt;A&lt;/span&gt;CGGGCGC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCGGG&lt;span style="color: red;"&gt;AT&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;CCGGCCGG&lt;span style="color: red;"&gt;TT&lt;/span&gt;CGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;CCGGCC&lt;span style="color: red;"&gt;A&lt;/span&gt;CG&lt;span style="color: red;"&gt;AT&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;G
GGC&lt;span style="color: red;"&gt;TT&lt;/span&gt;CGGGC&lt;span style="color: red;"&gt;A&lt;/span&gt;CCGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;CC&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;GGCCCCGGG&lt;span style="color: red;"&gt;T&lt;/span&gt;GCCC&lt;span style="color: red;"&gt;A&lt;/span&gt;CGG&lt;span style="color: red;"&gt;T&lt;/span&gt;GGCCG&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;CG&lt;span style="color: red;"&gt;A&lt;/span&gt;CGCCG&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CCGGGG&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;TT&lt;/span&gt;CCC&lt;span style="color: red;"&gt;A&lt;/span&gt;CGC&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;CCGGGG&lt;span style="color: red;"&gt;A&lt;/span&gt;C
G&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;CCGGG&lt;span style="color: red;"&gt;T&lt;/span&gt;GGC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;CGGCGGCGC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCG&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;TT&lt;/span&gt;CGGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCCGGGGGCC&lt;span style="color: red;"&gt;A&lt;/span&gt;GCGCCC&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;CCCGGG&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;CCG&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;AA&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;CCCG&lt;span style="color: red;"&gt;T&lt;/span&gt;GG
&lt;span style="color: red;"&gt;TT&lt;/span&gt;CGCCC&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;CG&lt;span style="color: red;"&gt;A&lt;/span&gt;CCGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;CG&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;CCCGGGG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CGCG&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCGGG&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;CCGGC&lt;span style="color: red;"&gt;TA&lt;/span&gt;CCG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;CC&lt;span style="color: red;"&gt;T&lt;/span&gt;GGCCG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;CCCGGGGCG&lt;span style="color: red;"&gt;T&lt;/span&gt;C
&lt;span style="color: red;"&gt;A&lt;/span&gt;CCGGCG&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;CGG&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;AT&lt;/span&gt;CCCG&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;GGCCGCGGCGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCGGGCC&lt;span style="color: red;"&gt;AT&lt;/span&gt;GGCGG&lt;span style="color: red;"&gt;A&lt;/span&gt;CG&lt;span style="color: red;"&gt;A&lt;/span&gt;GGGCG&lt;span style="color: red;"&gt;TA&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;CCCCC&lt;span style="color: red;"&gt;A&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCCC
CGC&lt;span style="color: red;"&gt;AT&lt;/span&gt;CCGC&lt;span style="color: red;"&gt;AT&lt;/span&gt;CGGGG&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;TA&lt;/span&gt;CCGCG&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;AA&lt;/span&gt;CGG&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;AT&lt;/span&gt;CGCCCGGGGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCGC&lt;span style="color: red;"&gt;A&lt;/span&gt;CCGGG&lt;span style="color: red;"&gt;A&lt;/span&gt;CCGCGC&lt;span style="color: red;"&gt;T&lt;/span&gt;GGC&lt;span style="color: red;"&gt;A&lt;/span&gt;GGC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CCCCGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCCCG&lt;span style="color: red;"&gt;A&lt;/span&gt;C
GG&lt;span style="color: red;"&gt;TT&lt;/span&gt;CCCCGG&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;GGGGCCGC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;CGC&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;CGGC&lt;span style="color: red;"&gt;T&lt;/span&gt;CG&lt;span style="color: red;"&gt;AT&lt;/span&gt;GGGC&lt;span style="color: red;"&gt;T&lt;/span&gt;CGGGC&lt;span style="color: red;"&gt;A&lt;/span&gt;GCGC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;G&lt;span style="color: red;"&gt;T&lt;/span&gt;GCG&lt;span style="color: red;"&gt;A&lt;/span&gt;GCCC&lt;span style="color: red;"&gt;TAT&lt;/span&gt;CCCGCCG&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;G
GGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;A&lt;/span&gt;CGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;GCGGCG&lt;span style="color: red;"&gt;T&lt;/span&gt;GG&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;AA&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;CG&lt;span style="color: red;"&gt;A&lt;/span&gt;CCGGGCCG&lt;span style="color: red;"&gt;A&lt;/span&gt;GC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;A&lt;/span&gt;CCG&lt;span style="color: red;"&gt;A&lt;/span&gt;CC&lt;span style="color: red;"&gt;T&lt;/span&gt;C&lt;span style="color: red;"&gt;AT&lt;/span&gt;GGCCC&lt;span style="color: red;"&gt;A&lt;/span&gt;CGCC&lt;span style="color: red;"&gt;T&lt;/span&gt;CCCGGC&lt;span style="color: red;"&gt;A&lt;/span&gt;GGG&lt;span style="color: red;"&gt;TTAT&lt;/span&gt;G&lt;span style="color: red;"&gt;A&lt;/span&gt;G&lt;span style="color: red;"&gt;AT&lt;/span&gt;GGCC&lt;span style="color: red;"&gt;AT&lt;/span&gt;CC&lt;span style="color: red;"&gt;A&lt;/span&gt;C
GCC&lt;span style="color: red;"&gt;AT&lt;/span&gt;CGGGG&lt;span style="color: red;"&gt;A&lt;/span&gt;CGCGGCGG&lt;span style="color: red;"&gt;T&lt;/span&gt;CG&lt;span style="color: red;"&gt;A&lt;/span&gt;CG&lt;span style="color: red;"&gt;A&lt;/span&gt;CG&lt;span style="color: red;"&gt;T&lt;/span&gt;CGCCGCGGCC&lt;span style="color: red;"&gt;TT&lt;/span&gt;CGCGC&lt;span style="color: red;"&gt;A&lt;/span&gt;C&lt;span style="color: red;"&gt;T&lt;/span&gt;CGGG&lt;span style="color: red;"&gt;T&lt;/span&gt;GCCGCCGGGCG&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
For whatever reason (and that's the point: we have no idea why), &lt;i&gt;Actinomyces &lt;/i&gt;has chosen an AT-poor dialect for its DNA, even though it has to make many of the same types of genes as &lt;i&gt;Clostridium&lt;/i&gt;. &lt;br /&gt;
&lt;br /&gt;
Some people don't see this as a major puzzle: One organism evolved its DNA to a super-AT-rich state, another one didn't. So what? It's all random drift.&lt;br /&gt;
&lt;br /&gt;
I disagree. It's not drift. We know of two strong forces that should keep organisms like &lt;i&gt;Actinomyces&lt;/i&gt; from developing high G+C content. First is "AT pressure." It's known that mutations naturally tend to go in the GC--&amp;gt;AT direction. (&lt;a href="http://ateson.com/ws/r/www.pnas.org/content/105/46/17878.full" target="_blank"&gt;One study found&lt;/a&gt; that in &lt;i&gt;Salmonella typhimurium&lt;/i&gt;, GC--&amp;gt;AT mutations outnumbered AT--&amp;gt;GC mutations 50 to 1.) In the absence of corrective measures, natural mutations would very quickly lead all organisms in the direction of DNA with a very low G+C content.&lt;br /&gt;
&lt;br /&gt;
A second important force is that of lateral gene transfer, which we know is common in microorganisms; common enough, certainly, to "even out" GC/AT ratios over evolutionary timescales. Random uptake of foreign genes by cells should tend to make A, G, C, and T levels equal, over time. For organisms like &lt;i&gt;Clostridium &lt;/i&gt;and &lt;i&gt;Actinomyces &lt;/i&gt;(and many others), this clearly hasn't happened.&lt;br /&gt;
&lt;br /&gt;
In an &lt;a href="http://asserttrue.blogspot.com/2013/05/dna-gc-content-and-survival-value.html" target="_blank"&gt;earlier post&lt;/a&gt; I mentioned one possible reason organisms drift away from the 50-50 GC/AT centerline. DNA replication is more efficient when the template is biased toward one extreme (GC) or the other (AT), assuming endogenous nucleotide levels can be regulated in a similarly biased fashion (which they presumably are, in these organisms).&lt;br /&gt;
&lt;br /&gt;
One might speculate that GC/AT extremism also simplifies DNA maintenance and repair. Imagine that your DNA is 70% G+C. A super-simple DNA repair tactic for deaminated purines would be to just replace every defective purine with a guanine. Seven out of ten times, blind replacement of defective purines with guanine would be the correct repair, if you're &lt;i&gt;Actinomyces&lt;/i&gt;. And one out of three times, mistakes wouldn't matter anyway, because high-GC &lt;a href="http://en.wikipedia.org/wiki/DNA_codon_table" target="_blank"&gt;codons&lt;/a&gt; tend to be &lt;a href="http://en.wikipedia.org/wiki/Genetic_code#Degeneracy" target="_blank"&gt;fourfold degenerate.&lt;/a&gt; (In a fourfold degenerate codon, you can replace the third base with anything—A, G, C, or T—without changing the codon's meaning.) Blind guanine substitution would have a better than 80% success rate in a high-GC organism that needed to replace defective purines.&lt;br /&gt;
&lt;br /&gt;
It turns out there are other reasons to live "away from centerline," if you're a bacterium. I'll talk about those in another post.&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/2499585145312271639/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/parsing-dna-crazy-quilt.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2499585145312271639?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/2499585145312271639?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/parsing-dna-crazy-quilt.html" title="Parsing the DNA Crazy Quilt" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;C08EQHg5eyp7ImA9WhBaEEw.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-8441150757625461812</id><published>2013-05-18T00:30:00.000-04:00</published><updated>2013-05-19T21:03:21.623-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-19T21:03:21.623-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Claude Shannon" /><category scheme="http://www.blogger.com/atom/ns#" term="entropy" /><category scheme="http://www.blogger.com/atom/ns#" term="information theory" /><title>Information Theory in Three Minutes</title><content type="html">&lt;a href="http://en.wikipedia.org/wiki/Claude_Shannon" 
target="_blank"&gt;Claude Shannon&lt;/a&gt;, the father of information theory, used to play an interesting game at cocktail parties. He'd grab a book, open it to a random page, and cover up all but the first letter on the page, then ask someone to guess the next letter. If the person couldn't guess, he'd uncover the letter, then ask the person to guess the &lt;i&gt;next&lt;/i&gt; letter. (Suppose the first two letters are 'th'. A reasonable guess for the next letter might be 'e'.) Shannon would continue in this manner, keeping score, until a good deal of text had been guessed. The further along one goes in this game, the easier it becomes (of course) to guess downstream letters, because the upstream letters provide valuable context.&lt;br /&gt;
&lt;br /&gt;
What Shannon consistently found from experiments of this sort is that well over half of English letters are redundant, because they can be guessed in advance. In fact, Shannon found that when all forms of redundancy are taken into account, English is more than 75% redundant, with the average information content of a letter being approximately one bit per symbol. (Yes, one bit. See &lt;a href="http://languagelog.ldc.upenn.edu/myl/Shannon1950.pdf" target="_blank"&gt;Shannon's "Prediction and Entropy of Printed English."&lt;/a&gt;)&lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-iEQwIr0YbO8/UZemgsX5NrI/AAAAAAAABqY/BqgFk0JMi9E/s1600/Claude_Elwood_Shannon_(1916-2001).jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="200" src="http://4.bp.blogspot.com/-iEQwIr0YbO8/UZemgsX5NrI/AAAAAAAABqY/BqgFk0JMi9E/s200/Claude_Elwood_Shannon_(1916-2001).jpg" width="141" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Claude Shannon&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
Shannon became intrigued by questions involving the efficiency of information transfer. What is the nature of redundancy in an information stream? Are some encodings more redundant than others? How can you quantify the redundancy? Eventually, Shannon elaborated a mathematical &lt;a href="http://en.wikipedia.org/wiki/Information_theory" target="_blank"&gt;theory&lt;/a&gt; around the encoding and decoding of messages. That theory has since become extremely important for understanding questions of encryption, compression, detection of faint signals in the presence of noise, recovery of damaged signals, and so on.&lt;br /&gt;
&lt;br /&gt;
A central concept in Shannon's theory is that of &lt;a href="http://en.wikipedia.org/wiki/Entropy_(information_theory)" target="_blank"&gt;entropy&lt;/a&gt;. "Shannon entropy" is very widely misunderstood and/or misinterpreted, so it's important to be clear on what it's &lt;i&gt;not&lt;/i&gt;. It's &lt;i&gt;not &lt;/i&gt;disorder: Entropy, in information theory, is not the same as entropy in thermodynamics, even though the mathematics are similar. Shannon liked to consider entropy a statistical parameter reflecting the amount of information (or resolved uncertainty) encoded, on average, by a symbol. We think of the English alphabet as having 26 symbols. Since 26 values can be encoded in log&lt;sub&gt;2&lt;/sub&gt;(26) == 4.7 bits, we say that the channel bandwidth for 26-letter English is 4.7 bits per symbol, but this is not the entropy. Shannon found that the entropy (the &lt;i&gt;actual &lt;/i&gt;bits used per symbol) was closer to 1.0 than to 4.7. How can this be? The answer has to do with the fact that some symbols are used far more often than others; and also (as noted), some symbols are redundant by virtue of context. &lt;br /&gt;
&lt;br /&gt;
Entropy gets to the actual (rather than ideal) information content of a message by taking into account actual frequencies of usage of symbols. If English text used all letters of the alphabet equally (and unpredictably), then the entropy of text would be exactly 4.7 bits per symbol. Each symbol would contribute 1/26th of -log&lt;sub&gt;2&lt;/sub&gt;(1/26) to the total. But because some letters are used more or less frequently than others, they contribute more or less than 1/26th of -log&lt;sub&gt;2&lt;/sub&gt;(1/26), and that total can add up to less than 4.7.&lt;br /&gt;
&lt;br /&gt;
It's easy to visualize this with a simple example involving coin-tossing. Suppose, for sake of example, that a series of coin tosses comprises a message. As a medium of communication, the coin toss is capable of expressing only two states: heads, or tails. This could be represented in binary form as 1 and 0. If half of all tosses are heads and half are tails, then the total entropy in the message is 0.5 * log&lt;sub&gt;2&lt;/sub&gt;(0.5) for heads plus 0.5 * log&lt;sub&gt;2&lt;/sub&gt;(0.5) for tails, or one bit per symbol. (Note: If you actually do the math you'll come up with a negative-1. Hence, in entropy calculations, the result is usually multiplied by -1 so it can be expressed as a positive number.)&lt;br /&gt;
&lt;br /&gt;
Consider now the situation of a two-headed coin. In this case, there is no "tails" term and the heads term is 1.0 * log&lt;sub&gt;2&lt;/sub&gt;(1.0), or zero. This means the tossing of a two-headed coin resolves no uncertainty and carries no information.&lt;br /&gt;
&lt;br /&gt;
Continuing the example, consider the case of a weighted penny that falls heads-up two-thirds of the time. Intuitively, we know that this kind of coin toss can't possibly convey as much information as a "fair" coin toss. And indeed, if we calculate 2/3 * log&lt;sub&gt;2&lt;/sub&gt;(2/3) for heads plus 1/3 * log&lt;sub&gt;2&lt;/sub&gt;(1/3) for tails, we get an entropy value of 0.9183 bits per symbol, which means that each toss is (on average) 1.0 - 0.9183 == .0817 or 8.17% redundant. If one were to take a large number of coin tosses involving the weighted penny and convert those tosses into symbols ('h' for heads and 't' for tails, say), the resulting data stream would be compressible to 91.83% of its fully expanded size, and then it wouldn't compress any more beyond that, because that's the entropy limit.&lt;br /&gt;
&lt;br /&gt;
Actually, that last statement needs to be qualified. We're assuming, throughout this example, that the result of any given coin toss does not depend on the outcome of the preceding toss. If that rule is violated, then the true entropy of the "message" could be much lower than 0.9183 bits per symbol. For example, suppose the result of 12 successive coin-tosses were: h-h-t-h-h-t-h-h-t-h-h-t. There's a recurring pattern, and the pattern makes the stream predictable. Predictability reduces entropy; remember Shannon's cocktail-party experiment. (You might ask yourself what a message with all possible redundancy removed would look like, and in what way or ways, if any, it would differ from apparent randomness.)&lt;br /&gt;
&lt;br /&gt;
Technically speaking, when symbols represent independent choices (not depending on what came before), the entropy can be calculated as before, and it's called the &lt;i&gt;order-zero&lt;/i&gt; entropy. But if any given symbol depends on the value of the immediately preceding symbol, we have to distinguish between order-zero and order-one entropy. There are also order-two, order-three, and higher-order entropies, representing contexts of contexts.&lt;br /&gt;
&lt;br /&gt;
Suppose now I tell you that an organism's DNA can contain only two types of base-pairs: GC and AT. (You should be thinking "coin toss.") Suppose, further, I tell you that a particular organism's DNA is 70% GC. Disregarding higher-order entropy, does the DNA contain redundancy? If so, how much? Answer: 0.7 * log&lt;sub&gt;2&lt;/sub&gt;(0.7) for GC plus 0.3 * log&lt;sub&gt;2&lt;/sub&gt;(0.3) for AT equals 0.8813, meaning redundancy is about 12%. Could the actual redundancy be higher? Yes. It depends what kinds of recurring patterns exist in the actual sequence of A, G, C, and T values. There might be recurring motifs of many kinds. Each would send entropy lower.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Further Reading&lt;/b&gt;&lt;br /&gt;
&lt;a href="http://cm.bell-labs.com/cm/ms/what/shannonday/shannon1948.pdf" target="_blank"&gt;Shannon's best-known paper&lt;/a&gt;, "A Mathematical Theory of Communication," &lt;i&gt;Bell System Technical Journal&lt;/i&gt;, October 1948&lt;br /&gt;
&lt;a href="http://dspace.mit.edu/bitstream/handle/1721.1/11173/34541425.pdf?sequence=1"&gt;"A Symbolic Analysis of Relay and Switching Circuits,"&lt;/a&gt; Shannon's unpublished master's thesis &lt;br /&gt;
&lt;a href="http://en.wikipedia.org/wiki/Shannon_number" target="_blank"&gt;Claude Shannon's contribution to computer chess&lt;/a&gt;&lt;br /&gt;
&lt;a href="http://en.wikipedia.org/wiki/Shannon%E2%80%93Fano_coding" target="_blank"&gt;Shannon-Fano coding&lt;/a&gt;&lt;br /&gt;
&lt;a href="http://en.wikipedia.org/wiki/Nyquist%E2%80%93Shannon_sampling_theorem" target="_blank"&gt;Nyquist-Shannon Sampling Theorem&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/8441150757625461812/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/information-theory-in-three-minutes.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/8441150757625461812?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/8441150757625461812?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/information-theory-in-three-minutes.html" title="Information Theory in Three Minutes" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-iEQwIr0YbO8/UZemgsX5NrI/AAAAAAAABqY/BqgFk0JMi9E/s72-c/Claude_Elwood_Shannon_(1916-2001).jpg" height="72" width="72" /><thr:total>1</thr:total></entry><entry gd:etag="W/&quot;CEUEQ3o5fyp7ImA9WhBbFUQ.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-7311936253342528427</id><published>2013-05-15T00:30:00.000-04:00</published><updated>2013-05-15T00:30:02.427-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-15T00:30:02.427-04:00</app:edited><title>Back Pain: An Infectious Process?</title><content type="html">I wrote a &lt;a href="http://bigthink.com/devil-in-the-data/antibiotics-for-back-pain" target="_blank"&gt;piece&lt;/a&gt; for &lt;a href="http://bigthink.com/" 
target="_blank"&gt;Big Think&lt;/a&gt; the other day about the &lt;a href="http://www.scribd.com/doc/140126317/European-Spine-Journal-2" target="_blank"&gt;recent finding&lt;/a&gt; that many cases of back pain are septic in nature: the pain comes from propionic acid (and other acids) released by anaerobic bacteria that have found their way into spinal-disc tissues. The principal offender is something called &lt;a href="http://www.ncbi.nlm.nih.gov/books/NBK83685/" target="_blank"&gt;&lt;i&gt;Propionibacterium acnes&lt;/i&gt;&lt;/a&gt;, a common mouth and skin germ that also often can be found in lung tissue.&lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-sUQcaKbYgGs/UZJwqITpBuI/AAAAAAAABqI/tOZSKPUaDYw/s1600/propioni.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="200" src="http://3.bp.blogspot.com/-sUQcaKbYgGs/UZJwqITpBuI/AAAAAAAABqI/tOZSKPUaDYw/s200/propioni.jpg" width="183" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;&lt;i&gt;Propionibacterium acnes&lt;/i&gt; can take on&lt;br /&gt;
an intracellular lifestyle.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;i&gt;Propionibacterium &lt;/i&gt;is, for most of us, a harmless stowaway. It is characterized as a "low virulence" organism, meaning it doesn't aggressively pathologize the host by default, the way (for example) a tuberculosis bacterium does. But for certain individuals, under certain conditions, &lt;i&gt;Propionibacterium &lt;/i&gt;can be a major hazard. In addition to causing acne (and in severe cases, an accompanying arthritis),  &lt;i&gt;P. acnes&lt;/i&gt; is also seen in post-operative infections, prosthesis failure, breast-implant infection, corneal infection, sarcoidosis, bacteremia, and inflammation of lumbar nerves. Its involvement in sarcoidosis is controversial. What seems to be happening is that a special protein (a "trigger factor"), secreted by &lt;i&gt;P. acnes&lt;/i&gt;, stimulates a cellular immune response in sensitive individuals. The macrophages that arrive to attack &lt;i&gt;P. acnes&lt;/i&gt; become overwhelmed by the bacteria as they go into intracellular-parasite mode. Granulomas then form as &lt;i&gt;P. acnes&lt;/i&gt; takes up residence in the aggregated macrophages. (More about P. acnes's role in sarcoidosis can be found in the &lt;a href="http://www.sciencedirect.com/science/article/pii/S2212534513000051" target="_blank"&gt;March 2013 paper&lt;/a&gt; by Eishi in &lt;i&gt;Respiratory Investigation&lt;/i&gt;.)&lt;br /&gt;
&lt;br /&gt;
The immunological response triggered by &lt;i&gt;P. acnes&lt;/i&gt; can be far-reaching. In the 1980s, back when &lt;i&gt;P. acnes&lt;/i&gt; was called &lt;i&gt;Corynebacterium parvum&lt;/i&gt;, researchers &lt;a href="http://cancerres.aacrjournals.org/content/39/9/3554.long" target="_blank"&gt;found&lt;/a&gt; that killed suspensions of the bacteria, injected into mice, caused 80% to 100% suppression of tumor growth. The dead bacteria stimulated the murine immune system to the point where mice could fight off cancer. Why this technique has not been used for human cancer treatment, I don't know. (It might be because it's too cheap and too easy. What do you think?)&lt;br /&gt;
&lt;br /&gt;
In recent years, researchers have been finding &lt;i&gt;P. acnes&lt;/i&gt; in the lumbar discs of back patients, typically at the rate of 40% to 50%. (About half of patients don't have the bacterium.) Simply finding the bacterium in discs doesn't prove a causal role for &lt;i&gt;P. acnes&lt;/i&gt; in back pain, of course, but in a double-blind randomized controlled &lt;a href="http://link.springer.com/article/10.1007/s00586-013-2675-y#page-2" target="_blank"&gt;trial&lt;/a&gt; involving back patients who got either placebo or amoxicillin for 100 days, the amoxicillin-treated patients did better (both over the 100 days and a year later), which tends to suggest that &lt;i&gt;P. acnes&lt;/i&gt; might well be playing a causal role in back pain. &lt;br /&gt;
&lt;br /&gt;
People hurt their backs (to a greater or lesser degree) all the time without experiencing huge pain or lasting damage, but in a certain proportion of cases, disc herniation leads to Type 1 Modic Change (so-called bone edema) in nearby vertebrae, and at that point you're almost guaranteed to be in excruciating pain. But antibiotics might obviate the need for surgery, in at least some cases.&lt;br /&gt;
&lt;br /&gt;
The nuclear material of intervertebral discs is an ideal place for &lt;i&gt;P. acnes&lt;/i&gt; (an anaerobe) to grow, because it's warm, nutrient-rich, and (with no vascular content) oxygen-depleted. The question of how &lt;i&gt;P. acnes&lt;/i&gt; finds its way into a disc in the first place is an interesting one (which I discuss in &lt;a href="http://bigthink.com/devil-in-the-data/antibiotics-for-back-pain" target="_blank"&gt;my post at Big Think&lt;/a&gt;). The short answer is, there's a ton of &lt;i&gt;P. acnes&lt;/i&gt; in your mouth, especially if you happen to be (how shall we say?)  not very attentive to oral hygiene, and bacteria can enter the bloodstream directly via the gums when you brush your teeth or have them professionally cleaned (or when a dentist picks and pokes at your teeth with one of those sharp pointy thingies). Almost any dental event, even vigorous brushing, can lead to a transient bacteremia. Your spleen and white blood cells will clear bacterial cells from your blood very quickly, of course, and there are factors in your blood that are chemotoxic to most bacteria, but if a few &lt;i&gt;P. acnes&lt;/i&gt; cells happen to stay in your blood long enough to find an inflammation zone in your body (where they can take up residency), you could be in trouble. By "inflammation zone," I mean an inflamed joint, a catheter or shunt, an implant of any kind, or any irritated tissue, really. Did you recently hurt your back? That counts.&lt;br /&gt;
&lt;br /&gt;
Because even tooth brushing poses a &lt;a href="http://circ.ahajournals.org/content/117/24/3118.short" target="_blank"&gt;significant risk of bacteremia&lt;/a&gt;, you may want to consider investing in a stock of mouthwash and using it before every brushing, to cut the live-bacteria count down and thus reduce your risk of lumbar disc infection, endocarditis, sarcoidosis, acne, and other bacteremic sequelae involving &lt;i&gt;P. acnes&lt;/i&gt;. If you think I'm being alarmist, fine; you're entitled to your opinion. For me, it's mouthwash five times a day.&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/7311936253342528427/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/back-pain-infectious-process.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7311936253342528427?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7311936253342528427?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/back-pain-infectious-process.html" title="Back Pain: An Infectious Process?" 
/><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-sUQcaKbYgGs/UZJwqITpBuI/AAAAAAAABqI/tOZSKPUaDYw/s72-c/propioni.jpg" height="72" width="72" /><thr:total>3</thr:total></entry><entry gd:etag="W/&quot;DkQCQX8_eCp7ImA9WhBbFEU.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-6339554419515807204</id><published>2013-05-11T12:15:00.000-04:00</published><updated>2013-05-13T18:32:40.140-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-13T18:32:40.140-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mutation" /><category scheme="http://www.blogger.com/atom/ns#" term="Carsonella ruddii" /><category scheme="http://www.blogger.com/atom/ns#" term="genome" /><category scheme="http://www.blogger.com/atom/ns#" term="GC:AT ratio" /><category scheme="http://www.blogger.com/atom/ns#" term="GC content" /><category scheme="http://www.blogger.com/atom/ns#" term="bacterial genetics" /><category scheme="http://www.blogger.com/atom/ns#" term="E. coli" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="replication" /><category scheme="http://www.blogger.com/atom/ns#" term="deamination" /><category scheme="http://www.blogger.com/atom/ns#" term="Anaeromyxobacter" /><category scheme="http://www.blogger.com/atom/ns#" term="G+C" /><title>DNA G+C Content and Survival Value</title><content type="html">One of biology's big open questions is why organisms differ so much with regard to the relative amounts of GC and AT in their DNA. 
You'd think that if there are only two kinds of DNA base pairs (see diagram) they'd be more-or-less equally abundant. Not so. There are organisms with DNA that's &lt;i&gt;mostly &lt;/i&gt;GC (and/or CG) pairs; there are organisms with very-AT-rich DNA; and within the chromosomes of higher organisms you find large GC-rich regions (isochores) in the midst of great swaths of AT-rich DNA.&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-0iluQfp9e30/UY4pGvraDqI/AAAAAAAABnQ/twRl2NR9SS0/s1600/400px-AT-GC.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-0iluQfp9e30/UY4pGvraDqI/AAAAAAAABnQ/twRl2NR9SS0/s320/400px-AT-GC.jpg" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;DNA contains adenine and thymine in equal amounts, and &lt;br /&gt;
guanine and cytosine in equal amounts, but it does not &lt;br /&gt;
usually contain GC pairs and AT pairs in equal amounts. And&lt;br /&gt;
it doesn't seem as if there is an "optimum" GC:AT ratio. The &lt;br /&gt;
GC:AT ratio varies by species. Within a species, it's constant.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
There are two really odd facts at work here:&lt;br /&gt;
&lt;br /&gt;
1. The GC content of DNA varies by species, and it varies a lot. &lt;br /&gt;
&lt;br /&gt;
2. Evolution doesn't seem to trend toward an "optimum GC:AT ratio" of any kind.&lt;br /&gt;
&lt;br /&gt;
If there were such a thing as an optimum GC:AT ratio for DNA, surely microorganisms would've figured it out by now. Instead, we find huge diversity: There are bacteria on every point in the GC% spectrum, running from 16% GC for the DNA of &lt;a href="http://en.wikipedia.org/wiki/Candidatus_Carsonella_ruddii" target="_blank"&gt;&lt;i&gt;Candidatus Carsonella ruddii&lt;/i&gt;&lt;/a&gt; (a symbiont of the jumping plant louse) to 75% for &lt;a href="http://microbewiki.kenyon.edu/index.php/Anaeromyxobacter_dehalogenans" target="_blank"&gt;&lt;i&gt;Anaeromyxobacter dehalogenans&lt;/i&gt;&lt;/a&gt; 2CP-C (a soil bacterium). At each end of the spectrum you find aerobes and anaerobes; extremophiles and blandophiles; pathogens and non-pathogens. About the only generalization you can make is that the smaller an organism's genome is, the more likely it is to be rich in A+T (low GC%). &lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-wne70sOdrto/UY417aNwGfI/AAAAAAAABno/qCD9SK8hhLE/s1600/GCchart.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="296" src="http://1.bp.blogspot.com/-wne70sOdrto/UY417aNwGfI/AAAAAAAABno/qCD9SK8hhLE/s400/GCchart.jpg" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Genome size correlates loosely with GC content. The very smallest &lt;br /&gt;
bacteria tend to have AT-rich (low GC%) DNA.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
The huge diversity in GC:AT ratios among bacteria is impressive. But does it simply represent a random walk all over the possibility-space of DNA? Or do the various points on the spectrum constitute special niches with important advantages? What advantage could there be for having high-GC% DNA? Or high-AT% DNA?&lt;br /&gt;
&lt;br /&gt;
Some subtle clues tell us that this is not just random deviation from the mean. First, suppose we agree for sake of argument that lateral gene transfer (LGT) is common in the microbial world (a point of view I happen to agree with). Over the course of millions of years, with pieces of DNA of all kinds (high GC%, low GC%) flying back and forth, LGT should force a regression to the mean: It should make genomes tend toward a 50-50 GC:AT ratio. That clearly hasn't happened.&lt;br /&gt;
&lt;br /&gt;
And then there are ordinary mutational pressures. It's beginning to be fairly well accepted (see Hershberg and Petrov, "Evidence That Mutation is Universally Biased Toward AT in Bacteria," &lt;i&gt;PLoS Genetics&lt;/i&gt;, 2010, 6:9, e1001115, full version &lt;a href="http://www.plosgenetics.org/article/info:doi/10.1371/journal.pgen.1001115" target="_blank"&gt;here&lt;/a&gt;) that natural mutation is strongly biased in the direction of AT by virtue of the fact that deamination of cytosine and methylcytosine (which occurs spontaneously at high frequency) leads to replacement of 'C' with 'T', hence GC pairs becoming AT pairs. The strong natural mutational bias toward AT says that &lt;i&gt;all &lt;/i&gt;DNA should creep in the direction of low GC% and end up well below 50% GC. But again,&lt;i&gt; this is not what we see. &lt;/i&gt;We see that high-GC organisms like &lt;i&gt;Anaeromyxobacter &lt;/i&gt;(and many others) maintain their DNA's unusually high (75%) GC content across millions of generations. Even middle-of-the-road organisms like &lt;i&gt;E. coli&lt;/i&gt; (with 50% GC content) don't slowly slip in the direction of high-AT/low-GC. &lt;br /&gt;
&lt;br /&gt;
Clearly, something funny is going on. For a super-high-GC organism like &lt;i&gt;Anaeromyxobacter &lt;/i&gt;to maintain its DNA's super-high GC content against the constant tug of mutations in the AT direction, it must be putting significant energy into maintaining that high GC percentage. But why? Why pay extra to maintain a high GC%? And how does the cost get paid?&lt;br /&gt;
&lt;br /&gt;
I think I've come up with a possible answer. It has to do with DNA replication cost, where "cost" is figured in terms of time needed to synthesize a new copy of the DNA (for cell division). Anything that favors low replication cost (high replication speed) should favor survival; that's my main assumption.&lt;br /&gt;
&lt;br /&gt;
My other assumption is that DNA polymerases (the enzymes involved in replication) are not clairvoyant. They can't know, until the need arises, which of the four deoxyribonucleotide triphosphates (dATP, dTTP, dGTP, dCTP) will be needed at a given moment, to elongate the new strand of DNA. When the need arises for (let's say) an 'A', the 'A' (in the form of dATP) has to come from an existing endogenous pool of dNTPs containing all four bases (dATP, dTTP, dGTP, dCTP) in whatever concentrations they're in. The enzyme has to wait until a dATP (if that's what's needed) randomly happens to lock into the active site. Odds are only one in four (assuming equal concentrations of dNTPs) of a dATP coming along at exactly the right moment. Odds are 3 out of 4 that some incorrect dNTP (either dGTP, dTTP, or dCTP) will try, and fail, to fit the active site first, before dATP comes along.&lt;br /&gt;
&lt;br /&gt;
But imagine that your DNA is 75% G+C. And suppose you've regulated your intracellular metabolism to maintain dGTP and dCTP in a 3:1 ratio over dATP and dTTP. The odds of a good random "first hit" go up.&lt;br /&gt;
&lt;br /&gt;
To simulate the various possibilities, I wrote software (in JavaScript) that simulates DNA replication, where the template DNA molecule is 1000 base-pairs in length and the dNTP pool size is 10000 bases. The software allows you to set the organism's genome GC% to whatever you want, and also set the dNTP pool's relative GC percentage to whatever you want. The template DNA is just a random string of A, T, G, and C bases (1000 total), reflecting their relative abundances as set in the GC% parameter. The pool of dNTPs is set up to be a randomized array (again reflecting abundances set in a GC% parameter).&lt;br /&gt;
&lt;br /&gt;
The way the software works is this. Read a base off the template. Fetch a base randomly from the base pool. If the base happens to be the one (out of four) that's called for, score '1' for the timing parameter, and continue to read another base off the template. If the base was &lt;i&gt;not&lt;/i&gt; the one that's called for, put it back in the pool array in a random location, then randomly fetch another base from the pool; and increment the timing parameter. (For each fetch, the timing parameter goes up by 1.) Keep fetching (and throwing back bases) until the proper base comes up, incrementing the time parameter as appropriate. (The time parameter keeps track of the number of fetch attempts.) When the correct base turns up, the pool shrinks by one base. In other words, replication consumes the pool, but as I said earlier, the pool contains ten times as many bases (to start) as the DNA template. So the pool ends up 10% smaller at the end of replication.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-0tkXOkG0p-8/UY5aPF8VEfI/AAAAAAAABpU/xlNlnYH47Qw/s1600/FINAL+25-75-GC-organisms+with+%5BGC%5D+pool+on+x.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/-0tkXOkG0p-8/UY5aPF8VEfI/AAAAAAAABpU/xlNlnYH47Qw/s400/FINAL+25-75-GC-organisms+with+%5BGC%5D+pool+on+x.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Each point on this graph represents the average of 100 Monte Carlo runs, each run representing complete replication of a 1000-bp DNA template, drawing from a pool of 10,000 bases. The blue points are runs that used a DNA template containing 25% G+C content. The red points are runs that used DNA with 75% G+C. The X-axis represents different base-pool compositions. See text for details. Click for larger image.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
I ran Monte Carlo simulations for DNA templates having GC contents of 75%, 50%, and 25%, using base pools set up to have anywhere from 15% GC to 85% (in 2.5% increments). The results for the 75% GC and 25% GC templates (representing high- and low-GC organisms) are shown in the above graph. Each point on the graph represents the average of 100 complete replication runs. The Y-axis shows the average number of fetches per DNA base (so, a low value means fast replication; a high value means slower DNA replication). The X-axis shows the percentage of GC in the base-pool, in recognition of the fact that relative dNTP abundances in an organism may vary, in accordance with environmental constraints as well as with organism-specific homeostatic setpoints.&lt;br /&gt;
&lt;br /&gt;
Maximal replication speed (the low point of each curve) happens at a base-pool GC percentage that is displaced in the direction of the DNA's own GC%. So, for the 25%-GC organism (blue data points), max replication efficiency comes when the base-pool is about 33% GC. For the 75% GC organism (red points) the sweet spot is at a base-pool GC concentration of 65%. (Why this is not exactly symmetrical with the other curve, I don't know; but bear in mind, these are Monte Carlo runs. Some variation is to be expected.)&lt;br /&gt;
&lt;br /&gt;
The interesting thing to note is that max replication efficiency, for each organism, comes at 3.73 fetches per base-pair (Y-axis). Cache that thought. It'll be important in a minute.&lt;br /&gt;
&lt;br /&gt;
The real jaw-dropper is what happens when you plot a curve for template DNA with 50% GC content. In the graph below, I've shown the 50%-GC runs as black points. (The red and blue points are exactly as before.)&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-NON3aj2Hgi4/UY5fHuPx1MI/AAAAAAAABps/dN4muKwkmuI/s1600/FINAL+25-50-75-GC-organisms+with+%5BGC%5D+pool+on+x+COMBO+GRAPH.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/-NON3aj2Hgi4/UY5fHuPx1MI/AAAAAAAABps/dN4muKwkmuI/s400/FINAL+25-50-75-GC-organisms+with+%5BGC%5D+pool+on+x+COMBO+GRAPH.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;This is the same graph as before, but with replication data for a 50%-GC genome (black points). Again, each data point represents the average of 100 Monte Carlo runs. Notice that the black curve bottoms out at a higher level (4.0) than the red or blue curves (3.73). This means replication is less efficient for the 50%-GC genome.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
Notice that the best replication efficiency comes in the middle of the graph (no big surprise), but check the Y-value: 4.00. The very fastest DNA replication, when the DNA template is 50% GC, requires 4 fetches per base, compared to best-case base-fetching efficiency of 3.73 for the 25%-GC and 75%-GC DNAs. What does this mean? It means DNA replication, in a best-case scenario, is 6.75% more efficient for the skewed-GC organisms. (The difference between 3.73 and 4.00 is 0.27, or 6.75% of 4.00.)&lt;br /&gt;
&lt;br /&gt;
This goes a long way toward explaining why GC extremism is stable in organisms that pursue it. There is replication efficiency to be had in keeping your DNA biased toward high or low GC. (It doesn't seem to matter which.)&lt;br /&gt;
&lt;br /&gt;
Consider the dynamics of an ATP drawdown. The energy economy of a cell revolves around ATP, which is both an energy molecule and a source for the adenine that goes into DNA and RNA. One would expect normal endogenous concentrations of ATP to be high relative to other NTPs. For a low-GC% organism, that's also a near-ideal situation for DNA replication, because high AT in the base pool puts you near the max-replication-speed part of the curve (see blue points). A sudden drawdown in ATP (when the cell is in crisis) shifts replication speed to the right-hand part of the blue curve, slowing replication significantly. This is what you want if you're an intracellular symbiont (or a mitochondrion, incidentally). You want to stop dividing when the host cell is unable to divide because of an energy crisis.&lt;br /&gt;
&lt;br /&gt;
Consider the high-GC organism (red dots), on the other hand. If ATP levels are high during normal metabolism, replication is not as efficient as it could be, but so what? It just means you're willing to tolerate less-efficient replication in good times. But as ATP draws down (perhaps because nutrients are becoming scarce), DNA replication actually becomes more efficient. This is what you want if you're a free-living organism in the wild. You want to be able to continue replicating your DNA even as ATP becomes scarce. And indeed that's what happens (according to the red data points): As the base pool becomes more GC-rich, replication efficiency increases. The best efficiency comes when base-pool A+T is down around 35%.&lt;br /&gt;
&lt;br /&gt;
I think these simulations are meaningful and I think they help explain the DNA-composition extremism seen among microorganisms. If you're a professional scientist and you find these results tantalizing, and you'd like to co-author a paper for &lt;i&gt;PLoS Genetics&lt;/i&gt; (or another journal), please get in touch. (My Google mail is kas-dot-e-dot-thomas.) I'd like to coauthor with someone who is good with statistics, who can contribute more ideas to this line of investigation. I think these results are worth sharing with the scientific community at large.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/6339554419515807204/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/dna-gc-content-and-survival-value.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/6339554419515807204?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/6339554419515807204?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/dna-gc-content-and-survival-value.html" title="DNA G+C Content and Survival Value" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-0iluQfp9e30/UY4pGvraDqI/AAAAAAAABnQ/twRl2NR9SS0/s72-c/400px-AT-GC.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DkcHQXk4cSp7ImA9WhBUGEo.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-7494228526453430991</id><published>2013-05-06T00:30:00.001-04:00</published><updated>2013-05-06T17:00:30.739-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-06T17:00:30.739-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="genetics" /><category scheme="http://www.blogger.com/atom/ns#" term="genome" /><category scheme="http://www.blogger.com/atom/ns#" term="tree of life" /><category scheme="http://www.blogger.com/atom/ns#" 
term="ribosomes" /><category scheme="http://www.blogger.com/atom/ns#" term="Ford Doolittle" /><category scheme="http://www.blogger.com/atom/ns#" term="E. coli" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="Darwin" /><category scheme="http://www.blogger.com/atom/ns#" term="hydrogen peroxide" /><category scheme="http://www.blogger.com/atom/ns#" term="evolution" /><category scheme="http://www.blogger.com/atom/ns#" term="Lateral Gene Transfer" /><title>Hydrogen Peroxide Powers Evolution</title><content type="html">I'm about to offer a conjecture that is a bit preposterous-sounding but could well hold true. I actually think it does.&lt;br /&gt;
&lt;br /&gt;
I propose that evolution, at the level of bacteria (though probably not at higher levels), is driven by hydrogen peroxide.&lt;br /&gt;
&lt;br /&gt;
This theory rests on three assumptions: One is that the creation of new bacterial species happens almost entirely via &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed/14616063" target="_blank"&gt;lateral gene transfer&lt;/a&gt;, not heritable point-mutations. Secondly, bacteria (marine and terrestrial) are regularly exposed to challenges by hydrogen peroxide in the environment. Thirdly, those challenges drive lateral gene transfer.&lt;br /&gt;
&lt;br /&gt;
Evidence for the first assumption is embarrassingly abundant. If you're not up to speed on the subject, I suggest you read the excellent paper, "Lateral Gene Transfer," by Olga Zhaxybayeva and W. Ford Doolittle in &lt;i&gt;Current Biology&lt;/i&gt;, April 2011, 21:7, pp. R242-246 (unlocked copy &lt;a href="http://www.sciencedirect.com/science/article/pii/S0960982211001011" target="_blank"&gt;here&lt;/a&gt;). It's now common to find that any given bacterial species can trace a good percentage of its protein base to "ancestors" that are too far removed horizontally to be ancestors in the conventional sense.&lt;br /&gt;
&lt;br /&gt;
Consider &lt;i&gt;E. coli&lt;/i&gt;. There are hundreds of strains of &lt;i&gt;E. coli&lt;/i&gt;, with genes ranging in number from 4,100 to about 5,300 per strain. The problem is, the various strains of &lt;i&gt;E. coli&lt;/i&gt; have only about 900 genes in common (and that's far too few genes to render a fully functional &lt;i&gt;E. coli&lt;/i&gt;). The &lt;i&gt;E. coli&lt;/i&gt; pan-genome actually takes in more than 15,000 gene families, total. Certainly, you can draw a family tree of &lt;i&gt;E. coli&lt;/i&gt; based on 16S ribosomal polymorphisms, but that doesn't explain where the 15,000 pan-genome genes came from. The "family tree" metaphor quickly breaks down if you start drawing trees based on proteins. You get many conflicting trees—all of them correct.&lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-1vqb-hAJIx0/UYcV0dmXTZI/AAAAAAAABmU/_rG9Ep2xXJ0/s1600/treeoflife.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-1vqb-hAJIx0/UYcV0dmXTZI/AAAAAAAABmU/_rG9Ep2xXJ0/s320/treeoflife.jpg" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Trees like this are fiction where bacteria are concerned.&lt;br /&gt;
The tree of life is more like a net of life or web &lt;br /&gt;
of life than a directed acyclic graph.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
Where are all of the genes coming from? Other species, of course. They arrive by way of mechanisms like &lt;a href="http://en.wikipedia.org/wiki/Transformation_(genetics)" target="_blank"&gt;transformation&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/Transduction_(genetics)" target="_blank"&gt;transduction&lt;/a&gt;, and &lt;a href="http://en.wikipedia.org/wiki/Bacterial_conjugation" target="_blank"&gt;conjugation&lt;/a&gt;, all of which allow direct entry of foreign DNA into a bacterial cell. At one time it was thought that conjugation could only occur between bacteria of the same species, but it is now &lt;a href="http://jb.asm.org/content/173/21/6705.abstract" target="_blank"&gt;known&lt;/a&gt; that cross-species conjugation also occurs (as, for example, between &lt;i&gt;E. coli&lt;/i&gt; and &lt;i&gt;Streptomyces &lt;/i&gt;or &lt;i&gt;Mycobacterium&lt;/i&gt;).&lt;br /&gt;
&lt;br /&gt;
Transduction, which is where viruses package up an infected host's genes in virus capsules that are then taken up by another cell, occurs naturally in bacterial populations in response to environmental factors like ultraviolet light and hydrogen peroxide. Exposure of a virus-carrying (lysogenic) cell to UV light or peroxide can induce runaway production of virus, and in fact this mechanism &lt;a href="http://www.pnas.org/content/106/4/1234.full" target="_blank"&gt;is used by &lt;i&gt;Streptococcus &lt;/i&gt;to kill competitive &lt;i&gt;Staphylococcus &lt;/i&gt;cells&lt;/a&gt;, in a clever bit of chemical warfare. It's been known for years that hydrogen peroxide can cause many types of bacteria to shed DNA. Now we know why: Hydrogen peroxide is a signalling molecule. It signals (among other things) lysogenic bacteria to go into a &lt;i&gt;lytic cycle.&lt;/i&gt; It also signals cells to mount what's known as &lt;a href="http://en.wikipedia.org/wiki/SOS_response" target="_blank"&gt;the SOS response&lt;/a&gt;, which is a global response to oxidative challenge. Years ago, Bruce Ames and his colleagues &lt;a href="http://www.envtox.ucdavis.edu/woodlab/etx214/ames_h2o2_defense.pdf" target="_blank"&gt;showed&lt;/a&gt; that exposing &lt;i&gt;Salmonella &lt;/i&gt;to very dilute (60 micromolar) hydrogen peroxide caused the cells to differentially express 30 "SOS" proteins, including heat-shock proteins and low-fidelity DNA-repair systems. We know that hydrogen peroxide as dilute as 0.1 micromolar can induce phage (virus) production &lt;a href="http://www.int-res.com/articles/ame/18/a018p217.pdf" target="_blank"&gt;in up to 11% of marine bacteria&lt;/a&gt;. 
This is significant, because &lt;a href="http://www.aseanenvironment.info/Abstract/41011683.pdf" target="_blank"&gt;rainwater contains hydrogen peroxide in concentrations of 2 to 40 micromolar&lt;/a&gt;, and &lt;a href="http://link.springer.com/article/10.1023/A%3A1010738910358#page-2" target="_blank"&gt;ocean water has been known to reach millimolar levels of H2O2 after a rain storm&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
If you're wondering why rain contains hydrogen peroxide, the peroxide gets there in two ways. One is UV-frequency photochemistry (where water is cleaved to H and OH, then reforms as H2 and H2O2); the other is via ionization reactions &lt;a href="http://www.sciencedirect.com/science/article/pii/S0016703799002744#FIG3" target="_blank"&gt;caused by lightning&lt;/a&gt;. (Lightning is energetic enough to bring airborne oxygen and water to a plasma state. The resulting ionization and rearrangement of free atoms yields a certain amount of hydrogen peroxide.) The presence of H2O2 in rainwater has been confirmed many times, and in fact there's a well-preserved "fossil record" of it in polar icepacks, going back centuries. (Polar snowpacks contain from 10 to 900 ppb of H2O2; it varies seasonally, the max coming in summer.)&lt;br /&gt;
&lt;br /&gt;
Bottom line, every rain event (over land, over sea) constitutes a hydrogen peroxide challenge for microbes, which induces viral transduction (and a release of whole-cell DNA through lysis, some of which will inevitably be used in transformation). It also induces low-fidelity DNA repair (which is guaranteed to help evolution along). Every rain event, in other words, is a chance for evolution to do its thing. For bacteria, that means gene-sharing within &lt;i&gt;and across&lt;/i&gt; species lines.&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-UOl-8k_ikzw/UYcWxE2G-VI/AAAAAAAABmg/O1YFTL9T3eo/s1600/charles-darwin.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="200" src="http://1.bp.blogspot.com/-UOl-8k_ikzw/UYcWxE2G-VI/AAAAAAAABmg/O1YFTL9T3eo/s200/charles-darwin.jpg" width="190" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Darwin's theory of a tree-like ancestor basis&lt;br /&gt;
for all living things is dead wrong, at &lt;br /&gt;
least for bacteria.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
W. Ford Doolittle (who wrote a classic &lt;a href="http://books.google.com/books?hl=en&amp;amp;lr=&amp;amp;id=SMvLpiK-fgsC&amp;amp;oi=fnd&amp;amp;pg=PA119&amp;amp;dq=:+If+the+tree+of+life+fell,+would+it+make+a+sound%3F&amp;amp;ots=Xgtc7tgSCs&amp;amp;sig=4NE2gWTfMXOS_f5rhaTL5PlbRas#v=onepage&amp;amp;q=%3A%20If%20the%20tree%20of%20life%20fell%2C%20would%20it%20make%20a%20sound%3F&amp;amp;f=false" target="_blank"&gt;book chapter&lt;/a&gt; about lateral gene transfer called "If the Tree of Life Fell, Would We Recognize the Sound?") estimates that if a horizontal gene transfer occurs once every ten billion vertical replications, "it would be enough to ensure that &lt;i&gt;no &lt;/i&gt;gene in any modern genome has an unbroken history of vertical descent back to some hypothetical last universal common ancestor." (See &lt;a href="http://www.sciencedirect.com/science/article/pii/S0960982211001011" target="_blank"&gt;this article&lt;/a&gt;.)&lt;br /&gt;
&lt;br /&gt;
It's obvious (to me, at least) that every rain event carries with it the potential to cause far more gene transfers than are necessary (according to Doolittle) to make vertical inheritance fade into insignificance as an evolutionary bringer of change. The hydrogen peroxide in rain has been driving lateral gene transfer in bacteria for eons. In fact, it is arguably &lt;i&gt;the &lt;/i&gt;dominant driver of evolution in bacteria. &lt;br /&gt;
&lt;br /&gt;
Sorry, Mr. Darwin. Point mutations handed down to sons and daughters just isn't cutting it. &lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/7494228526453430991/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/hydrogen-peroxide-powers-evolution.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7494228526453430991?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7494228526453430991?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/hydrogen-peroxide-powers-evolution.html" title="Hydrogen Peroxide Powers Evolution" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-1vqb-hAJIx0/UYcV0dmXTZI/AAAAAAAABmU/_rG9Ep2xXJ0/s72-c/treeoflife.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;AkUEQnw-fip7ImA9WhBUF08.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-956187904058625161</id><published>2013-05-05T00:30:00.000-04:00</published><updated>2013-05-05T00:30:03.256-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-05T00:30:03.256-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="bacterial ecology" /><category scheme="http://www.blogger.com/atom/ns#" term="Bordetella" /><category 
scheme="http://www.blogger.com/atom/ns#" term="FeatView" /><category scheme="http://www.blogger.com/atom/ns#" term="FASTA" /><category scheme="http://www.blogger.com/atom/ns#" term="Polynucleobacter" /><category scheme="http://www.blogger.com/atom/ns#" term="genome" /><category scheme="http://www.blogger.com/atom/ns#" term="phylogenetic tree" /><category scheme="http://www.blogger.com/atom/ns#" term="omics" /><category scheme="http://www.blogger.com/atom/ns#" term="Ralstonia" /><category scheme="http://www.blogger.com/atom/ns#" term="microbial genetics" /><title>More Science on the Desktop</title><content type="html">&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;Not to keep harping on the amazing power of desktop &lt;a href="https://en.wikipedia.org/wiki/Omics" target="_blank"&gt;omics&lt;/a&gt; tools, but I thought I'd share a tip for those of you into genome-mining. The tip in a nutshell is that if you gang-load a bunch of &lt;a href="http://en.wikipedia.org/wiki/Fasta" target="_blank"&gt;FASTA&lt;/a&gt; sequences (DNA sequence data) into the &lt;a href="http://genomevolution.org/CoGe/FastaView.pl" target="_blank"&gt;FeatView&lt;/a&gt; form at &lt;a href="http://genomevolution.org/"&gt;http://genomevolution.org&lt;/a&gt;, then click the rather inconspicuous button labeled "Phylogeny.fr" at the bottom left of the FeatView page, you'll be taken automatically to &lt;a href="http://www.phylogeny.fr/"&gt;http://www.phylogeny.fr&lt;/a&gt;, where you'll get a realtime-generated phylogenetic tree based on the sequence data you provided in FeatView, with no effort on your part (it's truly a one-click operation). Copy and paste DNA sequences into FeatView, click one button, and 30 seconds later a tree shows up on your screen, looking (perhaps) something like this:&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://4.bp.blogspot.com/-_DK1MVqXB98/UYVBOabwd-I/AAAAAAAABls/Z2g6F9DxpTE/s1600/phylo_treeGroEL-v3.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-_DK1MVqXB98/UYVBOabwd-I/AAAAAAAABls/Z2g6F9DxpTE/s1600/phylo_treeGroEL-v3.png" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/div&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;The reason I made this tree is that I wasn't satisfied with my knowledge of the relatedness of certain weird microorganisms I've recently run into. Namely:&lt;/span&gt;&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://microbewiki.kenyon.edu/index.php/Ralstonia_eutropha" target="_blank"&gt;&lt;i&gt;Ralstonia &lt;/i&gt;&lt;/a&gt;(which I &lt;a href="http://asserttrue.blogspot.com/2013/05/a-tale-of-two-microbes.html" target="_blank"&gt;mentioned yesterday&lt;/a&gt;), WEIRD BECAUSE: It turns hydrogen gas and CO2 into plastic.&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://microbewiki.kenyon.edu/index.php/Bordetella" target="_blank"&gt;&lt;i&gt;Bordetella&lt;/i&gt;&lt;/a&gt;, a bronchial infection agent; WEIRD BECAUSE: It turns out to be very similar, genetically, to &lt;i&gt;Ralstonia&lt;/i&gt;&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://www.jgi.doe.gov/sequencing/why/50024.html" target="_blank"&gt;&lt;i&gt;Burkholderia&lt;/i&gt;&lt;/a&gt;, a soil organism (and human and animal pathogen), WEIRD BECAUSE: It has an unexpectedly large amount of genetic similarity to &lt;i&gt;Ralstonia &lt;/i&gt;and &lt;i&gt;Polynucleobacter&lt;/i&gt;&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://www.jgi.doe.gov/sequencing/why/3616.html" target="_blank"&gt;&lt;i&gt;Polynucleobacter&lt;/i&gt;&lt;/a&gt;, a ditch-water bacterium, WEIRD BECAUSE: It can live as an intracellular parasite of freshwater ciliates or it can live independently in soil (making it potentially a great study organism for determining the genetic bases of intracellular symbiosis) &lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://microbewiki.kenyon.edu/index.php/Thiomicrospira_crunogena" target="_blank"&gt;&lt;i&gt;Thiomicrospira&lt;/i&gt;&lt;/a&gt;, a very tiny CO2- and sulfur-loving organism, WEIRD BECAUSE: It can only be found near deep-sea thermal vents (see my &lt;a href="http://asserttrue.blogspot.com/2013/05/deep-sea-vents-mosquito-connection.html" target="_blank"&gt;previous writeup&lt;/a&gt;)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://www.plosone.org/article/info:doi/10.1371/journal.pone.0023742" target="_blank"&gt;&lt;i&gt;Polaromonas&lt;/i&gt;&lt;/a&gt;, a relatively newly discovered and still poorly understood bacterium, WEIRD BECAUSE: It is abundant in glacier ice on multiple continents. Plus it has an amazing (and totally unexpected) amount of genetic overlap with our good friend &lt;i&gt;Bordetella&lt;/i&gt;, the whooping-cough bug.&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;If you're not familiar with how bacterial classification works, let's just say it's a mess. There's a long historical tradition of classifying microorganisms based on a hodgepodge of &lt;i&gt;ad hoc&lt;/i&gt; methods involving everything from physical appearance under the microscope (especially after staining with crystal violet), to the habitat of the organism, to its ability to metabolize various substances, its ability to make spores, adaptation to oxygen or lack of oxygen, serological characteristics, etc. It's always been an error-prone system, resulting in many misclassifications and later corrections, owing to its inconsistency and basic irrationality, to put it bluntly. With the advent of molecular genetic techniques, it's now possible to create accurate phylogenies based on little more than DNA sequence differences, usually involving the 16S ribosomal RNA (more &lt;a href="http://www.ncbi.nlm.nih.gov/pmc/articles/PMC207061/" target="_blank"&gt;here&lt;/a&gt;).&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://4.bp.blogspot.com/-nXZ2RncNdmw/UYVkwfRQSUI/AAAAAAAABmE/2N2ychlCT0A/s1600/euplotes1.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="200" src="http://4.bp.blogspot.com/-nXZ2RncNdmw/UYVkwfRQSUI/AAAAAAAABmE/2N2ychlCT0A/s200/euplotes1.jpg" width="200" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;Freshwater ciliates (like this &lt;i&gt;Euplotes&lt;/i&gt;) are &lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;home
for &lt;i&gt;Polynucleobacter &lt;/i&gt;endosymbionts.&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;As big an advance as ribosome-based phylogeny is, it's pretty far from ideal (IMHO), mainly because it ignores phenotypes. In fact it's pretty far removed from anything at all having to do with an organism's ecology, metabolism, mode of living, etc. What are we really measuring when we measure relatedness according to a 16S ribosomal yardstick? Just the rate of random mutation accumulation in a pretty uninteresting cell artifact. I'd rather have a yardstick that's tied to phenotypic reality than to a slow-to-change, "highly conserved" piece of cold dead scaffolding.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;br /&gt;So to create my own "family tree" of two dozen or so microbes, I said to hell with 16S ribosomes and decided to use, as my yardstick, genetic variation in the &lt;/span&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="https://en.wikipedia.org/wiki/GroEL" target="_blank"&gt;GroEL&lt;/a&gt; gene, which codes for the 60-kiloDalton heat-shock protein. I chose this protein (or rather, the gene for it) as my phylo-yardstick for a number of reasons. First, the DNA sequence is sizable, at about 1643 nucleotides (making it somewhat bigger than the 16S rDNA). It's important to have a large yardstick gene when looking for faint genetic signals. Secondly, this protein is essentially universal in prokaryotes. It's ubiquitous but not necessarily highly &lt;i&gt;conserved&lt;/i&gt;, in the same sense that rRNA is highly &lt;i&gt;conserved&lt;/i&gt;. ("Highly conserved" is not what you want. Think about it. Taken to the extreme, a "highly conserved" sequence is invariant. It never changes. And is therefore useless for phylogenetics.) Thirdly, the GroEL &lt;a href="https://en.wikipedia.org/wiki/Heat_shock_protein" target="_blank"&gt;heat-shock protein&lt;/a&gt; has multiple intracellular touchpoints: It's known to interact&lt;span class="Apple-converted-space"&gt;&amp;nbsp;&lt;/span&gt;with&lt;span class="Apple-converted-space"&gt;&amp;nbsp;&lt;/span&gt;&lt;a href="https://en.wikipedia.org/wiki/GroES" style="background-attachment: initial; background-clip: initial; background-color: initial; background-image: none; background-origin: initial; background-position: initial initial; background-repeat: initial initial; color: #0645ad; text-decoration: none;" title="GroES"&gt;GroES&lt;/a&gt;,&lt;span class="Apple-converted-space"&gt; &lt;/span&gt;&lt;a href="https://en.wikipedia.org/wiki/ALDH2" style="background-attachment: initial; background-clip: initial; background-color: initial; background-image: none; background-origin: initial; background-position: initial initial; background-repeat: initial initial; color: #0645ad; text-decoration: none;" title="ALDH2"&gt;ALDH2&lt;/a&gt;, and&lt;span 
class="Apple-converted-space"&gt; &lt;/span&gt;&lt;a href="https://en.wikipedia.org/wiki/Dihydrofolate_reductase" style="background-attachment: initial; background-clip: initial; background-color: initial; background-image: none; background-origin: initial; background-position: initial initial; background-repeat: initial initial; color: #0645ad; text-decoration: none;" title="Dihydrofolate reductase"&gt;dihydrofolate reductase&lt;/a&gt;, &lt;i&gt;and &lt;/i&gt;it's involved in signal transduction (it's induced not just by heat &lt;a href="http://www.pnas.org/content/83/21/8059.short" target="_blank"&gt;but by hydrogen peroxide&lt;/a&gt;). Not to overlook the obvious, but it is also a touchpoint protein for any enzyme that can be repaired by the 60kDa heat shock protein. That's probably dozens if not hundreds of enzymes. Why is that important? Think about it: A protein that is sensitive to the 3D conformational requirements of &lt;i&gt;other &lt;/i&gt;proteins &lt;i&gt;has to evolve in response to the needs of all the proteins it services. &lt;/i&gt;A thermophile (&lt;i&gt;Thiomicrospira&lt;/i&gt;)&amp;nbsp; is going to need a different heat-shock repair system than a psychrophile (&lt;i&gt;Polaromonas&lt;/i&gt;). A salt-lover needs a different one than a freshwater-lover. GroEL has to reflect, in its own structure, the many shifting requirements of the host proteome. These considerations make GroEL a highly appropriate basis gene for phylogenetic analysis.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;And frankly, I think the GroEL-based phylo-tree &lt;a href="http://phylogeny.fr/"&gt;phylogeny.fr&lt;/a&gt; spit out for me (see illustration further above) speaks for itself. It's a remarkably informative (and accurate) tree. GroEL evolutionary differences not only accurately grouped endosymbionts together, soil organisms together, aquatic organisms, etc., it also correctly grouped the "enteric-alike" &lt;i&gt;Erwinia &lt;/i&gt;with &lt;i&gt;E. coli &lt;/i&gt;and &lt;i&gt;Shigella&lt;/i&gt;, and it cannily put &lt;i&gt;Polaromonas &lt;/i&gt;with soil organisms (rather than aquatics), which I think is correct, based on recent &lt;i&gt;Polaromonas &lt;/i&gt;isolates being &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed/18599688" target="_blank"&gt;found in soil rather than snow&lt;/a&gt;. Likewise, it's good to see &lt;i&gt;Bdellovibrio &lt;/i&gt;(a freshwater bug) clustered with &lt;i&gt;Polynucleobacter &lt;/i&gt;(which is symbiotic with a ciliate protozoan), with &lt;i&gt;Thiomicrospira &lt;/i&gt;(the saltwater hydro-vent organism) a very nearby out-node.&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://3.bp.blogspot.com/-ku5n2QBCyDo/UYVkHq_EORI/AAAAAAAABl8/4ttsyvITySA/s1600/tumblr_luns0a5LKr1qgl0s1o1_400.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-ku5n2QBCyDo/UYVkHq_EORI/AAAAAAAABl8/4ttsyvITySA/s320/tumblr_luns0a5LKr1qgl0s1o1_400.jpg" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;If you get an infection while in a hospital, pray&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;it's not &lt;i&gt;Clostridium difficile&lt;/i&gt;, which is often deadly.&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;A harder call to make is &lt;i&gt;Clostridium difficile&lt;/i&gt;, which is present in 1% to 5% of non-ill people's intestines. Is it an enteric (a la &lt;i&gt;E. coli&lt;/i&gt;)? Definitely not. The &lt;i&gt;Clostridia &lt;/i&gt;(botulism, tetanus, etc.) are spore-forming &lt;i&gt;soil &lt;/i&gt;bacteria. Their placement in the tree not far from the soil-dwelling spore-former, &lt;i&gt;Bacillus thuringiensis&lt;/i&gt;, is thus eminently correct. &lt;i&gt;Bacillus &lt;/i&gt;is a proximal out-node relative to &lt;i&gt;Clostridium&lt;/i&gt;, which is understandable in that &lt;i&gt;Bacillus &lt;/i&gt;is aerobic whereas &lt;i&gt;Clostridia &lt;/i&gt;are strict anaerobes.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;br /&gt;&lt;i&gt;Buchnera &lt;/i&gt;&lt;/span&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;(an aphid symbiont) comes at an odd location, much further away from the insect-dwelling &lt;i&gt;Wolbachia &lt;/i&gt;than I would have predicted, but then again &lt;i&gt;Buchnera&lt;/i&gt;'s host feeds on cold sap where &lt;i&gt;Wolbachia&lt;/i&gt;'s hosts typically feed on warm blood. All the organisms around &lt;i&gt;Wolbachia &lt;/i&gt;in the tree are hemophiles.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;br /&gt;Our good friend &lt;/span&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;i&gt;Bordetella &lt;/i&gt;(of pertussis fame) is placed firmly in the soil group. I think that's real and significant. When you start to look at &lt;i&gt;Bordetella&lt;/i&gt;'s &lt;a href="http://asserttrue.blogspot.com/2013/05/a-tale-of-two-microbes.html" target="_blank"&gt;high DNA sequence similarity with &lt;i&gt;Ralstonia&lt;/i&gt;&lt;/a&gt; and &lt;i&gt;Burkholderia&lt;/i&gt;, it would be surprising, actually, if it fell anywhere &lt;i&gt;else &lt;/i&gt;in the tree.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;br /&gt;Honestly, when I took Bacterial Ecology 201 in college, many years ago, it was under duress and I hated the experience. But now, decades later, I'm starting to like it. With tools like those available for free at &lt;/span&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;a href="http://genomevolution.org/"&gt;http://genomevolution.org&lt;/a&gt; and  &lt;a href="http://www.phylogeny.fr/"&gt;http://www.phylogeny.fr&lt;/a&gt;, what's not to like?&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/956187904058625161/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/more-science-on-desktop.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/956187904058625161?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/956187904058625161?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/more-science-on-desktop.html" title="More Science on the Desktop" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-_DK1MVqXB98/UYVBOabwd-I/AAAAAAAABls/Z2g6F9DxpTE/s72-c/phylo_treeGroEL-v3.png" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CUMFRXg4eyp7ImA9WhBUF00.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-7074617674749803371</id><published>2013-05-04T01:39:00.000-04:00</published><updated>2013-05-04T17:36:54.633-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-04T17:36:54.633-04:00</app:edited><category 
scheme="http://www.blogger.com/atom/ns#" term="Bordetella" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="synteny" /><category scheme="http://www.blogger.com/atom/ns#" term="SynMap" /><category scheme="http://www.blogger.com/atom/ns#" term="genomics" /><category scheme="http://www.blogger.com/atom/ns#" term="Ralstonia" /><title>A Tale of Two Microbes</title><content type="html">One area where Big Data has started to pay big dividends is in genome research, and you can begin to taste the payoff yourself, right now, if you want to come along as I show you how to mine genetic data from public databases in the service of a little desktop microbial genetics. You'll be amazed at what you can do.&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-fEj6D9Xu4bE/UYSAIfI1WqI/AAAAAAAABkM/TBFyTH9vC6U/s1600/2763668057_5fef2337a6.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="141" src="http://2.bp.blogspot.com/-fEj6D9Xu4bE/UYSAIfI1WqI/AAAAAAAABkM/TBFyTH9vC6U/s200/2763668057_5fef2337a6.jpg" width="200" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;No one knows why, but when &lt;i&gt;Ralstonia eutropha&lt;/i&gt;&lt;br /&gt;
eats too much, it produces plastic granules&lt;br /&gt;
instead of, say,
starch or fat. Go figure.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
For today's experiment, we're going to compare the genomes of two bacteria, one of which you know very well, the other of which you don't, unless you've got way too much time on your hands. The germ you already know is &lt;a href="http://en.wikipedia.org/wiki/Bordetella" target="_blank"&gt;&lt;i&gt;Bordetella&lt;/i&gt;&lt;/a&gt;, the whooping cough bug. The bug you haven't heard of is &lt;a href="http://en.wikipedia.org/wiki/Ralstonia_eutropha" target="_blank"&gt;&lt;i&gt;Ralstonia eutropha&lt;/i&gt;&lt;/a&gt;, a soil organism that has the amazing ability to subsist only on hydrogen gas, nitrate, and carbon dioxide. In return, it produces wicked-crazy quantities of plastic (yes, plastic—it stores carbon as &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed/21279345" target="_blank"&gt;polyhydroxybutyrate&lt;/a&gt;), and because it's potentially useful to industry, &lt;i&gt;Ralstonia&lt;/i&gt;'s DNA, like Bordetella's, has been fully sequenced. &lt;br /&gt;
&lt;br /&gt;
If you go right now to &lt;a href="http://genomevolution.org/r/8o1x"&gt;http://genomevolution.org/r/8o1x&lt;/a&gt;, you'll see that I've set up a little experiment for you. You shouldn't have to press the pink "Generate SynMap" button on that page. It should run automatically (but if you don't see an image like the one below, hit the button).&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-q4irhLxzYcM/UYSIvcYQG7I/AAAAAAAABkk/xmTMj9uqkNA/s1600/master_3708_3297.CDS-CDS.last.tdd10.filtered.dag.all.go_D50_g25_A2.aligncoords.gcoords_ctdiag.w500.sr.cs3.csoS.log.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/-q4irhLxzYcM/UYSIvcYQG7I/AAAAAAAABkk/xmTMj9uqkNA/s400/master_3708_3297.CDS-CDS.last.tdd10.filtered.dag.all.go_D50_g25_A2.aligncoords.gcoords_ctdiag.w500.sr.cs3.csoS.log.png" width="286" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Every dot in this dot-plot represents a match between &lt;br /&gt;
a gene in &lt;i&gt;Bordetella bronchiseptica&lt;/i&gt; and a gene in&lt;br /&gt;
&lt;i&gt;Ralstonia eutropha&lt;/i&gt;. See text for discussion.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
What has happened is that the &lt;a href="http://genomevolution.org/CoGe/SynMap.pl" target="_blank"&gt;SynMap&lt;/a&gt; server has been instructed to go find the complete DNA sequence of &lt;i&gt;Ralstonia eutropha Strain H16 &lt;/i&gt;as well as the complete DNA sequence for &lt;i&gt;Bordetella bronchiseptica Strain RB50&lt;/i&gt;, and run a comparison of one against the other.&lt;i&gt; &lt;/i&gt;It so happens &lt;i&gt;Bordetella &lt;/i&gt;has a single chromosome with 5,339,179 base pairs, whereas our hydrogen-loving, plastic-storing friend &lt;i&gt;Ralstonia &lt;/i&gt;has 3 chromosomes totalling 7,416,678 base pairs. (It has one main chromosome, and two small auxiliary chromosomes called &lt;i&gt;plasmids&lt;/i&gt;.)&lt;br /&gt;
&lt;br /&gt;
Every point on the above graph represents a match between a gene in &lt;i&gt;Bordetella &lt;/i&gt;and a gene in &lt;i&gt;Ralstonia&lt;/i&gt;. The X-axis represents locations on the &lt;i&gt;Bordetella &lt;/i&gt;genome (starting from one end and going to the other). The Y-axis plots locations on the &lt;i&gt;Ralstonia &lt;/i&gt;genome. All we're doing is mapping one genome to another and tallying the significant matches.&lt;br /&gt;
&lt;br /&gt;
This is a massive number of matches (well over 10,000), just to let you know. Usually, when you compare organisms, you don't see this many dots. I chose &lt;i&gt;Bordetella &lt;/i&gt;and &lt;i&gt;Ralstonia &lt;/i&gt;because I knew there'd be a lot of hits, based on my own prior experiments. And by the way, I don't think most microbiologists are aware (yet) that &lt;i&gt;Bordetella &lt;/i&gt;and &lt;i&gt;Ralstonia &lt;/i&gt;are extremely closely related. This is new information I'm sharing with you. &lt;br /&gt;
&lt;br /&gt;
It's one thing to get a bunch of points on a dot-plot, but how do we really know these two organisms are related? This is where &lt;i&gt;&lt;a href="http://en.wikipedia.org/wiki/Synteny" target="_blank"&gt;synteny&lt;/a&gt; &lt;/i&gt;comes in. Synteny is the degree to which two chromosomes share blocks of order. The key intuition is that merely sharing genes isn't enough; what counts is whether matching genes are in the same arrangements. If genome A has genes X, Y, and Z, in that order, and genome B also has genes X, Y, and Z (in the same order), we say that A and B share a syntenous triplet. The genomes have a degree of synteny.&lt;br /&gt;
&lt;br /&gt;
The &lt;a href="http://genomevolution.org/CoGe/SynMap.pl" target="_blank"&gt;SynMap tool&lt;/a&gt; is very powerful because it lets you find syntenous regions in DNA, and it's tunable. If you go to the Analysis Options tab on &lt;a href="http://genomevolution.org/r/8o1x" target="_blank"&gt;the SynMap page&lt;/a&gt;, you'll see that you can set two parameters called Maximum Distance Between Two Matches, and Minimum Number of Aligned Pairs. The URL that I sent you to (for our experiment) has values of 50 and 2, respectively, already dialed in. That means the graph is plotting every occurrence of 2 gene-pair matches that occurred between genes no more than 50 genes apart. That's a pretty liberal setting. If two organisms are related, you can expect to see a lot of matches.&lt;br /&gt;
&lt;br /&gt;
But what I propose you try (if you want) is setting "Maximum Distance Between Two Matches" to 500 and "Minimum Number of Aligned Pairs" to 250. (Then click the Generate SynMap button to refresh the graph.) This is a much more stringent requirement: It tells SynMap to try to find 250 matched genes within any given 500-gene region, do it for all regions of both genomes, and plot the results, if any. A 250-gene chunk is a pretty large syntenous region for a creature that has only 10,000-or-so genes to begin with. &lt;br /&gt;
&lt;br /&gt;
The result of our hunt for super-large 250-gene syntenous regions is shown in the first graph below. The red dots represent the regions. They run from the top of the Y-axis to the lower right corner. Remember that the axes map directly to positions on the genome. What the diagonal line says is that there's a near-linear mapping of syntenous regions from one genome to the other. &lt;br /&gt;
&lt;br /&gt;
The second graph below shows what happens when we re-tune our DNA-matching parameters to find blocks of 200 ordered genes within each 500-gene domain. We're looking for shorter runs of genes (200 instead of 250), which should be more plentiful. And they are. This time our graph looks like an 'X'. Why? Bacterial chromosomes do a lot of rearranging, and one of the most common events is a symmetric inversion around the origin of replication (and/or the terminus of replication). If you get enough of these inversions of various sizes, you end up with pieces of DNA that used to be near the start of the chromosome ending up near the end, and vice versa. (Repeat for all intermediate locations as well.) If you want to know more about how and why this ends up making an X-pattern on a dot-plot, be sure and read the classic paper by Eisen &lt;i&gt;et al.&lt;/i&gt; called "Evidence for symmetric chromosomal inversions around the replication origin in bacteria," &lt;i&gt;Genome Biology&lt;/i&gt; 2000, 1(6):research0011.1–0011.9 (unlocked PDF &lt;a href="http://www.biomedcentral.com/content/pdf/gb-2000-1-6-research0011.pdf" target="_blank"&gt;here&lt;/a&gt;).&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-pXqql2G9INQ/UYSQGVs6_WI/AAAAAAAABk8/60g8lknfajM/s1600/1-500x300.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-pXqql2G9INQ/UYSQGVs6_WI/AAAAAAAABk8/60g8lknfajM/s320/1-500x300.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Genomes compared with synteny-block size 250.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-UJ0qKUspwbo/UYSQGAocRUI/AAAAAAAABk0/AWsh9qeOK8w/s1600/2-500x250.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-UJ0qKUspwbo/UYSQGAocRUI/AAAAAAAABk0/AWsh9qeOK8w/s320/2-500x250.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Synteny block size 200. &lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-hkyNwWTltcM/UYSQGdLwrAI/AAAAAAAABk4/sSRCnDsRyzQ/s1600/3-500x175.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-hkyNwWTltcM/UYSQGdLwrAI/AAAAAAAABk4/sSRCnDsRyzQ/s320/3-500x175.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Block size 175.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-EjAk1b83rDQ/UYSQGpLwXAI/AAAAAAAABlM/G7OARLHcU00/s1600/4-180x120.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-EjAk1b83rDQ/UYSQGpLwXAI/AAAAAAAABlM/G7OARLHcU00/s320/4-180x120.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Block size 120, max domain size 180 genes.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-bh_78o945_4/UYSQG5yOvJI/AAAAAAAABlE/Z_-R636O1a0/s1600/5-130x90.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-bh_78o945_4/UYSQG5yOvJI/AAAAAAAABlE/Z_-R636O1a0/s320/5-130x90.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Block size 90, max domain 130.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-L6l6BERHtws/UYSQG_27iRI/AAAAAAAABlI/fX92Z5X2bDU/s1600/6-50x2.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-L6l6BERHtws/UYSQG_27iRI/AAAAAAAABlI/fX92Z5X2bDU/s320/6-50x2.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Block size 2, max domain size 50.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&amp;nbsp;

&lt;br /&gt;
The third and fourth graphs in this series show what happens when we tune our match for smaller block sizes. In the third graph, we've set "Maximum Distance Between Two Matches" to 500 and "Minimum Number of Aligned Pairs" to 175, which produces what looks like two really poorly drawn X's superimposed on each other. As we get more permissive with our synteny matches, we start to see the results of more inversion events. It makes sense that shorter synteny blocks will be swept up in more successful inversions, because an inversion that cuts across a large synteny block is probably fatal in many cases. (Some large groups of genes need to be kept together, for proper gene regulation. If an inversion event cuts through a critical regulon at the wrong spot, the cell might not go on to reproduce.)&lt;br /&gt;
&lt;br /&gt;
As we keep tuning the "Minimum Number of Aligned Pairs" downward, the graphs become more cluttered as we see the results of many thousands of inversion events in the history of the chromosomes.&lt;br /&gt;
&lt;br /&gt;
The fourth graph uses values of 180 and 120 for Max Distance and Minimum Number of Aligned Pairs, then in graph five we have values of 130 and 90. And finally, in the last graph, we have 50 and 2. The final graph is mostly noise. But buried in the noise are many faint signals that can be seen by twiddling the knobs on the synteny settings.&lt;br /&gt;
&lt;br /&gt;
I hope this bit of desktop genomics has convinced you that desktop genomics has reached an exciting stage indeed. (I've only scratched the surface, here, of what the tools at &lt;a href="http://genomevolution.org/"&gt;http://genomevolution.org&lt;/a&gt; can do.) I also hope I've convinced any microbial geneticists who might be reading this that &lt;i&gt;Bordetella &lt;/i&gt;and &lt;i&gt;Ralstonia &lt;/i&gt;are very closely related indeed. (Which should come as news. I don't think it's been reported.) You wouldn't think a hydrogen-loving soil organism would have much in common with a throat-dwelling pathogen, but as I like to say: DNA doesn't lie!&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/7074617674749803371/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/a-tale-of-two-microbes.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7074617674749803371?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7074617674749803371?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/a-tale-of-two-microbes.html" title="A Tale of Two Microbes" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-fEj6D9Xu4bE/UYSAIfI1WqI/AAAAAAAABkM/TBFyTH9vC6U/s72-c/2763668057_5fef2337a6.jpg" height="72" 
width="72" /><thr:total>4</thr:total></entry><entry gd:etag="W/&quot;CU4HSHczfip7ImA9WhBUFEk.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-7764110015102305708</id><published>2013-05-01T11:34:00.002-04:00</published><updated>2013-05-01T17:32:19.986-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-01T17:32:19.986-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="bacteria" /><category scheme="http://www.blogger.com/atom/ns#" term="mosquito" /><category scheme="http://www.blogger.com/atom/ns#" term="omics" /><category scheme="http://www.blogger.com/atom/ns#" term="genes" /><category scheme="http://www.blogger.com/atom/ns#" term="parasite" /><category scheme="http://www.blogger.com/atom/ns#" term="synteny" /><category scheme="http://www.blogger.com/atom/ns#" term="genomics" /><category scheme="http://www.blogger.com/atom/ns#" term="Thiomicrospira" /><category scheme="http://www.blogger.com/atom/ns#" term="Wolbachia" /><category scheme="http://www.blogger.com/atom/ns#" term="Europa" /><category scheme="http://www.blogger.com/atom/ns#" term="Candidatus" /><category scheme="http://www.blogger.com/atom/ns#" term="DNA" /><category scheme="http://www.blogger.com/atom/ns#" term="crustacean" /><category scheme="http://www.blogger.com/atom/ns#" term="thermal vent" /><category scheme="http://www.blogger.com/atom/ns#" term="symbiosis" /><category scheme="http://www.blogger.com/atom/ns#" term="mitochondria" /><title>Deep-Sea Vents: The Mosquito Connection</title><content type="html">Quick: What species of life on earth is the most abundant? (Which species has more living members than any other species?) Hint: If an alien probe lands in a random location on earth, chances are better than 70% that the probe will encounter this organism.&lt;br /&gt;
&lt;br /&gt;
If you're thinking in terms of the ocean, you're on the right track. What may surprise you is the connection between the world's-most-populous-organism (to be revealed shortly) and the mosquitoes that've been dive-bombing your neck all week. Equally amazing is the link between the mosquitoes in your back yard and hydrothermal vents in the ocean floor.&lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-oIoBKpGcqgM/UYCFKc73xsI/AAAAAAAABjQ/B5B-p8D0tyo/s1600/wolb_egg.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="200" src="http://4.bp.blogspot.com/-oIoBKpGcqgM/UYCFKc73xsI/AAAAAAAABjQ/B5B-p8D0tyo/s200/wolb_egg.jpg" width="200" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;The hundreds of bright little particles at the &lt;br /&gt;
narrow 
end of this wasp egg are &lt;i&gt;Wolbachia &lt;/i&gt;cells. &lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
I wasn't thinking about marine biology or deep-sea hydrothermal vents when I went online at &lt;a href="http://genomevolution.org/"&gt;http://genomevolution.org&lt;/a&gt; the other day to do a little nosing around into the genome of &lt;i&gt;Wolbachia pipientis&lt;/i&gt;, the ultra-tiny bacterial parasite carried by nearly every mosquito on earth. (&lt;b&gt;Caution&lt;/b&gt;: Don't attempt the following DNA-analysis tricks on your own unless you want to become thoroughly addicted to desktop &lt;a href="http://en.wikipedia.org/wiki/Omics" target="_blank"&gt;omics&lt;/a&gt;. I'm a microbiologist by training. I can do these stunts safely.) "Parasite" is actually the wrong word. Our tiny friend &lt;i&gt;Wolbachia &lt;/i&gt;doesn't just &lt;i&gt;parasitize &lt;/i&gt;the mosquito; it's an integral &lt;i&gt;part &lt;/i&gt;of the mosquito. &lt;i&gt;Wolbachia &lt;/i&gt;can't live outside its insect host—and guess what? The host &lt;a href="http://www.celemans.com/pdfs/2007%20Pannebakker%20etal%20PNAS.pdf" target="_blank"&gt;frequently&lt;/a&gt; can't live without &lt;i&gt;Wolbachia&lt;/i&gt;. The two provide essential services for each other, an arrangement known as &lt;a href="http://en.wikipedia.org/wiki/Mutualism_%28biology%29" target="_blank"&gt;mutualism&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
I would argue that &lt;i&gt;Wolbachia &lt;/i&gt;is more than a mutualistic symbiont: It's a proto-organelle, something very close to what &lt;a href="http://en.wikipedia.org/wiki/Lynn_Margulis" target="_blank"&gt;Lynn Margulis&lt;/a&gt; had in mind as the ancestor of today's &lt;a href="http://en.wikipedia.org/wiki/Mitochondrion" target="_blank"&gt;mitochondrion&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;Wolbachia &lt;/i&gt;can't live on its own in the outside world (as far as anybody knows): it needs to live inside a host (generally an arthropod, although filarial worms also carry &lt;i&gt;Wolbachia&lt;/i&gt;). Inside its host it occupies a very special niche: It lives in the &lt;i&gt;nursery cells of the insect's ovary&lt;/i&gt;—the cells that will go on to become egg cells.&lt;br /&gt;
&lt;br /&gt;
This is no ordinary symbiosis. I mentioned in an &lt;a href="http://asserttrue.blogspot.com/2013/04/science-on-desktop.html" target="_blank"&gt;earlier post&lt;/a&gt; that Wolbachia carries with it genes for reverse-transcriptases, resolvases, recombinases, 
transposases, translocases, DNA polymerases, RNA polymerases, and phage integrases—a complete suite of &lt;a href="http://en.wikipedia.org/wiki/Retrovirus" target="_blank"&gt;retroviral&lt;/a&gt; machinery, designed for export of foreign DNA into host DNA. And indeed, researchers have found that &lt;i&gt;Wolbachia &lt;/i&gt;DNA is quite often embedded in the host's own nuclear DNA. (One group, looking at four insect hosts and four nematode hosts, &lt;a href="http://www.sciencemag.org/content/317/5845/1753.abstract" target="_blank"&gt;found&lt;/a&gt; anywhere from 500 base-pairs to over a million base pairs of &lt;i&gt;Wolbachia&lt;/i&gt; DNA residing in the nucleus. &lt;a href="http://genome.cshlp.org/content/18/2/272.full" target="_blank"&gt;Another group found&lt;/a&gt; 45 &lt;i&gt;Wolbachia &lt;/i&gt;genes incorporated in a fruit-fly host's nuclear DNA.) The situation with &lt;i&gt;Wolbachia &lt;/i&gt;thus parallels the situation with mitochondria, where we know that 97% of the gene products that go to make up a mitochondrion &lt;a href="http://www.broadinstitute.org/pubs/MitoCarta/" target="_blank"&gt;are actually encoded in nuclear DNA&lt;/a&gt;, not mitochondrial DNA.&lt;br /&gt;
&lt;br /&gt;
When you encounter an organism as baffling as &lt;i&gt;Wolbachia&lt;/i&gt;, oftentimes you want to know what its relatives are—what it's most closely related to. When a new or poorly understood organism has a close relative that's already well-studied, sometimes you learn a lot in a hurry. That's particularly true of pathogens (not that &lt;i&gt;Wolbachia &lt;/i&gt;is a pathogen per se). Pathogens have virulence strategies of various kinds. Maybe &lt;i&gt;Wolbachia &lt;/i&gt;has symbiosis strategies that it learned from a relative?&lt;br /&gt;
&lt;br /&gt;
The problem with a lot of the super-tiny microbes (which &lt;i&gt;Wolbachia &lt;/i&gt;definitely is, with only a quarter as much DNA as &lt;i&gt;E. coli&lt;/i&gt;) is that their relatedness is not always well understood. Organisms are assigned a taxonomic slot, then the assignment changes a few years later, after they're better-studied. (So for example, &lt;i&gt;Cowdria ruminantium&lt;/i&gt; was eventually renamed &lt;i&gt;Ehrlichia ruminantium&lt;/i&gt;, and a bunch of former &lt;i&gt;Ehrlichias &lt;/i&gt;are now &lt;i&gt;Neorickettsias&lt;/i&gt;, except the ones that attack red blood cells, which are now &lt;i&gt;Anaplasmas&lt;/i&gt;.) Taxonomy at this end of the evolutionary tree is definitely a work in progress.&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-3qj32nXJl_8/UYCInTI7lsI/AAAAAAAABjg/VM6sLpcxbyM/s1600/hydrothermal-vent-yale-university.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-3qj32nXJl_8/UYCInTI7lsI/AAAAAAAABjg/VM6sLpcxbyM/s320/hydrothermal-vent-yale-university.jpg" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Deep-sea thermal vents like this one &lt;br /&gt;
are home to organisms like &lt;i&gt;Thiomicrospira&lt;/i&gt; &lt;br /&gt;
that can grow on sulfide, CO2, and basic salts.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
Fortunately, it's easy nowadays (what with so many organisms' DNA sequences available online) to go on the web and compare genomes directly, using a tool like &lt;a href="http://genomevolution.org/CoGe/SynMap.pl" target="_blank"&gt;SynMap&lt;/a&gt;, which is what I started doing with &lt;i&gt;Wolbachia&lt;/i&gt;. I started going down the list of mini-microorganisms and began running DNA similarity tests of &lt;i&gt;Wolbachia &lt;/i&gt;against &lt;i&gt;Ehrlichia&lt;/i&gt;, &lt;i&gt;Neorickettsia, Anaplasma, Chlamydia&lt;/i&gt;, and "the usual suspects" at the ultra-small-chromosome end of the tree of life.&lt;br /&gt;
&lt;br /&gt;
What I found surprised me. A bizarre little bacterium called &lt;i&gt;&lt;a href="http://www.plosbiology.org/article/info%3Adoi%2F10.1371%2Fjournal.pbio.0040383" target="_blank"&gt;Thiomicrospira&lt;/a&gt; &lt;/i&gt;kept showing up in my &lt;a href="http://blast.ncbi.nlm.nih.gov/Blast.cgi" target="_blank"&gt;BLAST&lt;/a&gt; searches as having many genes in common with &lt;i&gt;Wolbachia &lt;/i&gt;(based on sequence matches in large numbers of genes). None of the taxonomy charts showed the two to be related. But DNA doesn't lie. I kept coming up with matches across &lt;i&gt;hundreds &lt;/i&gt;of genes. (Bear in mind, &lt;i&gt;Wolbachia &lt;/i&gt;has only about 1300 genes to begin with, which is very small, even for a bacterium.)&lt;br /&gt;
&lt;br /&gt;
What's bizarre about &lt;i&gt;Thiomicrospira &lt;/i&gt;is that it's one of those fairly newly discovered microbes that live on sulfur, heat, and CO2 at the bottom of the ocean, in total darkness, in the vicinity of thermal vents. &lt;i&gt;Thiomicrospira &lt;/i&gt;is the kind of life form NASA takes a great interest in, because it &lt;i&gt;could &lt;/i&gt;be a prototype for exactly the type of survive-in-the-dark CO2-using organism that might live under the ice crust of Europa (Jupiter's moon). In theory, there could be geothermal vents on the floor of the large ocean of liquid water that NASA is pretty sure exists under Europa's ice. If there's life down there, it could very well look like &lt;i&gt;Thiomicrospira&lt;/i&gt;.&lt;br /&gt;
&lt;br /&gt;
But why should &lt;i&gt;Thiomicrospira &lt;/i&gt;have so many genes in common with a mosquito symbiont? The &lt;i&gt;Thiomicrospira &lt;/i&gt;organism lives at the bottom of the ocean; &lt;i&gt;Wolbachia&lt;/i&gt; lives inside arthropod eggs. One obtains its carbon in the form of CO2; the other produces CO2 as a waste product. One is adapted to live in warm salt water; the other lives in cold-blooded insects. In theory, these two germs couldn't be further apart. And yet, oddly enough, they not only have hundreds of genes in common, the genes are well-matched from a DNA sequence-similarity standpoint. &lt;i&gt;Thiomicrospira&lt;/i&gt;'s DNA even incorporates a prophage module, and some of its phage genes show a high percentage base-pair similarity with the phage genes of &lt;i&gt;Wolbachia&lt;/i&gt;. (See screen shot below.)&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-qy-jy12X4H0/UYEet5nIviI/AAAAAAAABjw/ckhUEmQn4dU/s1600/PhageThioWolpip.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="348" src="http://4.bp.blogspot.com/-qy-jy12X4H0/UYEet5nIviI/AAAAAAAABjw/ckhUEmQn4dU/s640/PhageThioWolpip.png" width="640" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Remarkably, &lt;i&gt;Thiomicrospira &lt;/i&gt;and &lt;i&gt;Wolbachia &lt;/i&gt;share certain phage genes in common, as shown here. The genes have a DNA sequence identity of about 60%.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
After doing a little more detective work, I found an organism that might very well form a "missing link" between the mosquito symbiont and the thermal-vent dweller. This organism kept showing up in my analyses as having a high degree of DNA similarity with both &lt;i&gt;Thiomicrospira &lt;/i&gt;and &lt;i&gt;Wolbachia&lt;/i&gt;. The organism in question is &lt;a href="http://en.wikipedia.org/wiki/Pelagibacter_ubique" target="_blank"&gt;&lt;i&gt;Pelagibacter ubique&lt;/i&gt;&lt;/a&gt; (now known as &lt;i&gt;Candidatus pelagibacter&lt;/i&gt;, although some might question this taxonomic assignment since all other &lt;i&gt;Candidatus &lt;/i&gt;members are obligate intracellular symbionts), and it's an astonishing organism in two ways: First, it's the smallest non-parasitic (free-living) bacterium known to science, with only 1.3 million base-pairs in its DNA (making it slightly smaller than &lt;i&gt;Wolbachia &lt;/i&gt;and its tiny cousins). Secondly, it's the most &lt;i&gt;numerous&lt;/i&gt; living thing on earth. It's present in large amounts in every one of earth's oceans. &lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;Pelagibacter &lt;/i&gt;was placed in the &lt;i&gt;Candidatus &lt;/i&gt;clade in 2007 due to its small genome and cell size and certain ribosomal markers. It has a very mitochondria-like genetic profile, and in fact &lt;a href="http://www.plosone.org/article/info:doi/10.1371/journal.pone.0024857" target="_blank"&gt;some people think&lt;/a&gt; &lt;i&gt;Pelagibacter &lt;/i&gt;is the ancestor of today's mitochondrion, a theory that's all the more satisfying when you consider that &lt;i&gt;Pelagibacter &lt;/i&gt;is both ancient and tied to the sea.&lt;br /&gt;
&lt;br /&gt;
My analysis using &lt;a href="http://genomevolution.org/CoGe/SynMap.pl" target="_blank"&gt;SynMap&lt;/a&gt; found that &lt;i&gt;Pelagibacter &lt;/i&gt;and its thermal-vent-dwelling cousin &lt;i&gt;Thiomicrospira &lt;/i&gt;share about 660 genes (out of 1480 or so for &lt;i&gt;Pelagibacter&lt;/i&gt;), whereas &lt;i&gt;Wolbachia &lt;/i&gt;and &lt;i&gt;Pelagibacter &lt;/i&gt;share around 581, and &lt;i&gt;Thiomicrospira &lt;/i&gt;and &lt;i&gt;Wolbachia &lt;/i&gt;share around 1000. These are so-called &lt;i&gt;non-syntenous&lt;/i&gt; point matches between genes; instances where the same gene occurs in both organisms, with a high percentage of base-pair matching. &lt;a href="http://en.wikipedia.org/wiki/Synteny" target="_blank"&gt;Synteny&lt;/a&gt; is a concept that takes gene-matching one step further and says that &lt;i&gt;clusters &lt;/i&gt;of similar genes are what count. Synteny at the level of higher plants and animals is one thing, but at the level of a mini-microbe it tends to lack meaning, because the genes of bugs like &lt;i&gt;Wolbachia &lt;/i&gt;are notoriously mobile: They find new positions on the chromosome over time (probably because of the large number of transposases, nucleases, and integrases in the genome). Even so, I decided to carry out a bit of syntenic analysis to see what I could find out.&lt;br /&gt;
&lt;br /&gt;
For purposes of my analysis I defined a "syntenon" as three or more co-proximal genes that match three or more genes on the other organism's genome. But to be part of a syntenon, all three genes in a triplet have to occur within a 30-gene span (and match 3 genes in a 30-gene span on the other organism's DNA) &lt;i&gt;plus &lt;/i&gt;the genes have to be&lt;i&gt; in the same order &lt;/i&gt;in both organisms.&lt;br /&gt;
&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-pM8yIAaK7JE/UYEgRepv7yI/AAAAAAAABj8/rigX3Js9p1w/s1600/europa_galileo_900.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-pM8yIAaK7JE/UYEgRepv7yI/AAAAAAAABj8/rigX3Js9p1w/s320/europa_galileo_900.jpg" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;A planet-spanning waterworld is thought to exist under &lt;br /&gt;
Europa's icy outer crust. If thermal vents exist at the &lt;br /&gt;
bottom, any life that exists may look a lot like &lt;i&gt;Thiomicrospira&lt;/i&gt;.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
Using &lt;a href="http://genomevolution.org/CoGe/SynMap.pl" target="_blank"&gt;SynMap&lt;/a&gt;, I found that whereas &lt;i&gt;Wolbachia &lt;/i&gt;and &lt;i&gt;Pelagibacter &lt;/i&gt;share around 157 syntenic genes, and &lt;i&gt;Thiomicrospira &lt;/i&gt;and &lt;i&gt;Wolbachia &lt;/i&gt;share around 132, &lt;i&gt;Thiomicrospira &lt;/i&gt;and &lt;i&gt;Pelagibacter &lt;/i&gt;share 250 (which makes sense in that both are ocean-dwellers). For comparison-and-control purposes, I did a triplet match of &lt;i&gt;Thiomicrospira &lt;/i&gt;against another chemoautotroph (an organism that gets energy from inorganic chemicals, and carbon from CO2), namely &lt;i&gt;Methanothermobacter marburgensis&lt;/i&gt;. There were only 53 syntenic triplets in common between the two chemoautotrophs. (Between &lt;i&gt;Wolbachia &lt;/i&gt;and &lt;i&gt;Methanothermobacter&lt;/i&gt;, on the other hand, there were only 3 triplet-matches.) Doing a match between two &lt;i&gt;Wolbachia &lt;/i&gt;species (a mosquito-dwelling variety and a fruit-fly-dwelling cousin) produced 522 gene matches in syntenic triplets. &lt;br /&gt;
&lt;br /&gt;
It seems reasonable to me, based not just on the previous sorts of analysis but also direct inspection of the genomes (in terms of their respective protein products), that &lt;i&gt;Thiomicrospira &lt;/i&gt;evolved from &lt;i&gt;Pelagibacter&lt;/i&gt;.&amp;nbsp; &lt;i&gt;Pelagibacter &lt;/i&gt;is the most abundant life form in the ocean, and perhaps the oldest. &lt;i&gt;Pelagibacter &lt;/i&gt;is also very mitochondria-like, and so is &lt;i&gt;Thiomicrospira&lt;/i&gt;, which has rhodanese-like proteins, the full cytochrome system, redox enzymes, citric-acid-cycle enzymes, plus certain characteristic membrane and sensor proteins, flippases, etc. (For what it's worth, &lt;i&gt;Thiomicrospira &lt;/i&gt;has the highest signal-transduction profile I've ever seen at &lt;a href="http://mistdb.com/"&gt;http://mistdb.com&lt;/a&gt;, again making it very mitochondrial-feeling.)&lt;br /&gt;
&lt;br /&gt;
I'm tempted to say, similarly, that &lt;i&gt;Thiomicrospira &lt;/i&gt;and &lt;i&gt;Wolbachia &lt;/i&gt;are related. They have phage proteins in common. They both have genes for patatin proteins. They share multiple drug resistance genes. (That's not so strange. Antibiotics occur naturally in the environment.) They share genes for Flp-type pilins. Plus many more coincidences, big and small.&lt;br /&gt;
&lt;br /&gt;
At first blush, a deep-sea thermal vent seems pretty far removed, environmentally, from the egg cell of a mosquito. How to reconcile the difference? Actually, I see similarities. &lt;i&gt;Thiomicrospira &lt;/i&gt;thrives at temperatures of 28 to 32 degrees Celsius (which is also true of mosquitoes, although they prefer the 28-degree end of the scale). And blood (the preferred food source for mosquitoes) is comparable in pH and salinity to seawater. Also, mosquitoes have an aquatic lifecycle: they require brackish water in which to lay eggs. Mosquitoes and salt marshes go back millions of years.&lt;br /&gt;
&lt;br /&gt;
It's even possible that &lt;i&gt;Wolbachia &lt;/i&gt;might live in deep-sea-vent-dwelling host organisms. In fact, I predict they &lt;i&gt;will &lt;/i&gt;be found there. Why? Because in addition to inhabiting flying insects, spiders, mites, and ticks (and filarial worms), &lt;i&gt;Wolbachia &lt;/i&gt;have also been &lt;a href="http://rcordaux.voila.net/pdfs/01.pdf" target="_blank"&gt;found&lt;/a&gt; in a very high percentage of crustaceans. We know that crustaceans are often found living near deep-sea thermal vents; and many crustaceans show the characteristic feminization of genetic males that's so often the tipoff to a massive &lt;i&gt;Wolbachia&lt;/i&gt; presence in insect populations.&lt;br /&gt;
&lt;br /&gt;
Insects and crustaceans represent two of the oldest, most successful, and most widely distributed life forms of the animal kingdom. Would it really be so surprising if the bacteria that colonize these life forms are closely related to the most common marine bacteria on the planet? I don't think so. Stranger things have happened.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/7764110015102305708/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/05/deep-sea-vents-mosquito-connection.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7764110015102305708?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/7764110015102305708?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/05/deep-sea-vents-mosquito-connection.html" title="Deep-Sea Vents: The Mosquito Connection" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-oIoBKpGcqgM/UYCFKc73xsI/AAAAAAAABjQ/B5B-p8D0tyo/s72-c/wolb_egg.jpg" height="72" width="72" /><thr:total>4</thr:total></entry><entry gd:etag="W/&quot;A0ACSXY5cSp7ImA9WhBVGUQ.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-1890586115384328475</id><published>2013-04-26T00:30:00.000-04:00</published><updated>2013-04-26T14:09:28.829-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-26T14:09:28.829-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="endosymbiosis" /><category scheme="http://www.blogger.com/atom/ns#" term="ankyrin" /><category scheme="http://www.blogger.com/atom/ns#" term="GenomeView" /><category 
scheme="http://www.blogger.com/atom/ns#" term="Anaplasma" /><category scheme="http://www.blogger.com/atom/ns#" term="endosymbiont" /><category scheme="http://www.blogger.com/atom/ns#" term="Chlamydia" /><category scheme="http://www.blogger.com/atom/ns#" term="nitroreductase" /><category scheme="http://www.blogger.com/atom/ns#" term="phage" /><category scheme="http://www.blogger.com/atom/ns#" term="superoxide" /><category scheme="http://www.blogger.com/atom/ns#" term="Wolbachia" /><category scheme="http://www.blogger.com/atom/ns#" term="retrovirus" /><category scheme="http://www.blogger.com/atom/ns#" term="Candidatus" /><category scheme="http://www.blogger.com/atom/ns#" term="apoptosis" /><category scheme="http://www.blogger.com/atom/ns#" term="comparative genomics" /><category scheme="http://www.blogger.com/atom/ns#" term="mitochondria" /><title>Science on the Desktop</title><content type="html">For decades, I've been hoping I'd live long enough to see a day when serious science could be done on the desktop by dedicated amateurs. Amateur astronomers know what I'm talking about. You can't do much particle physics on the desktop, and there are no affordable desktop electron microscopes (&lt;a href="http://singularityhub.com/2009/12/15/hitachis-desktop-electron-microscope-cheap-enough-for-home-use/" target="_blank"&gt;yet&lt;/a&gt;), but if comparative genomics is your thing? Get ready to rock and roll, my friend.&lt;br /&gt;
&lt;br /&gt;
Over the weekend I discovered &lt;a href="http://genomevolution.org/"&gt;http://genomevolution.org&lt;/a&gt; and promptly went nuts. Let me take you on a tour of what's possible.&lt;br /&gt;
&lt;br /&gt;
First I should explain that my background is in microbiology, and I've always had a soft spot in my heart (not literally) for organisms with ultra-tiny genomes: things like &lt;i&gt;Chlamydia trachomatis&lt;/i&gt;, the sexually transmitted parasite. It's technically a bacterium, but you can't grow it in a dish. It requires a host cell in which to live.&lt;br /&gt;
&lt;a href="http://1.bp.blogspot.com/-YzTYVDesjoA/UXkrNm6emSI/AAAAAAAABiw/pB9TqDMvP2U/s1600/Wolbachia+Mitochondra_resize.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-YzTYVDesjoA/UXkrNm6emSI/AAAAAAAABiw/pB9TqDMvP2U/s320/Wolbachia+Mitochondra_resize.jpg" /&gt;&lt;/a&gt;
&lt;br /&gt;
It turns out there are many of these itty-bitty obligate endosymbionts (at least a dozen major families are known), and because of their small size and obligate intracellular lifestyle, they have a lot in common with mitochondria. Which is to say, like mitochondria, they're about a micron in size, they divide on their own, they have circular DNA, and they provide services to the host in exchange for living quarters.&lt;br /&gt;
&lt;br /&gt;
When you look at one of these little creatures under the microscope (whether it's &lt;i&gt;Chlamydia &lt;/i&gt;or &lt;i&gt;Ehrlichia &lt;/i&gt;or &lt;i&gt;Anaplasma &lt;/i&gt;or what have you), you see pretty much the same thing. (See photo.) Namely, a tiny bacterium living in cytoplasm, mimicking a mitochondrion.&lt;br /&gt;
&lt;br /&gt;
When Lynn Margulis wrote her classic 1967 &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed/11541392" target="_blank"&gt;paper&lt;/a&gt; suggesting that mitochondria were once tiny bacterial endosymbionts, it seemed laughable at the time, and her ideas were widely criticized (in fact her paper was "rejected by about fifteen journals," she once recalled). Now it's taught in school, of course. But we have a long way to go before we understand how mitochondria work. And we really, really &lt;i&gt;need &lt;/i&gt;to know how they work, because for one thing, mitochondria seem to be deeply involved in orchestrating &lt;a href="http://en.wikipedia.org/wiki/Apoptosis" target="_blank"&gt;apoptosis &lt;/a&gt;(programmed cell death) and various kinds of &lt;a href="http://en.wikipedia.org/wiki/Signal_transduction" target="_blank"&gt;signal transduction&lt;/a&gt;, and until we understand how all that works, we're going to be hindered in understanding cancer.&lt;br /&gt;
&lt;br /&gt;
When I discovered the tools at  &lt;a href="http://genomevolution.org/"&gt;http://genomevolution.org&lt;/a&gt;, one of the first things I did, on a what-the-hell basis, was compare the genomes of two small endosymbionts, &lt;a href="http://en.wikipedia.org/wiki/Wolbachia" target="_blank"&gt;&lt;i&gt;Wolbachia pipientis&lt;/i&gt;&lt;/a&gt; and &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed/15658987" target="_blank"&gt;&lt;i&gt;Neorickettsia sennetsu&lt;/i&gt;&lt;/a&gt;. The former lives in insects; the latter, in flatworms that infect fish, bats, birds, horses, and probably lots else. Note that for a horse to get Potomac horse fever, first the Neorickettsia has to infect a tiny flatworm; then the flatworm has to be ingested by a dragonfly, caddisfly, or mayfly; then the horse has to eat (or maybe be bitten by, although only infection-by-ingestion has been demonstrated) the worm-infected fly. The parasite-of-a-parasite chain of events is not only fascinating in its own right, it suggests (to me) that parasites &lt;i&gt;enable each other&lt;/i&gt; through shared strategies at the biochemical level, and I might as well spoil some suspense here by revealing that there's even &lt;i&gt;yet another&lt;/i&gt; layer of parasitism (and biochemical enablement) going on in this picture, involving viruses. But we're getting ahead of ourselves.&lt;br /&gt;
&lt;br /&gt;
I mentioned Wolbachia a second ago. Wolbachia is a fascinating little critter, because it's found in the reproductive tract of anywhere from 20% to 70% of all insects (plus an undetermined number of spiders, mites, crustaceans, and nematodes), but it doesn't cause disease, and in fact it &lt;a href="http://www.rochester.edu/College/BIO/labs/WerrenLab/WerrenPapers-PDF/2003_Werren_GenderBenders.pdf" target="_blank"&gt;appears&lt;/a&gt; many insects are unable to survive without it. Wolbachia are unusual in that the extracellular phase of their lifecycle (the part where they spread from one host to another) isn't known; no one has observed it. What's more (and this part is incredible), Wolbachia have adapted to a stem-cell niche: They live in the cells that give rise to insect egg cells. Thus, all newborn female progeny of an infected mother are infected, and all eggs pass on the Wolbachia. In this sense, the genetics of Wolbachia obey mitochondrial genetics (whereby the mother passes on the organelle and its genome).&lt;br /&gt;
&lt;br /&gt;
I quickly found, via Sunday afternoon desktop genomics, that Wolbachia and Neorickettsia (and other endosymbionts: Anaplasma, Ehrlichia, etc.) have many genes in common—hundreds, in fact. And when I say "genes in common," I mean that the genes often show better-than-50% similarity in DNA base-pair matching.&lt;br /&gt;
&lt;br /&gt;
It's important to put some context on this. These little organisms have DNA that encodes only 1,000 genes. (By comparison, &lt;i&gt;E. coli &lt;/i&gt;has around 4,400 genes.) Endosymbionts lack genes for common metabolic pathways. They cannot biosynthesize amino acids, for example; instead they rely on the host to provide such nutrients ready-made. If 400 to 500 of an endosymbiont's 1,000 genes are shared across major endosymbiont families, that's a huge percentage. It suggests there's a set of core genes, numbering in the low hundreds, that encapsulate the basic "strategy" of endosymbiosis.&lt;br /&gt;
&lt;br /&gt;
A little more context: Mitochondria have their own DNA and look a lot like endosymbionts. But here's the thing: Mitochondrial DNA is &lt;i&gt;tiny&lt;/i&gt; (only about 15,000 base pairs, versus a million for an endosymbiont). It turns out, 97% of the "stuff" that makes up a mitochondrion is encoded&lt;i&gt; in the nucleus of the host&lt;/i&gt;. If you include these nuclear genes, mitochondria actually rely on about &lt;a href="http://www.broadinstitute.org/pubs/MitoCarta/" target="_blank"&gt;1,000 genes&lt;/a&gt; total, of which only 3% are in the organelle's DNA. Lynn Margulis would say that what happened is, the endosymbiont ancestor of today's mitochondrion originally had DNA of about a million base-pairs (1,000 genes), but some time after taking up residency in the host cell, the invader's DNA mostly migrated to the host nucleus.&lt;br /&gt;
&lt;br /&gt;
Why did symbiont-to-host DNA migration stop at 97%? Why not 100%? If we look at that 3%, we find genes coding for tRNA and bacterial ribosomes (specialized protein-making machinery) plus genes for enormous, complex transmembrane enzyme systems: &lt;a href="http://en.wikipedia.org/wiki/Cytochrome_c_oxidase_subunit_I" target="_blank"&gt;cytochrome c oxidase&lt;/a&gt; and &lt;a href="http://www.princeton.edu/~achaney/tmve/wiki100k/docs/NADH_dehydrogenase.html" target="_blank"&gt;NADH dehydrogenase&lt;/a&gt;. (The former is the endpoint of oxidative respiration; the latter the entry-point.) Obviously it must be advantageous for these genes to be proximal to the organelle.&lt;br /&gt;
&lt;br /&gt;
But why even have an organelle (a physical compartment)? One might ask why it's necessary to have a mitochondrial parasite swimming around in the cytoplasm at all, when most of the genes are part of the host's DNA? The answer is, the stuff that goes on inside the confines of the mitochondrion &lt;i&gt;needs &lt;/i&gt;to be contained, because it's violently toxic stuff involving superoxide radicals, redox reactions, "proton pumps," and Fenton chemistry (transition-metal peroxide reactions). A containment structure is definitely called for, to segregate this toxic chemistry from the rest of the cell.&lt;br /&gt;
&lt;br /&gt;
We might ask how it is that the DNA of the protobacterial ancestor of today's mitochondria wound up in the host nucleus in the first place. Let's consider the possibilities. Protobacterial (symbiont) DNA may have transferred to the host all at once, or it might have migrated piecemeal, over time. Or both. Is it realistic that huge amounts of endosymbiont DNA could have migrated to the host nucleus all at once? Yes. It's been suggested that vacuolar phagocytosis drove invader DNA to the nucleus in a big gulp. Evidence? Wolbachia inhabits the vacuolar space. &lt;br /&gt;
&lt;br /&gt;
But export of genes and gene products to the host might have occurred piecemeal as well. A little desktop exploration provides some clues. If you use &lt;a href="http://genomevolution.org/CoGe/GenomeView.pl?dsgid=1977" target="_blank"&gt;GenomeView&lt;/a&gt; or any number of other online tools to explore the DNA of Wolbachia, several things pop out at you. First is that many Wolbachia genes are mitochondria-like: They encode for things like cytochrome c oxidase, cytochrome b, NADH dehydrogenase, succinyl-CoA synthetase, Fenton-chemistry enzymes, and a slew of oxidases and reductases (including a nitroreductase). Wolbachia is clearly engaged in providing what might be called redox-detox services for the host—the same value proposition that mitochondria offer. This makes sense, because if Wolbachia cells were a net &lt;i&gt;drag &lt;/i&gt;on the respiratory potential of host-cell mitochondria (if they couldn't at least hold their own with respect to mitochondria), the host would die.&lt;br /&gt;
&lt;br /&gt;
The second thing that jumps out at you when you look at the Wolbachia genome is the abundance of genes devoted to export processes: membrane proteins, permeases, type I, II, and IV secretion systems, &lt;a href="http://en.wikipedia.org/wiki/ATP-binding_cassette_transporter" target="_blank"&gt;ABC transporters&lt;/a&gt;, etc., plus at least 60 &lt;a href="http://en.wikipedia.org/wiki/Ankyrin" target="_blank"&gt;ankyrin&lt;/a&gt;-repeat-domain genes—all powerful evidence of specializations aimed at export of genes and gene products to the host. But the most stunning "smoking gun" of all is the presence, in Wolbachia DNA, of five reverse-transcriptase genes, plus genes for resolvases, recombinases, transposases, DNA polymerases, RNA polymerases, and phage integrases. In essence, there's a complete suite of &lt;a href="http://en.wikipedia.org/wiki/Retrovirus" target="_blank"&gt;retroviral&lt;/a&gt; machinery, &lt;i&gt;designed for&lt;/i&gt; export of foreign DNA into host DNA.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-t3ic9ldighY/UXlxkwFXqzI/AAAAAAAABjA/jrsFuRvQgLU/s1600/PhageProtein.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="392" src="http://4.bp.blogspot.com/-t3ic9ldighY/UXlxkwFXqzI/AAAAAAAABjA/jrsFuRvQgLU/s640/PhageProtein.png" width="640" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;&lt;i&gt;An example of one of 113 phage-derived genes in Wolbachia (lower gene array). In this case, the gene matches a phage gene found in Candidatus hamiltonella (upper gene array). The two isoforms exhibit 59% DNA sequence similarity, despite widely differing GC ratios. See text for discussion.&lt;/i&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
But wait. There's more. The third thing that jumps straight in your face when you start looking at the Wolbachia genome is the presence of (are you ready?) no fewer than 113 genes for phage-related proteins, including major and minor capsid and HK97-style prohead proteins, plus tail proteins, baseplate, tail tube, tail tape-measure, and sheath proteins; late control gene D; phage DNA methylases; and so on. (For non-biologists: phage is the term for viruses that attack bacteria.) &lt;br /&gt;
&lt;br /&gt;
In the above screenshot, I'm comparing Wolbachia DNA (lower strip) to DNA from another insect-infecting endosymbiont, &lt;i&gt;Candidatus hamiltonella&lt;/i&gt;, which is &lt;a href="http://aem.asm.org/content/74/21/6782.full" target="_blank"&gt;known&lt;/a&gt; to contain an intact virus (phage) in its DNA. Many phage proteins in Wolbachia have corresponding matches in the Candidatus genome. In this case, we're looking at a gene (the gold-colored stretch pointed at by red arrows) that is 1440 nucleotides long, with a 59% sequence match across genomes. The match percentage is remarkably high given that the Candidatus version of this gene has a 51.7% GC content while the Wolbachia version has a 40.6% GC. Also, note that Wolbachia itself has an overall GC of 34.2%. The fact that Wolbachia's putative phage genes are significantly higher in GC content than Wolbachia's &lt;i&gt;non&lt;/i&gt;-phage genes is good confirmation that the genes really &lt;i&gt;are &lt;/i&gt;from phage. &lt;br /&gt;
&lt;br /&gt;
It's 100% clear that viral DNA has made its way into the DNA of Wolbachia (either recently or long ago), and it's reasonable to hypothesize that Wolbachia has repurposed the retrovirus-like phage genes for packaging and exporting Wolbachia DNA to the host nucleus.&lt;br /&gt;
&lt;br /&gt;
Okay, so maybe you have to be a biologist for any of this stuff to make your hairs stand on end. To me, it's a dream come true to be able to do this kind of detective work on a Sunday afternoon while sitting on the living-room couch, using nothing more than a decrepit five-year-old Dell laptop with a wireless connection. The notion that you can do comparative genomics and proteomics while watching an Ancient Aliens rerun on TV is (for me) totally cerebrum-blowing. It makes me wonder what's just around the corner.&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/1890586115384328475/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/04/science-on-desktop.html#comment-form" title="11 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/1890586115384328475?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/1890586115384328475?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/04/science-on-desktop.html" title="Science on the Desktop" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-YzTYVDesjoA/UXkrNm6emSI/AAAAAAAABiw/pB9TqDMvP2U/s72-c/Wolbachia+Mitochondra_resize.jpg" height="72" width="72" /><thr:total>11</thr:total></entry><entry gd:etag="W/&quot;DkUGRn48eip7ImA9WhBVGE0.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-1062824868917473502</id><published>2013-04-22T12:34:00.000-04:00</published><updated>2013-04-24T07:50:27.072-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-24T07:50:27.072-04:00</app:edited><title>When Vitamins Turn Deadly</title><content type="html">The other day I did something I swore I'd never do: I paid $31.50 to Elsevier for a copy of a scientific paper. 
I spent a good 30 minutes looking for a free version of the paper online first, of course, using every sneaky trick I know. Then I debated with myself for 30 minutes, saying things like "You are not seriously going to pay those crooks $31.50, are you?" And answering: "No, of course not." "But you have to." "I know." "So do it." "I can't. I'd rather set my face on fire." And on and on.&lt;br /&gt;
&lt;br /&gt;
After an hour I realized, of course, that the time I'd wasted &lt;i&gt;not buying&lt;/i&gt; the paper had been worth much more than the paper itself. So I bought the paper. Just this once.&lt;br /&gt;
&lt;br /&gt;
The article in question is called "Stopping the Active Intervention: CARET," by Bowen &lt;i&gt;et al.&lt;/i&gt;, &lt;i&gt;Controlled Clinical Trials&lt;/i&gt; 24 (2003) 39–50. As you might have guessed, I'm going to spend the rest of today's blog talking about it so you don't have to buy it yourself.&lt;br /&gt;
&lt;br /&gt;
The paper I bought talks about the circumstances surrounding the halting of an infamous cancer-prevention study called CARET, otherwise known as the Carotene and Retinol Efficacy Trial. It was a large National Cancer Institute trial that didn't go well.&lt;br /&gt;
&lt;br /&gt;
CARET was actually one of &lt;i&gt;two &lt;/i&gt;large NCI vitamin studies that didn't go well in the 1980s. The other was the Alpha-Tocopherol and Beta-Carotene (ATBC) Lung Cancer Prevention Study. CARET was carried out in the U.S.; ATBC took place in Finland.&lt;br /&gt;
&lt;br /&gt;
I'm not going to spend a lot of time talking about the studies here (for that, see my &lt;a href="http://bigthink.com/devil-in-the-data/the-dark-side-of-antioxidants" target="_blank"&gt;latest blog&lt;/a&gt; on this subject at &lt;a href="http://bigthink.com/blogs/devil-in-the-data" target="_blank"&gt;Big Think&lt;/a&gt;). Suffice it to say that both studies began enrolling participants in 1985, the Finns at a rate of about 9,000 a year, the Americans at 2,000 a year. The ATBC study reached its enrollment goal of just under 30,000 people in three years. CARET went on enrolling for nine years, peaking at 18,314 people in September 1994.&lt;br /&gt;
&lt;br /&gt;
It's customary in large trials to enroll people gradually—not just for practical reasons (it's infeasible to sign up 30,000 people at once) but also because if a treatment proves to be deadly, you want to be able to halt the study before huge numbers of people have been put at risk.&amp;nbsp; Sadly, both ATBC and CARET put large groups at risk. Both should have been stopped prematurely. Only CARET was—and its halting came unnecessarily late.&lt;br /&gt;
&lt;br /&gt;
The purpose of ATBC and CARET was to validate the usefulness of antioxidant supplements (Vitamins A and E, chiefly) as cancer-preventive agents. For ATBC, the study population consisted of male Finnish smokers aged 50 to 69. For CARET it was current and former smokers, male and female, aged 50 to 69, plus asbestos-exposed workers (all men) aged 45 to 74. (The asbestos workers made up 22% of the 18,314 study participants.) The experimental design in Finland was a 2x2 matrix design in which a quarter of the participants got 20 mg/day of beta-carotene, a quarter got 50 mg/day of alpha-tocopherol (Vitamin E), a quarter got both, and a quarter got placebo. Thus, treatment groups outnumbered placebo 3:1. In the U.S., CARET began with a 2x2 design but the design was changed early on so that the bulk of the study population got either placebo &lt;i&gt;or &lt;/i&gt;a combo of 30 mg/day beta-carotene and 25,000 IU of retinol per day (thus a 1:1 ratio of treated to untreated populations).&lt;br /&gt;
&lt;br /&gt;
According to the ATBC &lt;a href="http://jnci.oxfordjournals.org/content/88/21/1560.full.pdf" target="_blank"&gt;writeup&lt;/a&gt; that eventually appeared in &lt;i&gt;The Journal of the National Cancer Institute&lt;/i&gt;, "An independent Data and Safety Monitoring Board convened twice annually to monitor trial progress and to study unblinded data that were relevant to intervention safety and efficacy." Which makes it all the harder to understand why the ATBC trial wasn't halted early when the beta-carotene groups separated from placebo—in the wrong direction. It was evident in Year 5 of the eight-year study that people in the treatment group were developing lung cancer at a heightened rate. In fact, that ended up being the key finding of the study: taking Vitamin E and beta-carotene leads to 18% more cancer.&lt;br /&gt;
&lt;br /&gt;
Presumably, the Finns didn't halt their intervention early for the simple reason that the study's &lt;i&gt;stopping criterion&lt;/i&gt; (whatever it happened to be; all large studies have them) wasn't met. Perhaps the divergence of treated and untreated group performance was deemed statistically non-significant. We'll never know for sure. &lt;br /&gt;
&lt;br /&gt;
The ATBC results &lt;a href="http://www.nejm.org/doi/pdf/10.1056/NEJM199404143301501" target="_blank"&gt;appeared&lt;/a&gt; in the April 14, 1994 issue of &lt;i&gt;The New England Journal of Medicine&lt;/i&gt;. In &lt;a href="http://www.sciencedirect.com/science/article/pii/S0197245602002775" target="_blank"&gt;"Stopping the Active Intervention: CARET" ($31.50 from Elsevier)&lt;/a&gt;, we learn from the lead investigator and his coauthors:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;On March 25, 1994, the NCI directed investigators of NCI-funded randomized trials involving beta-carotene to inform their data and safety monitoring boards of the ATBC trial findings, to review their data in light of these findings and to develop plans to inform their participants of the ATBC results and incorporate the findings in their consent forms. &lt;/span&gt;&lt;/blockquote&gt;
This is an interesting statement in a couple of ways. First, it doesn't say exactly &lt;i&gt;who&lt;/i&gt; at NCI "directed investigators" to review their data. The implication is that somebody higher up (than the lead investigator) gave an order. Whoever that person was, he or she knew the unblinded Finnish results &lt;i&gt;ahead of publication&lt;/i&gt;. Secondly, it says safety boards were directed to "review their data." But in the CARET study (unlike ATBC), the safety board &lt;i&gt;did not have access to &lt;/i&gt;unblinded data. How can you review data that's still blinded? And if you did so, what purpose would it serve? You'd look at the data and see that one group of people, under one set of codes, was doing worse than another group under another set of codes. You might very reasonably assume that the group doing worse was the placebo group. But you wouldn't know for sure.&lt;br /&gt;
&lt;br /&gt;
This is where it gets interesting, because in "Stopping the Active Intervention: CARET" ($31.50 from Elsevier) we learn that&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;On March 29 and April 1 [1994], CARET’s Principal Investigator convened telephone conference calls with the SEMC during which the committee was unblinded to intervention group assignment.&amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/blockquote&gt;
Note the phrase "the committee was unblinded." In actuality, there was no unblinding over the phone. In a separate writeup in &lt;i&gt;Data Monitoring in Clinical Trials: A Case Studies Approach &lt;/i&gt;(Springer, 2006), page 222, former CARET Safety Committee members Anthony B. Miller, Julie Buring, and O. Dale Williams explain that in&lt;i&gt; &lt;/i&gt;August 1994—&lt;i&gt;four months after&lt;/i&gt; the phone call—"we unanimously agreed that we should be unblinded as to the nature of the regimens given to the coded groups." The unblinding actually happened in August, after statisticians compiled their interim report for the Safety Committee—not March.&lt;br /&gt;
&lt;br /&gt;
The account given by the lead investigators in "Stopping the Active Intervention: CARET" makes it sound as if urgent action was undertaken in March 1994 to begin a stand-down of the experiment. That's not what happened. Not by a long shot.&lt;br /&gt;
&lt;br /&gt;
When the Safety Committee got its first look at unblinded data (in August 1994), there was no doubt that CARET's study population was experiencing the same elevated cancer rates seen by the Finns. The Safety Committee took a vote on whether to stop the CARET trial—and found itself deadlocked. Two members of the five-person committee were in favor of stopping CARET. The others thought it would be better to go ahead. The rationale for continuing in spite of elevated cancer in the treated group (here I quote from the account given in &lt;i&gt;Data Monitoring in Clinical Trials&lt;/i&gt;) was:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;The statistical significance of the difference had not crossed the O’Brien–Fleming boundary (i.e., this could still be a chance finding).&lt;/li&gt;
&lt;li&gt;The effect was surprisingly rapid and must mean if real that preexisting (but undiagnosed) lung cancers had had their growth accelerated by the regimen.&lt;/li&gt;
&lt;li&gt;We knew of no mechanism of the action of beta-carotene that could have induced such an effect.&lt;/li&gt;
&lt;li&gt;There were other chemoprevention trials using beta carotene ongoing, to stop CARET now would have an undesirable adverse effect on these trials.&lt;/li&gt;
&lt;li&gt;We owed it to science to be absolutely certain of the adverse effect before stopping the trial.&lt;/li&gt;
&lt;/ul&gt;
The two members who favored a stop cited these reasons:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;This was the second trial to show an adverse effect of beta-carotene chemoprevention; it was extremely unlikely to be due to chance.&lt;/li&gt;
&lt;li&gt;We owed it to the participants to prevent possible further harm to them. It was perhaps particularly unfortunate that the adverse effect appeared to be present in asbestos workers as well as current smokers.&lt;/li&gt;
&lt;li&gt;The adverse effects appeared not to be restricted to lung cancer; there appeared to be an adverse effect on cardiovascular disease as well.&lt;/li&gt;
&lt;/ul&gt;
Incredibly, even though the Finns had just shown (in a much larger study population) a definite correlation between beta-carotene usage and lung cancer in high-risk individuals, and even though CARET's own data replicated those results perfectly, and even though CARET was administering a 50% higher dose of beta-carotene to its participants than the Finns had used (possibly putting people at much greater risk), a decision was made to continue CARET, on the absurd assumption that after more data were accumulated, the bad trend-line would prove to have been nothing more than a statistical fluke.&lt;br /&gt;
&lt;br /&gt;
In September 1995, a year after the first interim analysis, the Safety Committee got a look at updated results. They were even worse than before.&lt;br /&gt;
&lt;br /&gt;
Many memos and meetings and conference calls and cross-country flights by NCI personnel later, a decision was finally reached in mid-December 1995 to pull the plug on CARET. The decision was approved on December 18. Then everyone adjourned for Christmas holidays. Then on January 12, 1996, letters went out (yes, by &lt;i&gt;snail mail&lt;/i&gt;) to all CARET participants, informing them of the decision to end the study (and the reasons for the decision).&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-bGUyu7Da8es/UXVhR5qGStI/AAAAAAAABig/k3yW7DS_j_E/s1600/CARET-Results.jpg" imageanchor="1"&gt;&lt;img border="0" height="310" src="http://2.bp.blogspot.com/-bGUyu7Da8es/UXVhR5qGStI/AAAAAAAABig/k3yW7DS_j_E/s320/CARET-Results.jpg" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;CARET found increases in all-cause mortality, cardiovascular &lt;br /&gt;
mortality, and (not shown here) lung cancer mortality in high-risk&lt;br /&gt;
individuals who took beta-carotene and retinol. (&lt;i&gt;N Engl J&lt;br /&gt;Med&lt;/i&gt; 1996;334:1150-5.)&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
When CARET's results were published in May 1996 in &lt;i&gt;The New England Journal of Medicine&lt;/i&gt;, the medical world was aghast to learn that administration of beta-carotene and retinol to high-risk individuals actually increased the rate of lung cancer by 28% compared to placebo. Cardiovascular mortality, likewise, went up 26%.&lt;br /&gt;
&lt;br /&gt;
The Finnish ATBC study had found an increase in lung cancer of 18%, based on a beta-carotene consumption of 20 mg/day. In the CARET study, participants had taken a 50% higher dose of beta-carotene (30 mg/day). They got 50% more cancer.&lt;br /&gt;
&lt;br /&gt;
Hindsight is an evil game, and it's easy to bash NCI for not pulling the plug on CARET earlier than it did without knowing all the particulars. But frankly, what's to know? The Finns published their results in April 1994. Anyone could have looked at those results, then looked at the CARET experimental protocol (and study population), and immediately seen the red flags. Even without knowing the unblinded results, it would have been prudent to halt (or at least begin winding down) the study in April 1994, three and a half years ahead of the planned stop date. Instead, a befuddled Safety Committee (and a bureaucracy-entrenched National Cancer Institute) waited until January 12, 1996 to send letters by first-class mail to people who were unnecessarily dying. In the irritatingly self-congratulatory account of this travesty in "Stopping the Active Intervention: CARET," we're constantly reminded that the study was terminated 21 months early, as if it's something to be proud of. The fact is, the study was terminated a minimum of two years later than it should have been, due to negligent disregard of the Finnish results (which were known ahead of time to NCI higher-ups). The Finns, too, should have stopped early, as soon as it became apparent that the treatment groups had diverged (in the wrong direction) from the placebo group, which is to say, in Year 5 of the 8-year study.&lt;br /&gt;
&lt;br /&gt;
To argue that it was okay to wait an extra two years to stop the CARET study because the formal stopping criteria had not been met is a phony argument. When CARET finally was stopped, the stopping criterion had &lt;i&gt;still not been met&lt;/i&gt;.&lt;br /&gt;
&lt;br /&gt;
As I said before, I've provided more detail on ATBC and CARET (and other deadly vitamin trials) in &lt;a href="http://bigthink.com/devil-in-the-data/the-dark-side-of-antioxidants" target="_blank"&gt;a separate post at Big Think&lt;/a&gt;. Please read that post before deciding whether to adjust your daily vitamin routine. In the end, you might want to consider scaling back your use of Vitamins E and A if you're at high risk of cancer (and maybe even if you're not at high risk). The evidence says these supplements are harmful for at least some people. I'll post more on the subject here over the next few days.&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;
&lt;i&gt;Please share this story with your social media contacts if you found it helpful. And please see the companion post at &lt;/i&gt;&lt;i&gt;&lt;a href="http://bigthink.com/devil-in-the-data/the-dark-side-of-antioxidants"&gt;http://bigthink.com/devil-in-the-data/the-dark-side-of-antioxidants&lt;/a&gt;. Thanks!&lt;/i&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/1062824868917473502/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/04/when-vitamins-turn-deadly_22.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/1062824868917473502?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/1062824868917473502?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/04/when-vitamins-turn-deadly_22.html" title="When Vitamins Turn Deadly" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-bGUyu7Da8es/UXVhR5qGStI/AAAAAAAABig/k3yW7DS_j_E/s72-c/CARET-Results.jpg" height="72" width="72" /><thr:total>3</thr:total></entry><entry gd:etag="W/&quot;CEUEQXs9cCp7ImA9WhBVFE4.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-3571754549400501524</id><published>2013-04-20T00:30:00.000-04:00</published><updated>2013-04-20T00:30:00.568-04:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2013-04-20T00:30:00.568-04:00</app:edited><title>Is Aging Caused by Oxidative Stress?</title><content type="html">Everybody wants to know what causes aging, and what can be done about it. As it turns out, we know a lot about aging. But we know very little about how to prevent it. &lt;br /&gt;
&lt;br /&gt;
Of all the theories of aging that have been proposed, the most thoroughly researched, by far, is the &lt;a href="http://en.wikipedia.org/wiki/Free_radical_theory" target="_blank"&gt;Free Radical Theory of Aging&lt;/a&gt;, which is more properly now called the Oxidative Stress Theory of Aging. It's been 60 years since &lt;a href="http://en.wikipedia.org/wiki/Denham_Harman" target="_blank"&gt;Denham Harman&lt;/a&gt; first proposed that senescence is driven by the buildup of oxidative damage to DNA and other biological macromolecules. Since then, extensive research has verified repeatedly that as tissues age, they do, in fact, accumulate a wide variety of types of oxidative damage. &lt;br /&gt;
&lt;br /&gt;
It's because of the Oxidative Stress Theory of Aging that you see so many foods these days labeled "Rich in Antioxidants." Supposedly, antioxidants (like Vitamin E and beta carotene) confer resistance to heart disease and other ailments. In the U.S., food (and supplement) makers are forbidden by &lt;a href="http://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfcfr/CFRSearch.cfm?fr=101.54" target="_blank"&gt;law&lt;/a&gt; from making specific therapeutic claims around antioxidants. But this restriction is actually a boon for marketers, because the nameless benefits that accrue to antioxidants, whatever they are, will be conjured in the buyer's mind with much greater force and power than anything a label or an ad could possibly say, the same way a person watching a horror movie will make a monster even scarier (in his or her imagination) if the monster isn't actually &lt;i&gt;shown &lt;/i&gt;on screen.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-E_tq9cXG_QM/UW_2-huL4nI/AAAAAAAABiQ/0Iyrsd19RJw/s1600/RiceCrispies.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="263" src="http://1.bp.blogspot.com/-E_tq9cXG_QM/UW_2-huL4nI/AAAAAAAABiQ/0Iyrsd19RJw/s400/RiceCrispies.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;In 2010, the Federal Trade Commission made Kellogg's stop using the "Immunity" claim (tied to antioxidants) on its cereal packaging. The packaging you see here was discontinued. Notice that the "25%" badge seems to imply that there is a "Daily Value" for antioxidants. There is no such thing.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
The bogeyman, in this case, goes by the name "&lt;a href="http://en.wikipedia.org/wiki/Reactive_oxygen_species" target="_blank"&gt;reactive oxygen species&lt;/a&gt;" (ROS), which covers a lot of ground, chemically. Originally, ROS meant free radicals—breakdown products of peroxides. Which is a perfect bogeyman, because free radicals are so fleeting their concentrations in living cells can't even be measured. (They're so chemically reactive that they last for only a nanosecond or so.) These days, ROS can also refer to superoxides, aldehydes (e.g., formaldehyde), and/or anything else that's reactive and contains oxygen. (Again, a conveniently vague category of "scary stuff.")&lt;br /&gt;
&lt;br /&gt;
The way bogeyman research works in science is, when a new mediator of tissue damage is first identified (for example: nitric oxide, superoxide anion, prostaglandins, leukotrienes, interleukin-6, interleukin-8, tumor necrosis factor alpha) researchers rush to measure it in a host of human and/or animal diseases. Soon, the molecule in question is "implicated" in the pathogenesis of various diseases. &lt;br /&gt;
&lt;br /&gt;
The problem is, finding that XYZ bogey-molecule is &lt;i&gt;implicated &lt;/i&gt;in this or that disease is not the same thing as finding that it &lt;i&gt;causes &lt;/i&gt;the disease. Free radicals have been &lt;i&gt;implicated &lt;/i&gt;in over a hundred diseases (Gutteridge, JMC, "Free radicals in disease processes: a compilation of cause and consequence," &lt;i&gt;Free Radic Res Commun&lt;/i&gt; 1993;19:141-58). They're the &lt;i&gt;cause &lt;/i&gt;of none of them.&lt;br /&gt;
&lt;br /&gt;
Until recently, it's been impossible to show a cause-effect relationship between oxidative stress and aging. We know the two are linked, but we don't know if it's in causal fashion.&lt;br /&gt;
&lt;br /&gt;
Recent work with genetically modified mice may have finally provided some much-needed clarification on the role of oxidative stress in aging. I'm referring to work done by Viviana Pérez and her colleagues at the University of Texas Health Science Center in San Antonio, Texas, &lt;a href="http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2789432/pdf/nihms-123940.pdf" target="_blank"&gt;published in 2009&lt;/a&gt;. Pérez looked into the life-extending (or -reducing) effects of various mutations involving oxidative enzymes in mice, the idea being that if you knock out certain oxidative-damage-repair genes, mice should age prematurely (if they live at all), whereas if you amplify or upregulate certain damage-repair genes, mice should show fewer signs of aging (and maybe live longer).&lt;br /&gt;
&lt;br /&gt;
The Pérez results take a while to explain, but they're worth looking at carefully, because they shed much-needed light on the question of whether aging is actually caused by oxidative damage (or the converse). &lt;br /&gt;
&lt;br /&gt;
When the Pérez team looked at genetically modified mice that lacked &lt;i&gt;glutathione peroxidase 1&lt;/i&gt; (Gpx1, a major scavenger of intracellular peroxides), they found, surprisingly, that the mice lived to normal age and showed no pathology. However, mice deficient in &lt;i&gt;glutathione peroxidase 4 &lt;/i&gt;(genetic marker Gpx4) were embryonic-lethal. The latter finding tends to support Free Radical (Oxidative Stress) Theory.
&lt;br /&gt;
&lt;br /&gt;
An enzyme called &lt;i&gt;methionine sulfoxide reductase-A&lt;/i&gt; (MsrA) repairs oxidized methionine residues in proteins and may also function as a general antioxidant.  Pérez &lt;i&gt;et al.&lt;/i&gt; found that mice null for MsrA lived a normal lifespan even though they showed some additional sensitivity to oxidative stress.&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;Thioredoxin 2&lt;/i&gt; (Trx2) plays an important role in repairing oxidation of cysteine residues in proteins. It turns out Trx2-null mice are embryonic-lethal, but Trx2 +/- (heterozygous) mice, with just one copy of the gene instead of the normal two, had 16% longer maximum lifespan.&lt;br /&gt;
&lt;br /&gt;
There are two major superoxide dismutases that break down superoxides in cells: CuZnSOD and MnSOD (genetic markers SOD1 and SOD2). Pérez &lt;i&gt;et al.&lt;/i&gt; found that mice lacking the former suffer a 30% reduction in mean and maximum lifespan. Mice lacking the latter die within days of birth.&lt;br /&gt;
&lt;br /&gt;
To recap so far: Mice null for SOD2, Gpx4, or Trx2 are non-viable, while those null for SOD1 have 30% shorter lives. These results tend to support Free Radical Theory. But knockout mice lacking Gpx1 or MsrA live normal lifespans, which contradicts Free Radical Theory.&lt;br /&gt;
&lt;br /&gt;
How are we to interpret these results? One problem with knockout studies is that if a certain chemical reaction doesn't occur (because the responsible enzyme system is taken away—"knocked out" genetically), it's difficult to know whether resulting harm to the host is due to a buildup of unreacted precursor molecules, or (rather) the absence of crucial end-products. The end-products of the reaction might be vital to downstream metabolism. It might &lt;i&gt;not &lt;/i&gt;simply be that the precursors to the reaction are toxic. After all, if hydrogen peroxide (the end-product of superoxide dismutases) is an important signalling molecule, as &lt;a href="http://5mp.eu/fajlok/bokkon-brain-imagery/role_of_reactive_oxygen_species_in_cell_signalling_pathways._www.5mp.eu_.pdf" target="_blank"&gt;recent work seems to indicate&lt;/a&gt;, you would &lt;i&gt;expect &lt;/i&gt;abnormalities in SOD1 or SOD2 to be harmful indeed—for reasons having nothing to do with aging.&lt;br /&gt;
&lt;br /&gt;
Bottom line, the finding that mice lacking SOD1, SOD2, or Gpx4 are unhealthy is not sufficient to vindicate the Oxidative Stress Theory.&lt;br /&gt;
&lt;br /&gt;
The ultimate test for Oxidative Stress Theory would be to see whether mice show fewer signs of aging (e.g., less DNA damage with age)—and &lt;i&gt;actually live longer—&lt;/i&gt;when enzymes involved in combating oxidative stress are &lt;i&gt;increased &lt;/i&gt;(over-expressed).&lt;i&gt; &lt;/i&gt;The  Pérez team tried exactly this approach.&lt;br /&gt;
&lt;br /&gt;
As mentioned before, there are two major superoxide dismutases that break down superoxides in cells: CuZnSOD and MnSOD (genetic markers SOD1 and SOD2). When mice were made to over-express SOD1 (so that they had &lt;i&gt;two to five times&lt;/i&gt; the normal activity of the CuZnSOD enzyme), the mice were indeed more resistant to oxidative stress as measured by standard tests involving tolerance of paraquat and diquat. But the mice &lt;i&gt;lived no longer than ordinary mice.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
The same was observed for mice that over-expressed SOD2.&lt;br /&gt;
&lt;br /&gt;
When Pérez &lt;i&gt;et al.&lt;/i&gt; created mice that over-expressed &lt;a href="http://asserttrue.blogspot.com/2013/04/hydrogen-peroxide-and-scientific-dogma.html" target="_blank"&gt;catalase&lt;/a&gt; (the enzyme that degrades hydrogen peroxide to water and oxygen), they found the mice were less prone to DNA damage—&lt;i&gt;but lived no longer than normal.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In mice with upregulated glutathione peroxidase 4, enhanced protection against various kinds of oxidative stress was demonstrated. But the mice lived no longer than normal wild-type animals.&lt;br /&gt;
&lt;br /&gt;
The Pérez group also tried over-expressing more than one antioxidative gene at once. No combination produced any lifespan extension.&lt;br /&gt;
&lt;br /&gt;
To recap: mice &lt;i&gt;do not live longer&lt;/i&gt; when they over-express antioxidant enzymes (singly or in combinations), &lt;i&gt;even though they show heightened protection against DNA damage, lipid damage, and other typical signatures of oxidative stress.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Pérez &lt;i&gt;et al. &lt;/i&gt;concluded:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;We believe the fact that the lifespan was not altered in the majority [of] the knockout/transgenic mice is strong evidence against oxidative stress/damage playing a major role in the molecular mechanism of aging in mice.&lt;/span&gt;&lt;/blockquote&gt;
It's hard to disagree with that conclusion. Some of the genetic manipulations Pérez &lt;i&gt;et al. &lt;/i&gt;tried were inspired by fruit-fly experiments that gave much more encouraging results. But mammals are not fruit flies. And it seems unlikely to me that the results reported by Pérez &lt;i&gt;et al. &lt;/i&gt;are some kind of fluke, limited to mice. (It seems unlikely that entirely different results would be found in humans.) Altogether, Pérez &lt;i&gt;et al. &lt;/i&gt;tried 18 different genetic manipulations. Not one extended the life of mice.&lt;br /&gt;
&lt;br /&gt;
To me, it means we can put the oxidative-stress bogeyman to bed now, and go on to worry about other things. Whatever's keeping us from living to be 120, it's not oxidative stress.&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;
&lt;br /&gt;
&lt;hr /&gt;
&lt;i&gt;For more on this subject, see my &lt;a href="http://bigthink.com/devil-in-the-data/who-needs-antioxidants-no-one-actually" target="_blank"&gt;recent post at Big Think&lt;/a&gt;.&lt;/i&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/3571754549400501524/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/04/is-aging-caused-by-oxidative-stress.html#comment-form" title="5 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/3571754549400501524?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/3571754549400501524?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/04/is-aging-caused-by-oxidative-stress.html" title="Is Aging Caused by Oxidative Stress?" 
/><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-E_tq9cXG_QM/UW_2-huL4nI/AAAAAAAABiQ/0Iyrsd19RJw/s72-c/RiceCrispies.png" height="72" width="72" /><thr:total>5</thr:total></entry><entry gd:etag="W/&quot;AkYMRns9fSp7ImA9WhBVE00.&quot;"><id>tag:blogger.com,1999:blog-21557504.post-1667299625295335176</id><published>2013-04-18T00:30:00.000-04:00</published><updated>2013-04-18T14:03:07.565-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-18T14:03:07.565-04:00</app:edited><title>Hydrogen Peroxide and Scientific Dogma</title><content type="html">&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-m4IhJGJftmQ/UWVqTQOFkFI/AAAAAAAABiA/sUD3WUonYmI/s1600/catalase.png" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-m4IhJGJftmQ/UWVqTQOFkFI/AAAAAAAABiA/sUD3WUonYmI/s320/catalase.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;&lt;i&gt;The catalase test is simple: Add a drop of hydrogen &lt;/i&gt;&lt;br /&gt;
&lt;i&gt;peroxide 
to a sample of bacteria on a microscope slide and see&lt;/i&gt;&lt;br /&gt;
&lt;i&gt;if it fizzes. 
Anaerobes (even aerotolerant ones, such as Streptococcus &lt;/i&gt;&lt;br /&gt;
&lt;i&gt;pyogenes) won't fizz
because they lack catalase. 
Aerobic bacteria &lt;/i&gt;&lt;br /&gt;
&lt;i&gt;produce oxygen
bubbles, as on the right.&lt;/i&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;span class="alegreya"&gt;
I remember the first time I was introduced (as a young bacteriology student) to the catalase test. You scrape a colony of bacteria off the surface of an agar dish, rub it onto a microscope slide, then take an ordinary eyedropper filled with 3% hydrogen peroxide and drop a big fat drop of liquid onto the little slimy smudge of bacteria. If the smudge begins to fizz vigorously, like Alka Seltzer, the bacteria are catalase-positive. If no fizzing happens, they're catalase-negative.&lt;br /&gt;
&lt;br /&gt;
The fizzing happens because of an enzyme called &lt;a href="http://en.wikipedia.org/wiki/Catalase" target="_blank"&gt;catalase&lt;/a&gt; that promotes the conversion of hydrogen peroxide to water and molecular oxygen:&lt;/span&gt;
&lt;span class="alegreya"&gt;&lt;br /&gt;
&lt;/span&gt;&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;span style="color: #cc0000;"&gt;&lt;span class="alegreya"&gt;2 H&lt;sub&gt;2&lt;/sub&gt;O&lt;sub&gt;2&lt;/sub&gt; → 2 H&lt;sub&gt;2&lt;/sub&gt;O + O&lt;sub&gt;2&lt;/sub&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;span class="alegreya"&gt;
&lt;br /&gt;
For the last hundred years, every student of bacteriology has been taught that the reason aerobic bacteria (and indeed all aerobic life forms, up to and including humans) have catalase is that hydrogen peroxide is severely toxic and must be gotten rid of, lest it form highly reactive hydroxyl radicals:&lt;br /&gt;

&lt;/span&gt;&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;span style="font-family: Georgia,&amp;quot;Times New Roman&amp;quot;,serif;"&gt;&lt;span class="alegreya"&gt;&amp;nbsp;&lt;span style="color: #cc0000;"&gt;H&lt;sub&gt;2&lt;/sub&gt;O&lt;sub&gt;2&lt;/sub&gt; → OH + OH&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;span class="alegreya"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;span class="alegreya"&gt;
The OH radicals, being extremely reactive chemically, will attack just about anything: DNA, RNA, proteins, lipids, mucopolysaccharides, what have you. Hydroxyl radicals are toxic. Catalase provides a way of neutralizing peroxides so that no radicals can form.&lt;br /&gt;
&lt;br /&gt;
Anaerobic organisms, the story goes, &lt;i&gt;lack&lt;/i&gt; catalase because they live in oxygen-poor environments where things like peroxides don't form. So-called &lt;i&gt;strict anaerobes&lt;/i&gt; are actually killed&lt;i&gt; &lt;/i&gt;by exposure to air. The reason they die when they come in contact with air (supposedly) is that in the presence of oxygen they experience an endogenous buildup of peroxides that (in the absence of catalase) go on to form toxic free radicals that, in turn, eventually cause damage to DNA, proteins, lipids, and other macromolecules.&lt;br /&gt;
&lt;br /&gt;
That's the official dogma on peroxides, free radicals, and catalase.&lt;br /&gt;
&lt;br /&gt;
The only trouble is, as with so much other dogma in this world, it's completely wrong. &lt;br /&gt;
&lt;br /&gt;
And it would all be harmless prattle if it just applied to bacteria. But unfortunately, this bit of assumption-laden dogma about peroxides and free radicals leads to some rather fanciful notions about the role of "oxidative stress" in ordinary metabolism. The notion that peroxides and free radicals (so-called Reactive Oxygen Species) are &lt;i&gt;harmful &lt;/i&gt;has led to the spending of billions of research dollars on "oxidative stress" and ways to stave off "oxidative damage" to DNA, proteins, etc. It has led to billions of dollars of &lt;a href="http://bigthink.com/devil-in-the-data/who-needs-antioxidants-no-one-actually" target="_blank"&gt;misleading advertising around foods "rich in antioxidants."&lt;/a&gt; (And it has actually led to clinical trials of antioxidants like beta carotene and Vitamin E in which people died needlessly, something I'll discuss in more detail in a future post. For now, you might want to refer to &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed/8602180?dopt=Citation" target="_blank"&gt;this paper&lt;/a&gt;.)&lt;br /&gt;
&lt;br /&gt;
Suppose that, rather than accepting the catalase-as-detoxifier/peroxides-as-evil theory at face value, we ask some fundamental questions. Such as:&lt;br /&gt;
&lt;/span&gt;&lt;br /&gt;
&lt;ul&gt;&lt;span class="alegreya"&gt;
&lt;li&gt;What's the &lt;i&gt;evidence &lt;/i&gt;that anaerobes exposed to air actually die of &lt;i&gt;peroxide poisoning?&lt;/i&gt;&amp;nbsp;&lt;/li&gt;
&lt;li&gt;If peroxides are poisonous, why are there no anaerobes that have catalase? (If there's survival value for &lt;i&gt;aerobes &lt;/i&gt;to have catalase, surely there's even more survival value for an &lt;i&gt;anaerobe &lt;/i&gt;to have it?)&lt;/li&gt;
&lt;li&gt;If catalase exists to protect aerobic cells from peroxide poisoning, then we should expect catalase-knockout mutations to be lethal, or at least gravely deleterious, yes?&lt;/li&gt;
&lt;/span&gt;&lt;/ul&gt;
&lt;br /&gt;
&lt;span class="alegreya"&gt;
Let's review some basic facts. First, hydrogen peroxide is not toxic at low concentrations. It is, in the words of one researcher, "poorly reactive: it does not oxidize most biological molecules including lipids, DNA, and proteins" (Halliwell &lt;i&gt;et al.&lt;/i&gt;, "Hydrogen peroxide: Ubiquitous in cell culture and in vivo?",&lt;i&gt;&amp;nbsp;IUBMB Life,&lt;/i&gt; 50: 251–257, 2000, &lt;a href="http://onlinelibrary.wiley.com/doi/10.1080/713803727/pdf" target="_blank"&gt;PDF here&lt;/a&gt;). &lt;i&gt;Concentrated&lt;/i&gt; hydrogen peroxide is toxic (it's a disinfectant), but at the dilute concentrations found in living cells, hydrogen peroxide isn't doing anything harmful to DNA, proteins, or lipids. The situation is analogous to that of hydrochloric acid. At high concentrations, HCl will eat through skin. Put a couple liters into an 80,000-liter swimming pool, though, and you can drink the stuff. So it is with peroxide.&lt;br /&gt;
&lt;br /&gt;
Secondly, peroxides are ubiquitous in living systems (again see the Halliwell paper). In higher life forms H&lt;sub&gt;2&lt;/sub&gt;O&lt;sub&gt;2 &lt;/sub&gt;is produced &lt;i&gt;in vivo&lt;/i&gt; by monoamine oxidase, xanthine oxidases, various dismutases, and other enzymes, under homeostatic control. There's substantial evidence that hydrogen peroxide is a widely used signalling molecule (see references 21 to 26 in the Halliwell paper) and &lt;a href="http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0057618" target="_blank"&gt;recent work&lt;/a&gt; has shown a role for hydrogen peroxide in reparative neovascularization. Recruitment of immune cells to wounds likewise &lt;a href="http://www.plosbiology.org/article/info%3Adoi%2F10.1371%2Fjournal.pbio.1000621" target="_blank"&gt;appears to require hydrogen peroxide&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
Far from being toxic, hydrogen peroxide is an important biomolecule, essential to ordinary metabolic processes. There is zero evidence that hydrogen peroxide does anything harmful in living tissues, at the concentrations normally found &lt;i&gt;in vivo&lt;/i&gt;.&lt;br /&gt;
&lt;br /&gt;
If hydrogen peroxide were toxic, we'd expect that a mutation that knocks out catalase would mean certain death for the host organism. After all, with no catalase to break down H&lt;sub&gt;2&lt;/sub&gt;O&lt;sub&gt;2 &lt;/sub&gt;to oxygen and water, hydrogen peroxide would simply accumulate until reaching toxic levels. But it turns out, naturally occurring catalase-negative mutants of &lt;i&gt;Staphylococcus aureus&lt;/i&gt; have been &lt;a href="https://www.jstage.jst.go.jp/article/yoken/65/5/65_439/_pdf" target="_blank"&gt;reported&lt;/a&gt; (laboratory-created catalase-negative mutant strains of &lt;i&gt;E. coli&lt;/i&gt; and other organisms are known as well). Catalase-knockout mice have been created, and they &lt;a href="http://www.jbc.org/content/279/31/32804.short" target="_blank"&gt;develop normally&lt;/a&gt;. Humans lacking normal catalase were first identified in the early 1950s when a Japanese doctor found that pouring hydrogen peroxide onto a patient's infected gums caused no foaming. The catalase-negative condition in humans is known as &lt;a href="http://rd.springer.com/article/10.1007/BF00484026#page-1" target="_blank"&gt;acatalasemia&lt;/a&gt; or acatalasia. It results in no pathology except an increased tendency toward periodontal infection.&lt;br /&gt;
&lt;br /&gt;
Catalase does serve an important function in aerobic organisms (having nothing to do with detoxification). With catalase, the oxygen in hydrogen peroxide can be scavenged for (re)use in respiration. This is important because every oxygen molecule is worth 38 ATP molecules in the aerobic breakdown of glucose. (ATP, &lt;a href="http://en.wikipedia.org/wiki/Adenosine_triphosphate" target="_blank"&gt;adenosine triphosphate&lt;/a&gt;, is the high-energy molecule that powers the chemical machinery of cells. Without ATP, metabolism grinds to a halt.) By comparison, anaerobic breakdown of glucose (which is to say, fermentative breakdown) yields only 2 ATP molecules per sugar molecule. It's very much in the cell's interest to recycle the oxygen from hydrogen peroxide rather than let it go to waste. Catalase makes that reuse possible.&lt;br /&gt;
&lt;br /&gt;
Anaerobic bacteria obtain energy solely from fermentation. They have no use for oxygen. Therefore they have no use for catalase. A catalase gene would simply be extra genetic baggage for an anaerobe. It would confer no survival value.&lt;br /&gt;
&lt;br /&gt;
It's astonishing to me that the catalase myth (the version of the myth that says catalase exists to &lt;i&gt;detoxify&lt;/i&gt; hydrogen peroxide so as to keep the cell from dying of free-radical-induced damage) has survived for well over a hundred years without anyone questioning it. I see the myth repeated all over the Internet as if it's Gospel. Never do I see any substantiating research referenced in support of it. It's just propagated from one unquestioning drone to another.&lt;br /&gt;
&lt;br /&gt;
Why? Why do myths like this take hold in science? Why do otherwise intelligent scientists cling to them and perpetuate them, regurgitating them in textbooks and handing them down to new generations of students?&lt;br /&gt;
&lt;br /&gt;
I think the answer is,&lt;i&gt; because it makes a good story&lt;/i&gt;, and as human beings we value a good story more than we value &lt;i&gt;checking out&lt;/i&gt; the story to see if it's true (providing it's a suitably satisfying story).&lt;br /&gt;
&lt;br /&gt;
Before there was science, stories were all the human race had as a way of trying to understand the universe. If someone told a good enough story, and the story provided a satisfying-enough explanation of something, the story endured. Some of humankind's most cherished stories have survived for thousands of years. They survive, in many cases, even if they're not verifiably true.&lt;br /&gt;
&lt;br /&gt;
With science, the &lt;i&gt;theory &lt;/i&gt;is the unit of storytelling. If a theory seems to fit the facts, it's accepted. If, on closer inspection, a story is found &lt;i&gt;not &lt;/i&gt;to fit the facts, it will &lt;i&gt;still be accepted by many people, if it's a satisfying enough story.&amp;nbsp;&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
The ancient Greeks knew the earth was not flat. (According to Diogenes Laertius, "Pythagoras was the first who called the earth round; though Theophrastus attributes this to Parmenides, and Zeno to Hesiod.") Nevertheless it took centuries for flat-earth theory to fall into disrepute, and even to this day there are &lt;a href="http://www.lhup.edu/~dsimanek/flat/flateart.htm" target="_blank"&gt;people&lt;/a&gt; who find flat-earth theory satisfying.&lt;br /&gt;
&lt;br /&gt;
So it is with scientific theories. Good stories (and that's all scientific theories are: stories) have staying power. Even when they're wrong.&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;For more on the Oxidative Stress Theory of Aging (and why it's wrong), see &lt;a href="http://bigthink.com/devil-in-the-data/who-needs-antioxidants-no-one-actually" target="_blank"&gt;my blog at Big Think&lt;/a&gt;.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;The views expressed here are entirely my own, not those of my employer.&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://asserttrue.blogspot.com/feeds/1667299625295335176/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://asserttrue.blogspot.com/2013/04/hydrogen-peroxide-and-scientific-dogma.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/1667299625295335176?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/21557504/posts/default/1667299625295335176?v=2" /><link rel="alternate" type="text/html" href="http://asserttrue.blogspot.com/2013/04/hydrogen-peroxide-and-scientific-dogma.html" title="Hydrogen Peroxide and Scientific Dogma" /><author><name>Kas Thomas</name><uri>http://www.blogger.com/profile/10019988763491638199</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="29" height="32" src="http://4.bp.blogspot.com/-jwpU0fLihHQ/TmxUHqlPJuI/AAAAAAAAAs4/ZCDBSd4oUmM/s220/Kas%2Btiny.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-m4IhJGJftmQ/UWVqTQOFkFI/AAAAAAAABiA/sUD3WUonYmI/s72-c/catalase.png" height="72" width="72" /><thr:total>3</thr:total></entry></feed>
