<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;DEYASX49fCp7ImA9WhRbFUg.&quot;"><id>tag:blogger.com,1999:blog-28219072</id><updated>2012-02-06T11:22:28.064-08:00</updated><category term="Dynamic Programming" /><category term="Random DNA/Protein Sequence permutation" /><category term="Variants" /><category term="Imputation" /><category term="Bayesian Probability" /><category term="Inteins Exeins" /><category term="Contig Assembly" /><category term="Sequence Alignment" /><category term="N50" /><category term="Heritability" /><title>genomics</title><subtitle type="html" /><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://genomics-array.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>77</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link 
xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/blogspot/BUfNt" /><feedburner:info uri="blogspot/bufnt" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry gd:etag="W/&quot;DEYASX48fCp7ImA9WhRbFUg.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-4836571095311921016</id><published>2012-02-06T11:22:00.001-08:00</published><updated>2012-02-06T11:22:28.074-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-06T11:22:28.074-08:00</app:edited><title>Free SVN book!!!!!!</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;
A free svn book is here: http://svnbook.red-bean.com/en/1.7/index.html&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-4836571095311921016?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/xugmAHKE6ca0yoJ2qJk1auVXT5E/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/xugmAHKE6ca0yoJ2qJk1auVXT5E/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/xugmAHKE6ca0yoJ2qJk1auVXT5E/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/xugmAHKE6ca0yoJ2qJk1auVXT5E/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/rT0Gf63eN-A" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/4836571095311921016/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=4836571095311921016" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/4836571095311921016?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/4836571095311921016?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/rT0Gf63eN-A/free-svn-book.html" title="Free SVN book!!!!!!" 
/><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2012/02/free-svn-book.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEACSH89eCp7ImA9WhRWF0o.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-1629001294665974145</id><published>2012-01-05T07:17:00.001-08:00</published><updated>2012-01-05T07:19:29.160-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-05T07:19:29.160-08:00</app:edited><title>Awesomest video!!</title><content type="html">&lt;object width = "768" height = "432" &gt; &lt;param name = "movie" value = "http://www-tc.pbs.org/s3/pbs.videoportal-prod.cdn/media/swf/PBSPlayer.swf" &gt; &lt;/param&gt;&lt;param name="flashvars" value="width=768&amp;height=432&amp;video=1506740590&amp;player=viral&amp;end=0&amp;lr_admap=in:warnings:0;in:pbs:0" /&gt; &lt;param name="allowFullScreen" value="true"&gt;&lt;/param &gt; &lt;param name = "allowscriptaccess" value = "always" &gt; &lt;/param&gt;&lt;param name="wmode" value="transparent"&gt;&lt;/param &gt;&lt;embed src="http://www-tc.pbs.org/s3/pbs.videoportal-prod.cdn/media/swf/PBSPlayer.swf" flashvars="width=768&amp;height=432&amp;video=1506740590&amp;player=viral&amp;end=0&amp;lr_admap=in:warnings:0;in:pbs:0" type="application/x-shockwave-flash" allowscriptaccess="always" wmode="transparent" allowfullscreen="true" width="768" height="432" bgcolor="#000000"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;p style="font-size:11px; font-family:Arial, Helvetica, sans-serif; color: #808080; margin-top: 5px; background: transparent; text-align: center; width: 512px;"&gt;Watch &lt;a style="text-decoration:none !important; font-weight:normal !important; height: 
13px; color:#4eb2fe !important;" href="http://video.pbs.org/video/1506740590" target="_blank"&gt;RNAi&lt;/a&gt; on PBS. See more from &lt;a style="text-decoration:none !important; font-weight:normal !important; height: 13px; color:#4eb2fe !important;" href="http://www.pbs.org/wgbh/nova/sciencenow/" target="_blank"&gt;NOVA scienceNOW.&lt;/a&gt;&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-1629001294665974145?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/aLL7eT8pAyGx_e-Ukbx1mzC0vJ0/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/aLL7eT8pAyGx_e-Ukbx1mzC0vJ0/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/aLL7eT8pAyGx_e-Ukbx1mzC0vJ0/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/aLL7eT8pAyGx_e-Ukbx1mzC0vJ0/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/3dk4r3WvJL8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/1629001294665974145/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=1629001294665974145" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/1629001294665974145?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/1629001294665974145?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/3dk4r3WvJL8/awesomest-video.html" title="Awesomest video!!" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2012/01/awesomest-video.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEYDSX8-eSp7ImA9WhRWFEo.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-4375722922656458880</id><published>2011-12-28T13:07:00.000-08:00</published><updated>2012-01-01T18:42:58.151-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-01T18:42:58.151-08:00</app:edited><title>Calling a perl subroutine from PHP scripts</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;
I spent a good part of my Christmas vacation figuring out how to &lt;b&gt;call a perl subroutine from a PHP script&lt;/b&gt;. There are several reasons why you might want to do that. The first and foremost may be that you don't want to replicate all your perl subroutines in PHP in order to use them. Another issue may be incompatibilities. The one I face is an incompatibility of my PHP version with running oracle queries, which can only be solved at the sys admin level. On the other hand, the perl/CGI interface for oracle works just fine.&lt;br /&gt;
&lt;br /&gt;
There are 3 levels at which this task can be achieved: &lt;br /&gt;
1. We will see how to pass absolute values to perl subroutines.&lt;br /&gt;
2. Pass variables to perl subroutines and&lt;br /&gt;
3. Collect return values from the perl subroutine&lt;br /&gt;
&lt;br /&gt;
&lt;u&gt;Following are some points to be remembered:&lt;/u&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
1. If your perl subroutines are packed into perl packages then they are good to go (e.g; The file should begin with a package "name"; header and the end of the file should have a 1; )&lt;br /&gt;
2. &lt;u&gt;Do not&lt;/u&gt; use &amp;lt;include "package name"&amp;gt; inside the PHP script.&lt;br /&gt;
3. Initialize a string with perl commands e.g; $command='perl -MpackageName -e "packageName::subroutine(arg,arg,arg)"'&lt;br /&gt;
4. Call system($command); from the php script. &lt;u&gt;Do not use &lt;/u&gt;backticks (`)&lt;br /&gt;
&lt;br /&gt;
Here is an example of passing &lt;u&gt;absolute value&lt;/u&gt; to perl subroutine:&lt;br /&gt;
&lt;br /&gt;
##Package Test ###&lt;br /&gt;
#!/usr/bin/perl -w&lt;br /&gt;
package Test;&lt;br /&gt;
&lt;br /&gt;
sub printNames&lt;br /&gt;
{&lt;br /&gt;
my $name1 = shift;&lt;br /&gt;
my $name2 = shift;&lt;br /&gt;
&lt;br /&gt;
print "The names are $name1 and $name2\n";&lt;br /&gt;
&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
1;&lt;br /&gt;
&lt;br /&gt;
## save it as Test.pm&lt;br /&gt;
&lt;br /&gt;
&lt;u&gt;Level1: Passing an absolute value into the perl subroutine:&lt;/u&gt;&lt;br /&gt;
&lt;br /&gt;
# test.php&lt;br /&gt;
&amp;lt;?&lt;br /&gt;
$command='perl -MTest -e "Test::printNames(Guest1,Guest2)"';&lt;br /&gt;
system($command);&lt;br /&gt;
?&amp;gt;&lt;br /&gt;
#Open browser and run test.php :&lt;br /&gt;
&lt;br /&gt;
The names are Guest1 and Guest2&lt;br /&gt;
&lt;br /&gt;
 &lt;br /&gt;
&lt;br /&gt;
&lt;u&gt;Level2: Passing a PHP variable into the perl subroutine:&lt;/u&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;u&gt;&amp;lt;? &lt;/u&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
$arg &amp;nbsp; ="guest1 and guest2";&lt;br /&gt;
$arg1 ="guest3 and guest4";&lt;br /&gt;
&lt;br /&gt;
$command = "perl -MTest -e 'Test::printNames(\"$arg\",\"$arg1\")'";&lt;br /&gt;
system($command);&lt;br /&gt;
&lt;br /&gt;
?&amp;gt;&lt;br /&gt;
&lt;br /&gt;
# Open browser and run the command:&lt;br /&gt;&lt;br /&gt;
&lt;br /&gt;The names are guest1 and guest2 and guest3 and guest4&lt;br /&gt;
&lt;br /&gt;
&lt;u&gt;Level3: Collecting the return values from perl subroutine as PHP array&lt;/u&gt;&lt;br /&gt;
&lt;br /&gt;
Instead of running PHP "system" command, run "exec". Print the outputs from inside the perl subroutine, that can be captured by exec. Now the perl subroutine will undergo slight modification:&lt;br /&gt;
&lt;br /&gt;
##Package Test ###&lt;br /&gt;
#!/usr/bin/perl -w&lt;br /&gt;
package Test;&lt;br /&gt;
&lt;br /&gt;
sub calculateVal&lt;br /&gt;
{&lt;br /&gt;
my $val1 = shift;&lt;br /&gt;
my $val2 = shift;&lt;br /&gt;
&lt;br /&gt;
$val1 *= 20;&lt;br /&gt;
$val2 /= 3; &lt;br /&gt;
&lt;br /&gt;
print "$val1\n";&lt;br /&gt;
print "$val2\n"; &lt;br /&gt;
&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
1;&lt;br /&gt;
&lt;br /&gt;
---&lt;br /&gt;
 &lt;br /&gt;
&lt;u&gt;&amp;lt;? &lt;br /&gt;
  &lt;/u&gt;&lt;br /&gt;

&lt;br /&gt;
&lt;br /&gt;


$arg &amp;nbsp; =10;&lt;br /&gt;
$arg1 =300;&lt;br /&gt;
&lt;br /&gt;
$command = "perl -MTest -e 'Test::calculateVal($arg,$arg1)'";&lt;br /&gt;
$out = array(); &lt;br /&gt;
$tmp = exec($command,$out);&lt;br /&gt;
print_r($out);&lt;br /&gt;
?&amp;gt;&lt;br /&gt;
&lt;br /&gt;
# Output&lt;br /&gt;
&lt;br /&gt;
Array&amp;nbsp; &lt;br /&gt;
(&lt;br /&gt;
[0] =&amp;gt; 200&lt;br /&gt;
[1] =&amp;gt; 100&lt;br /&gt;
)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
NOTE: Multidimensional perl arrays can also be passed by printing the value from inside the perl subroutine. &lt;br /&gt;
 &lt;br /&gt;
 &lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-4375722922656458880?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/UsqioV7mimX2D3JAJEQyiIh6Tew/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/UsqioV7mimX2D3JAJEQyiIh6Tew/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/UsqioV7mimX2D3JAJEQyiIh6Tew/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/UsqioV7mimX2D3JAJEQyiIh6Tew/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/g3d1_nGmkfw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/4375722922656458880/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=4375722922656458880" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/4375722922656458880?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/4375722922656458880?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/g3d1_nGmkfw/calling-perl-subroutine-from-php.html" title="Calling a perl subroutine from PHP scripts" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/12/calling-perl-subroutine-from-php.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE8GRHo8eyp7ImA9WhdaEUs.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-3965041679726340373</id><published>2011-10-20T18:42:00.000-07:00</published><updated>2011-10-20T19:07:05.473-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-20T19:07:05.473-07:00</app:edited><title>Installing SQL Developer</title><content type="html">&lt;div dir="ltr" style="text-align: left;" 
trbidi="on"&gt;
This is slightly off-topic for this blog, but nevertheless a very important one. I have decided to blog this one because this particular problem dragged on for at least a few days for me, and after surfing countless sites and installing various software, including harmful ones, I learnt the hard way how to tackle it.&lt;br /&gt;
If you have a 64 bit windows7&amp;nbsp; machine and you are trying your luck installing SQL Developer and failing consistently, this post is for you.&lt;br /&gt;
SQL developer is available at the oracle site here &lt;a href="http://www.oracle.com/technetwork/developer-tools/sql-developer/downloads/index.html"&gt;http://www.oracle.com/technetwork/developer-tools/sql-developer/downloads/index.html&lt;/a&gt;. However, this version does not come with a compatible java version. You may already have java installed in your machine, if not, you may need to follow the instructions from the above mentioned site on how to install java.&lt;br /&gt;
After you have downloaded and unpacked SQL Developer, when you click on the sqldeveloper.exe file, it may say "permission denied". This may be because the executable file may not have execution permission. You may like to change that with a chmod 755 &amp;lt;filename&amp;gt; . Then click on the executable file sqldeveloper.exe . It may possibly ask you for the java path if it does not find one; just provide the path through the browse button option. I was stuck when the program complained about the absence of msvcr100.dll and jvm.dll files . I browsed countless sites and in that process installed malware and ended up cleaning it up later. Be aware, don't download .dll files from anywhere other than safe places. I looked for resources that asked me to download .NET and visual C++ that could solve the lingering msvcr100.dll problem, but in vain. I have installed and uninstalled .NET and visual C++ at least a few times to make sure that the software is installed correctly, but that did not work. Since it was looking for this file from Java, I checked the java distribution and finally located it under&amp;nbsp; ProgramsFiles/Java/jdk.7.0_01/bin/ . Copy pasted it under Windows/system32/ . For jvm.dll, many sites including the java site advised that probably the java installation was incorrect. I re-installed java several times after each un-install and it still did not work. Finally I found a safe site at &lt;a href="https://rt4.cceb.med.upenn.edu/crcu_html/jinit/jinit_download.htm"&gt;https://rt4.cceb.med.upenn.edu/crcu_html/jinit/jinit_download.htm&lt;/a&gt; , from where I downloaded jvm.dll. Copy pasted this file under Windows/system32 and it solved the problem!!&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-3965041679726340373?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/HY3auV6YOlChns2crEHbewhBnWA/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/HY3auV6YOlChns2crEHbewhBnWA/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/HY3auV6YOlChns2crEHbewhBnWA/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/HY3auV6YOlChns2crEHbewhBnWA/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/m9WruQtBA6A" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/3965041679726340373/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=3965041679726340373" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/3965041679726340373?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/3965041679726340373?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/m9WruQtBA6A/installing-sql-developer.html" title="Installing SQL Developer" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/10/installing-sql-developer.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0ADRHY9fyp7ImA9WhdbGEo.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-8751536905482435885</id><published>2011-10-17T10:15:00.000-07:00</published><updated>2011-10-17T10:16:15.867-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-17T10:16:15.867-07:00</app:edited><title>RGS14 - The protein that makes us forget</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;
In this month's protein spotlight issue, a protein, RGS14, is highlighted that makes us filter our memory. Do we not need something that will make us remember things rather than forget them? Well, too much unnecessary information stored in the brain will certainly make it more chaotic. However, silencing RGS14 in mice makes them smarter; I wonder if the same can be said about humans.&lt;br /&gt;
&lt;br /&gt;
The full article is available here &lt;a href="http://web.expasy.org/spotlight/pdf/132/"&gt;http://web.expasy.org/spotlight/pdf/132/&lt;/a&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-8751536905482435885?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/b2gdGmXuKA4bDK2qphRXYfOtlKY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/b2gdGmXuKA4bDK2qphRXYfOtlKY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/b2gdGmXuKA4bDK2qphRXYfOtlKY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/b2gdGmXuKA4bDK2qphRXYfOtlKY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/-cWVsFv1N24" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/8751536905482435885/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=8751536905482435885" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/8751536905482435885?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/8751536905482435885?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/-cWVsFv1N24/rgs14-protein-that-makes-us-to-forget.html" title="RGS14 - The protein that makes us forget" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/10/rgs14-protein-that-makes-us-to-forget.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkYAR3wyeCp7ImA9WhdVF0k.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-311846214413872557</id><published>2011-09-22T20:22:00.000-07:00</published><updated>2011-09-22T20:22:26.290-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-22T20:22:26.290-07:00</app:edited><title>Morph of plant embryo development</title><content type="html">&lt;div dir="ltr" style="text-align: 
left;" trbidi="on"&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;object width="320" height="266" class="BLOGGER-youtube-video" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0" data-thumbnail-src="http://2.gvt0.com/vi/LL28rMiZIPI/0.jpg"&gt;&lt;param name="movie" value="http://www.youtube.com/v/LL28rMiZIPI&amp;fs=1&amp;source=uds" /&gt;
&lt;param name="bgcolor" value="#FFFFFF" /&gt;
&lt;embed width="320" height="266"  src="http://www.youtube.com/v/LL28rMiZIPI&amp;fs=1&amp;source=uds" type="application/x-shockwave-flash"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;/div&gt;
Awesome video!&lt;/div&gt;
&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-311846214413872557?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/W-Kh184IP445kuEIgfemRMSlDLI/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/W-Kh184IP445kuEIgfemRMSlDLI/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/W-Kh184IP445kuEIgfemRMSlDLI/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/W-Kh184IP445kuEIgfemRMSlDLI/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/VxoQTn8IVBU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/311846214413872557/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=311846214413872557" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/311846214413872557?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/311846214413872557?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/VxoQTn8IVBU/morph-of-plant-embryo-development.html" title="Morph of plant embryo development" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/09/morph-of-plant-embryo-development.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkIESHwzeCp7ImA9WhdVF04.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-405961065370688755</id><published>2011-09-22T15:28:00.001-07:00</published><updated>2011-09-22T15:28:29.280-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-22T15:28:29.280-07:00</app:edited><title>To or Not to with cufflink:</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;
&lt;span id="internal-source-marker_0.3013401597640447" style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: bold; text-decoration: none; vertical-align: baseline;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;Cufflink
 is an amazingly easy to install and use software, that lured me into 
using it. However, it is not without its set of pitfalls... I am still 
researching on the illusive nature of the outputs from this software.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;this software and the types of outputs it produces using different commands:&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;Here are few things to keep in mind before trying to run cufflink&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;1. Cufflink can be run on sorted bam files/ sam files.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;2. It can run in multi-threading mode with a -p option and is much faster than single threading mode.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;3.
 The new cuffmerge program first converts your gtf files derived from 
cuffcompare to sam files, merges them, sorts them, runs cufflink on them
 with a set of hard coded parameters and then runs cuffcompare on the result 
finally to give you your merged.gtf file.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;[The
 cufflink commandline option: cufflinks -o ./merged_asm/ -F 0.05 -q 
--overhang-tolerance 200 --library-type=transfrags -A 0.0 
--min-frags-per-transfrag 0 --no-5-extend -p 16 
./merged_asm/tmp/mergeSam_filewvcXTG.]&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;Earlier, I used a route as follows:&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;1. Run cufflink on each of the libraries with a reference gtf and without a reference gtf.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;2.
 Merge the outputs separately using cuffcompare on, with and without 
reference gtf and merge the output with a script to indicate which of 
the transcripts are represented by a gene id. But the potential pitfall 
with this approach is the overlapping and shorter transcripts. This 
clearly is a stumbling block when you are trying to produce an assembled
 transcript.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;In
 the recent cufflink versions, there is an accessory program called 
cuffmerge, which the manual suggests for merging the individual gtfs. As
 I have mentioned earlier, this is again a wrapper, that internally 
calls cufflink and cuffcompare, albeit with several options already 
pre-set. So, what I did was, merged the bam files generated from 
different libraries, merged them with samtools, sorted with samtools and
 derived sam files from the sorted bam for running cufflink [ Please 
note running the same sorted sam files with a reference gtf file 
suggests it is not sorted, where as without a gtf file runs fine..]&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: bold; text-decoration: none; vertical-align: baseline;"&gt;Output files&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: bold; text-decoration: none; vertical-align: baseline;"&gt;with reference gtf:&lt;/span&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt; &lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt; genes.fpkm.tracking -&amp;gt; has no fpkm information&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;transcripts.gtf -&amp;gt; has fpkm column but meaningless (all 0.0000)&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;isoforms.fpkm_tracking -&amp;gt; similar in size and content with genes.fpkm.tracking&lt;/span&gt;&lt;br /&gt;&lt;span 
style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: bold; text-decoration: none; vertical-align: baseline;"&gt;Without reference gtf:&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;transcripts.gtf -&amp;gt; has transcript and exon information with fpkm and cov values; the co-ordinates are 1 based.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;genes.fpkm.tracking -&amp;gt; has gene info with fpkm. 
The co-ordinates are 0 based&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;isoform.fpkm_tracking -&amp;gt; same as genes.fpkm.tracking&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: bold; text-decoration: none; vertical-align: baseline;"&gt;Comparison between all-merged-sam with cufflink VS cufflink -&amp;gt; gtf -&amp;gt; cuffcompare:&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;1.
 In both the cases, only a single exon is reported per gene(Since 
cufflink is run after bowtie directly without running tophat, this may 
be the case).&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;2.
 Far fewer transcripts are found in the first case, and they 
are non-overlapping and larger than in the latter case, where the 
transcripts are short and overlapping. The FPKM is slightly higher than the
 FPKM values in the latter case. The first one seems to merge several 
smaller transcripts together.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-family: Arial; font-size: 11pt; font-style: normal; font-variant: normal; font-weight: normal; text-decoration: none; vertical-align: baseline;"&gt;So,
 in essence, if you have various biological replicates of a single 
treatment type, instead of going through the path of running them 
individually with cufflink, followed by merging the results with 
cuffmerge, merge the mapped bam files first and follow this route. The FPKM
 values are much more accurate in this case...&lt;/span&gt;&lt;/div&gt;
&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-405961065370688755?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/1_WTlSeLLx-ANXzlXbhEraAqF9U/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/1_WTlSeLLx-ANXzlXbhEraAqF9U/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/1_WTlSeLLx-ANXzlXbhEraAqF9U/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/1_WTlSeLLx-ANXzlXbhEraAqF9U/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/Go8QFRPDaZU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/405961065370688755/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=405961065370688755" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/405961065370688755?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/405961065370688755?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/Go8QFRPDaZU/to-or-not-to-with-cufflink.html" title="To or Not to with cufflink:" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/09/to-or-not-to-with-cufflink.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkIDQX4yfip7ImA9WhdVEU8.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-1791393998563788400</id><published>2011-09-14T08:21:00.000-07:00</published><updated>2011-09-15T14:02:50.096-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-15T14:02:50.096-07:00</app:edited><title>Regulatory regions under represented with NGS methods</title><content type="html">&lt;div dir="ltr" style="text-align: left;" 
trbidi="on"&gt;
My new blog post on this subject can be found at SAB blogger web site at:&lt;br /&gt;
&lt;a href="http://products.scienceboard.net/index.php/archives/2011/09/13/916/"&gt;http://products.scienceboard.net/index.php/archives/2011/09/13/916/ &lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-1791393998563788400?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/lPrZT946f1krrdF12IGhiHWiPEU/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/lPrZT946f1krrdF12IGhiHWiPEU/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/lPrZT946f1krrdF12IGhiHWiPEU/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/lPrZT946f1krrdF12IGhiHWiPEU/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/C22Ru4LtADY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/1791393998563788400/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=1791393998563788400" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/1791393998563788400?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/1791393998563788400?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/C22Ru4LtADY/regulatory-regions-under-represented.html" title="Regulatory regions under represented with NGS methods" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/09/regulatory-regions-under-represented.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DE8ARX0yfSp7ImA9WhdWFUQ.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-2593225786360220998</id><published>2011-09-08T13:30:00.000-07:00</published><updated>2011-09-09T12:34:04.395-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-09T12:34:04.395-07:00</app:edited><title>JavaScript: Changing drop down Lists</title><content type="html">&lt;div dir="ltr" 
style="text-align: left;" trbidi="on"&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-AKrsKdzjPAU/TmklnQAoq2I/AAAAAAAABpw/JAkogSs17C0/s1600/temp.jpg" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="240" src="http://2.bp.blogspot.com/-AKrsKdzjPAU/TmklnQAoq2I/AAAAAAAABpw/JAkogSs17C0/s320/temp.jpg" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;The select menu should work something like this. Change the first list and the second list also changes&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
Although this is a very small javascript trick, it is nevertheless a very useful one! While creating forms, you would sometimes like to change a drop down list dynamically depending on which option was chosen in an earlier list (this may be a radio button, a list itself or anything else).&lt;br /&gt;
Here is a step by step procedure:&lt;br /&gt;
&lt;br /&gt;
1. Write down the names and values for first drop down box e.g; Psojae V1-&amp;gt; name, psv1 -&amp;gt; value; Psojae V5-&amp;gt; name, psv5-&amp;gt; value and so on...&lt;br /&gt;
2. For each name in first drop down list, make a sublist of name value pairs, you want to appear on select: for example, for Psojae V1: PS1-&amp;gt;name; ps1-&amp;gt;value; PS2-&amp;gt;name; ps2-&amp;gt;value AND for Psojae V5: WI1-&amp;gt;name; wi1-&amp;gt;value; WI2-&amp;gt;name; wi2-&amp;gt;value.&lt;br /&gt;
3. Now write a javascript&amp;nbsp; with all these primary lists and sublist name value pairs something&amp;nbsp; like this:&lt;br /&gt;
&amp;lt;script language="javascript"&amp;gt;&lt;br /&gt;
var lists = new Array();&lt;br /&gt;
&lt;br /&gt;
//First List &lt;br /&gt;
lists['psv5']&amp;nbsp;&amp;nbsp;&amp;nbsp; = new Array(); // Notice here you are making a list with the value of first list&lt;br /&gt;
lists['psv5'][0] = new Array( // These are the names you want to appear on the second list&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 'WI1',&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 'WI2'&lt;br /&gt;
);&lt;br /&gt;
lists['psv5'][1] = new Array( //These are the values you want to pass on from the second list&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 'wi1',&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 'wi2'&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ); &lt;br /&gt;
//Second List&lt;br /&gt;
lists['psv1']&amp;nbsp;&amp;nbsp;&amp;nbsp; = new Array(); // Notice here you are making a list with the value of first list&lt;br /&gt;
lists['psv1'][0] = new Array( // These are the names you want to appear on the second list&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 'PS1',&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 'PS2'&lt;br /&gt;
);&lt;br /&gt;
lists['psv1'][1] = new Array( //These are the values you want to pass on from the second list&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 'ps1',&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 'ps2'&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; );&lt;br /&gt;
&lt;br /&gt;
4. Write the second sets of javascripts having functions like:&lt;br /&gt;
emptyList, fillList and changeList &lt;br /&gt;
&lt;br /&gt;
function emptyList( box ) {&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; while ( box.options.length ) box.options[0] = null;&lt;br /&gt;
}&lt;br /&gt;
function fillList( box, arr ) {&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; for ( i = 0; i &amp;lt; arr[0].length; i++ ) {&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; option = new Option( arr[0][i], arr[1][i] );&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; box.options[box.length] = option;&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; box.selectedIndex=0;&lt;br /&gt;
}&lt;br /&gt;
function changeList( box ) {&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; list = lists[box.options[box.selectedIndex].value];&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; emptyList( box.form.reads ); // Here notice I have given name 'reads' for the form object&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; fillList( box.form.reads, list ); // that is the name tag on the select object in html for the second list&lt;br /&gt;
}&lt;br /&gt;
&amp;lt;/script&amp;gt;&lt;br /&gt;
&lt;br /&gt;
5. Now add the following to the body tag of your html page:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;body onload="changeList(document.forms['nextgen'].reference)"&amp;gt; // Here 'nextgen' is the name of the form. Notice, I add this trigger when the form gets loaded to execute changeList().&lt;br /&gt;
&lt;br /&gt;
The page will be fine at this stage, only problem is you have to refresh it after making a select on the first select drop down. If you don't want that do this last thing:&lt;br /&gt;
&lt;br /&gt;
6.&amp;nbsp; Write the following on the first select tag:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;select name="reference" size=1 onchange="changeList(this)"&amp;gt;&lt;br /&gt;
&lt;br /&gt;
In case you want to be able to select multiple values from a select list (by shift + ctrl), go ahead and add the following to your second select tag:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;select name="reads" size=N multiple width=M&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Enjoy with javascript!&lt;/div&gt;
&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-2593225786360220998?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/op_hCVJZkQpv1E4ZH1ZvuXnwusw/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/op_hCVJZkQpv1E4ZH1ZvuXnwusw/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/op_hCVJZkQpv1E4ZH1ZvuXnwusw/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/op_hCVJZkQpv1E4ZH1ZvuXnwusw/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/VUaslga3UX4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/2593225786360220998/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=2593225786360220998" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/2593225786360220998?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/2593225786360220998?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/VUaslga3UX4/javascript-changing-drop-down-lists.html" title="JavaScript: Changing drop down Lists" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-AKrsKdzjPAU/TmklnQAoq2I/AAAAAAAABpw/JAkogSs17C0/s72-c/temp.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/09/javascript-changing-drop-down-lists.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkYARng7eCp7ImA9WhdXE0w.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-3219870825074098222</id><published>2011-08-25T15:09:00.000-07:00</published><updated>2011-08-25T15:09:07.600-07:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-08-25T15:09:07.600-07:00</app:edited><title>Biostatistics vs. Lab Research</title><content type="html">&lt;iframe src="http://www.youtube.com/embed/PbODigCZqL8?fs=1" allowfullscreen="" frameborder="0" height="344" width="425"&gt;&lt;/iframe&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-3219870825074098222?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/17uecKhSsvlfBtoFiKfsK1neofQ/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/17uecKhSsvlfBtoFiKfsK1neofQ/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/17uecKhSsvlfBtoFiKfsK1neofQ/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/17uecKhSsvlfBtoFiKfsK1neofQ/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/VfIVmqA0XcI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/3219870825074098222/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=3219870825074098222" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/3219870825074098222?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/3219870825074098222?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/VfIVmqA0XcI/biostatistics-vs-lab-research.html" title="Biostatistics vs. 
Lab Research" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://img.youtube.com/vi/PbODigCZqL8/default.jpg" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/08/biostatistics-vs-lab-research.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEMGRnY5fCp7ImA9WhdRGEw.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-8190884906540597891</id><published>2011-08-08T07:07:00.000-07:00</published><updated>2011-08-08T07:07:07.824-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-08T07:07:07.824-07:00</app:edited><title>String matching algorithms - An overview</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;
String matching algorithms are the most commonly used algorithms in sequence analysis. While we care less about the underlying principle that does the hard work for us, we remain elusive about the output. It helps a great deal if students get to know the algorithm that forms the basis of a search program. Here is a great link that illustrates various algorithms underlying the string search principle along with the C pseudo code. Enjoy...&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://igm.univ-mlv.fr/%7Elecroq/string/index.html"&gt;http://igm.univ-mlv.fr/~lecroq/string/index.html&lt;/a&gt; &lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-8190884906540597891?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/HlLhigupZBy2Nm4aqv2HPVOPm7I/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/HlLhigupZBy2Nm4aqv2HPVOPm7I/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/HlLhigupZBy2Nm4aqv2HPVOPm7I/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/HlLhigupZBy2Nm4aqv2HPVOPm7I/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/kzeyFljAXJs" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/8190884906540597891/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=8190884906540597891" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/8190884906540597891?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/8190884906540597891?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/kzeyFljAXJs/string-matching-algorithms-overview.html" title="String matching algorithms - An overview" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/08/string-matching-algorithms-overview.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0YBQn05cSp7ImA9WhdSEkw.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-5272999601584473508</id><published>2011-07-15T19:07:00.000-07:00</published><updated>2011-07-20T20:32:33.329-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-20T20:32:33.329-07:00</app:edited><title>Department of defense Ovarian cancer award proposal</title><content type="html">&lt;div dir="ltr" 
style="text-align: left;" trbidi="on"&gt;
My collaborator Prof. Alka Nanda Basu from UNT Health Science Center and I go to the same Zumba class. Once while talking, we discovered that we are both in science and could be potential collaborators. Alaka invited me to present my work at UNT and it was very well received. Since then we have been working together on a few grants. One of our grant proposals on OCRP from DOD got the green signal and we are all set to write the full proposal. As exciting as this news is, another piece of exciting news came from one of her summer students who worked on the role of AMP activated Protein Kinase (AMPK) in cancer cells developing resistance to the drug cisplatin. The hypothesis that led to a prestigious google award can be summarized as:&lt;br /&gt;
&lt;span style="color: cyan; font-size: small;"&gt;&lt;b style="background-color: black;"&gt;&amp;nbsp;AMPK inhibition may decrease cisplatin resistance 
in ovarian cancer cells that are resistant to the drug.&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;div style="background-color: white; color: black;"&gt;
&lt;/div&gt;
&lt;span style="color: cyan; font-size: small;"&gt;&lt;span style="background-color: white; color: black;"&gt;More on google award can be found at google web site at:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;h6 class="uiStreamMessage" data-ft="{&amp;quot;type&amp;quot;:1}"&gt;

&lt;span class="messageBody" data-ft="{&amp;quot;type&amp;quot;:3}"&gt; &lt;a href="https://sites.google.com/site/ampkandcisplatinresistance/home" rel="nofollow" target="_blank"&gt;https://sites.google.com/site/&lt;wbr&gt;&lt;/wbr&gt;​ampkandcisplatinresistance/hom&lt;wbr&gt;&lt;/wbr&gt;​e&lt;/a&gt;&lt;/span&gt;&lt;/h6&gt;
&lt;span style="color: cyan; font-size: small;"&gt;&lt;span style="background-color: white; color: black;"&gt;&amp;nbsp;&lt;/span&gt;&lt;b style="background-color: black;"&gt; &lt;/b&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-5272999601584473508?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/fYEa9hUnhZpnQsJkDaPAKml4fIM/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/fYEa9hUnhZpnQsJkDaPAKml4fIM/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/fYEa9hUnhZpnQsJkDaPAKml4fIM/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/fYEa9hUnhZpnQsJkDaPAKml4fIM/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/1KbxuYSAvzI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/5272999601584473508/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=5272999601584473508" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/5272999601584473508?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/5272999601584473508?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/1KbxuYSAvzI/department-of-defense-ovarian-cancer.html" title="Department of defense Ovarian cancer award proposal" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/07/department-of-defense-ovarian-cancer.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkUCQXo7cSp7ImA9WhZbE08.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-7139635520528296950</id><published>2011-06-17T07:01:00.000-07:00</published><updated>2011-06-17T07:17:40.409-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-17T07:17:40.409-07:00</app:edited><title>Feeding twitter rolls into your web site</title><content type="html">&lt;div dir="ltr" 
style="text-align: left;" trbidi="on"&gt;This is the simplest way you could start feeding twitter rolls of your interest into your web site, just replace the word1, word2... with your suitable key words.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;script&amp;gt;&lt;br /&gt;
new TWTR.Widget({&lt;br /&gt;
version: 2,&lt;br /&gt;
type: 'search',&lt;br /&gt;
search: 'Word1 OR word2 OR word3 OR word4',&lt;br /&gt;
interval: 6000,&lt;br /&gt;
title: 'Oomycetes tweet feed',&lt;br /&gt;
subject: 'Tweet Feed',&lt;br /&gt;
width: 250,&lt;br /&gt;
height: 300,&lt;br /&gt;
theme: {&lt;br /&gt;
shell: {&lt;br /&gt;
background: '#8ec1da',&lt;br /&gt;
color: '#ffffff'&lt;br /&gt;
},&lt;br /&gt;
tweets: {&lt;br /&gt;
background: '#ffffff',&lt;br /&gt;
color: '#444444',&lt;br /&gt;
links: '#1985b5'&lt;br /&gt;
}&lt;br /&gt;
},&lt;br /&gt;
features: {&lt;br /&gt;
scrollbar: false,&lt;br /&gt;
loop: true,&lt;br /&gt;
live: true,&lt;br /&gt;
hashtags: true,&lt;br /&gt;
timestamp: true,&lt;br /&gt;
avatars: true,&lt;br /&gt;
toptweets: true,&lt;br /&gt;
behavior: 'default'&lt;br /&gt;
}&lt;br /&gt;
}).render().start();&lt;br /&gt;
&amp;lt;/script&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-7139635520528296950?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/XmpCWVLby3ZHR7btM1D09g1TPK0/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/XmpCWVLby3ZHR7btM1D09g1TPK0/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/XmpCWVLby3ZHR7btM1D09g1TPK0/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/XmpCWVLby3ZHR7btM1D09g1TPK0/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/op4QEwONVDU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/7139635520528296950/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=7139635520528296950" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/7139635520528296950?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/7139635520528296950?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/op4QEwONVDU/feeding-twitter-rolls-into-your-web.html" title="Feeding twitter rolls into your web site" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/06/feeding-twitter-rolls-into-your-web.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQCQHc6eip7ImA9WhZQFU8.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-6272554469125744918</id><published>2011-04-21T11:53:00.000-07:00</published><updated>2011-04-22T19:06:01.912-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-22T19:06:01.912-07:00</app:edited><title>Easing the informatics bottleneck for transcript alignment and assembly for RNAseq data</title><content 
type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;My new blog post in Scientific Advisory Board..&lt;br /&gt;
&lt;a href="http://products.scienceboard.net/index.php/archives/2011/04/21/671/"&gt;http://products.scienceboard.net/index.php/archives/2011/04/21/671/&lt;/a&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-6272554469125744918?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/0Ra1t2B1vDAj2m29rX89ZiDRQ18/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/0Ra1t2B1vDAj2m29rX89ZiDRQ18/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/0Ra1t2B1vDAj2m29rX89ZiDRQ18/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/0Ra1t2B1vDAj2m29rX89ZiDRQ18/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/rTW1Dc0VRpI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/6272554469125744918/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=6272554469125744918" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/6272554469125744918?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/6272554469125744918?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/rTW1Dc0VRpI/easing-informatics-bottleneck-for.html" title="Easing the informatics bottleneck for transcript alignment and assembly for RNAseq data" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/04/easing-informatics-bottleneck-for.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEYEQXwzeCp7ImA9WhZTF0s.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-9049489961778716010</id><published>2011-03-21T20:55:00.000-07:00</published><updated>2011-03-21T20:55:00.280-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-21T20:55:00.280-07:00</app:edited><title>Analyzing oomycetes RNAseq data - Part 1</title><content 
type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;This is a small tutorial on how to work with large scale RNAseq data with open source softwares. I will be discussing various options for alignment of reads to the reference sequence and little bit on assembly. &lt;br /&gt;
&lt;br /&gt;
[When I started writing this post, tophat did not have colorspace support, but now it does] &lt;br /&gt;
A quicklist of software programs available for nextgen data analysis can be found &lt;a href="http://seqanswers.com/wiki/Software/list"&gt;here&lt;/a&gt;: &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
To begin with bowtie, get the pre compiled binaries from &lt;a href="http://bowtie-bio.sourceforge.net/"&gt;here&lt;/a&gt; .&lt;br /&gt;
&lt;br /&gt;
In order to start with bowtie, you need to build index of your reference sequence. This could be typically a fasta file. For me running command . "./bowtie-build /data/vbi/references/p_sojae.fasta sojaeV4"&lt;br /&gt;
to generate index for a 70 MB genome in a 64 bit machine&amp;nbsp; with dual core and 8 GB chip - took exactly 2 minutes. This command will generate 6 .ebwt files in the working directory. &lt;br /&gt;
ls -l&lt;br /&gt;
&lt;pre&gt;26385951 2010-06-11 15:18 sojaeV4.1.ebwt
9702096  2010-06-11 15:18 sojaeV4.2.ebwt
8711     2010-06-11 15:17 sojaeV4.3.ebwt
19404183 2010-06-11 15:17 sojaeV4.4.ebwt
26385951 2010-06-11 15:19 sojaeV4.rev.1.ebwt
9702096  2010-06-11 15:19 sojaeV4.rev.2.ebwt&amp;nbsp;&lt;/pre&gt;&lt;br /&gt;
Once created make a separate directory called as index/ in the same working area and place all these files in there.&lt;br /&gt;
&lt;br /&gt;
Test installation of index using the following command:&lt;br /&gt;
./bowtie -c index/sojaeV4 ATGGCCGCGAAAAGGTTTCTGAGACGCAACAAG&lt;br /&gt;
and within a fraction of a second you will see the following printed to stdout:&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; -&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; super_16&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1314431 CTTGTTGCGTCTCAGAAACCTTTTCGCGGCCATIIIIIII&lt;br /&gt;
IIIIIIIIIIIIIIIIIIIIIIIIII&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&lt;br /&gt;
# reads processed: 1&lt;br /&gt;
# reads with at least one reported alignment: 1 (100.00%)&lt;br /&gt;
# reads that failed to align: 0 (0.00%)&lt;br /&gt;
Reported 1 alignments to 1 output stream(s)&lt;br /&gt;
&lt;br /&gt;
A good thing about bowtie is that, from version 0.12 onwards, it supports colorspace alignment.&lt;br /&gt;
&lt;h2 id="colorspace-alignment"&gt;&lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#TOC-colorspace-alignment"&gt;&lt;/a&gt;&lt;/h2&gt;As of version 0.12.0, &lt;code&gt;bowtie&lt;/code&gt; can  align colorspace reads against a colorspace index when &lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#bowtie-options-C"&gt;&lt;code&gt;-C&lt;/code&gt;&lt;/a&gt;  is specified. Colorspace is the characteristic output format of Applied  Biosystems' SOLiD system.Look at color coding &lt;a href="http://genomics-array.blogspot.com/2010/09/color-space-to-fastq.html"&gt;here&lt;/a&gt; for more details.  See ABI's &lt;a href="http://tinyurl.com/ygnb2gn"&gt;Principles of Di-Base  Sequencing&lt;/a&gt; document for details.&lt;br /&gt;
&lt;h3 id="colorspace-reads"&gt;&lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#TOC-colorspace-reads"&gt;&lt;/a&gt;&lt;/h3&gt;All input formats (FASTA &lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#bowtie-options-f"&gt;&lt;code&gt;-f&lt;/code&gt;&lt;/a&gt;,  FASTQ &lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#bowtie-options-Q"&gt;&lt;code&gt;-q&lt;/code&gt;&lt;/a&gt;,  raw &lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#bowtie-options-r"&gt;&lt;code&gt;-r&lt;/code&gt;&lt;/a&gt;,  tab-delimited &lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#command-line"&gt;&lt;code&gt;--12&lt;/code&gt;&lt;/a&gt;,  command-line &lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#bowtie-options-C"&gt;&lt;code&gt;-c&lt;/code&gt;&lt;/a&gt;)  are compatible with colorspace (&lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#bowtie-options-C"&gt;&lt;code&gt;-C&lt;/code&gt;&lt;/a&gt;).  When &lt;a href="http://bowtie-bio.sourceforge.net/manual.shtml#bowtie-options-C"&gt;&lt;code&gt;-C&lt;/code&gt;&lt;/a&gt;  is specified, read sequences are treated as colors. Colors may be  encoded either as numbers (&lt;code&gt;0&lt;/code&gt;=blue, &lt;code&gt;1&lt;/code&gt;=green, &lt;code&gt;2&lt;/code&gt;=orange,  &lt;code&gt;3&lt;/code&gt;=red) or as characters &lt;code&gt;A/C/G/T&lt;/code&gt; (&lt;code&gt;A&lt;/code&gt;=blue,  &lt;code&gt;C&lt;/code&gt;=green, &lt;code&gt;G&lt;/code&gt;=orange, &lt;code&gt;T&lt;/code&gt;=red).&lt;br /&gt;
Some  reads include a primer base as the first character; e.g.:&lt;br /&gt;
&lt;pre&gt;&lt;code&gt;&amp;gt;1_53_33_F3
T2213120002010301233221223311331
&amp;gt;1_53_70_F3
T2302111203131231130300111123220&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&amp;nbsp;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;What about BFAST and SHRIMP and VMATCH?&lt;/b&gt;&lt;b&gt;&amp;nbsp;&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;These 3 softwares are also good for fast alignment of short reads into the genome.&amp;nbsp;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;BFAST stands for "BLAT-like Fast Accurate Search Tool", and it also supports colorspace&amp;nbsp;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;data. SHRIMP also supports colorspace reads and recently the major improvement is that,&amp;nbsp;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;it can work on small memory computers. VMATCH on the other hand is a software for&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;local alignments and needs licensing. I have not worked extensively on these 3 softwares,&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;so, can't say much about them&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt; &lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;TOPHAT:&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;&amp;nbsp;&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;Is designed to predict the splice junctions after aligning short reads to the reference.&amp;nbsp;&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;It usually works without a reference junction position(GFF) file, but can also work&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;if there is already a known gff file. This program chops the reads into smaller fragments&amp;nbsp;&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;and stitches the alignments later in separating introns from exons. 
Now tophat has a&amp;nbsp;&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;colorspace support.&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;&amp;nbsp;&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;Results:&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt; &lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_avzLfLUJNqM/TQ_QOQSY0_I/AAAAAAAABjQ/nV9E9VI2iXQ/s1600/NextGenSequencingPresentation.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="240" src="http://2.bp.blogspot.com/_avzLfLUJNqM/TQ_QOQSY0_I/AAAAAAAABjQ/nV9E9VI2iXQ/s320/NextGenSequencingPresentation.jpg" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;pre&gt;&lt;code&gt;&lt;b&gt;&amp;nbsp;&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;&lt;code&gt;&lt;b&gt;This result represents alignment percentage of 3 different assembly version of the&lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&lt;b&gt;same organism. &lt;/b&gt;&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
&lt;pre&gt;&lt;code&gt;&amp;nbsp;&lt;/code&gt;&lt;/pre&gt;Tophat Output: [For larger datasets, try splitting them into smaller files, since tophat sometimes exits with an error that makes it hard to tell why it exited]&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Caveats:&lt;/b&gt; I work with oomycetes pathogens and the introns and exons in this pathogen could be really much smaller than what tophat defines as default.&lt;br /&gt;
&lt;br /&gt;
Running tophat may not work for everyone. For example with &lt;i&gt;P.sojae&lt;/i&gt;, tophat produced awful number of junctions(just 700) with default parameters. Tweaking the parameters only produced worse results. As a work around, we tried aligning these transcripts to the predicted gene models as well. Comparing genome vs predicted gene model alignment results, we found around 10% of the alignments were not translated into predicted models(55% matched with genomes where as 45% match with predicted transcripts)&lt;br /&gt;
&lt;br /&gt;
There is an option to try and get the sequences aligned with the genome to assemble into contigs. One easier method for this is to try &lt;span id="goog_1542283174"&gt;&lt;/span&gt;&lt;a href="http://draft.blogger.com/"&gt;ABySS&amp;nbsp;&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
Running ABySS is quite straight forward. Unpack the distribution and follow installation instructions. Once installed you could try running ABySS with different k mers. One small shell script for running different k mers&lt;br /&gt;
is as follows:&lt;br /&gt;
&lt;br /&gt;
export PATH=$PATH:/home/sutripa/samtools-0.1.11/&lt;br /&gt;
&lt;br /&gt;
for i in {20..40};&lt;br /&gt;
do&lt;br /&gt;
./ABYSS -k $i /data/bowtieOutput/PS-1_F3V1.bam.sorted.bam -o tmp-k$i.fa&lt;br /&gt;
done&lt;br /&gt;
[&lt;b&gt;NOTE:&lt;/b&gt; Here the input file for assembly is sorted bam files. The bam files are generated by running bowtie]&lt;br /&gt;
&lt;br /&gt;
Then check the files for their N50 values [See N50 section of the blog on how to calculate N50 values]&lt;br /&gt;
In case of P.sojae here is the N50 results:&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="https://lh4.googleusercontent.com/-9-I7vYu8RS8/TYgcqeneBJI/AAAAAAAABlE/xun6oYvrPCk/s1600/N50Psojae20-40kvalue.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="179" src="https://lh4.googleusercontent.com/-9-I7vYu8RS8/TYgcqeneBJI/AAAAAAAABlE/xun6oYvrPCk/s320/N50Psojae20-40kvalue.gif" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&amp;nbsp;N50 Vs Number of scaffolds having values larger than N50 for&lt;i&gt; P.sojae&lt;/i&gt;.&lt;br /&gt;
Here, the results are not very encouraging, so you may try running with a bunch of different k values. I will discuss running Trans-ABySS and Cufflinks in Part 2 of this series.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;&lt;code&gt; &lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&amp;nbsp;&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;pre&gt;&lt;code&gt;&amp;nbsp;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-9049489961778716010?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/x0r3G7SjjxdrriHS4qBb2qzgL8o/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/x0r3G7SjjxdrriHS4qBb2qzgL8o/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/x0r3G7SjjxdrriHS4qBb2qzgL8o/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/x0r3G7SjjxdrriHS4qBb2qzgL8o/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/9WZlI_AVU0w" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/9049489961778716010/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=9049489961778716010" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/9049489961778716010?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/9049489961778716010?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/9WZlI_AVU0w/analyzing-oomycetes-rnaseq-data-part-1.html" title="Analyzing oomycetes RNAseq data - Part 1" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_avzLfLUJNqM/TQ_QOQSY0_I/AAAAAAAABjQ/nV9E9VI2iXQ/s72-c/NextGenSequencingPresentation.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/03/analyzing-oomycetes-rnaseq-data-part-1.html</feedburner:origLink></entry><entry 
gd:etag="W/&quot;CUYHQHk7eSp7ImA9Wx9aEUo.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-4689633851875971395</id><published>2011-02-28T12:43:00.000-08:00</published><updated>2011-03-03T09:25:31.701-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-03T09:25:31.701-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Variants" /><category scheme="http://www.blogger.com/atom/ns#" term="Heritability" /><category scheme="http://www.blogger.com/atom/ns#" term="Imputation" /><title>Genomics: In search of rare human variants</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;This paper makes a very good reading. Besides that following are the few concepts that caught my imagination... &lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;Missing Heritability:&lt;/b&gt;&lt;br /&gt;
Although many common diseases — obesity, diabetes and cardiovascular disease — are known to have a strong genetic component, their associated genomic variants detected through GWAS cannot explain most of the experimentally identified genetic effects found in affected families. Human geneticists call this problem the 'missing heritability'. Missing heritability may be due to very rare variants rather than the common ones discovered by GWAS.&lt;br /&gt;
Since GWAS misses most of the rare variants and sequencing thousands of individuals is going to be very expensive, a new concept called &lt;b&gt;Imputation&lt;/b&gt; is now doing the rounds. Imputation is defined as 'inventing data' for some individuals depending on data available for other individuals [see the figure below].&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="https://lh5.googleusercontent.com/-Q3gnCooKIyg/TWwFEDlQVAI/AAAAAAAABkI/c9t1iPdJ6bs/s1600/imputation.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="320" src="https://lh5.googleusercontent.com/-Q3gnCooKIyg/TWwFEDlQVAI/AAAAAAAABkI/c9t1iPdJ6bs/s320/imputation.jpg" width="233" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp;Remember: IT HAS ALWAYS BEEN AND IT STILL IS AN RNA EARTH LIFE.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Ref&lt;/b&gt;&amp;nbsp; [Rasmus Nielsen&lt;br /&gt;
Nature Volume: 467,Pages:    1050–1051 Date published:    (28 October 2010)&lt;br /&gt;
DOI: doi:10.1038/4671050a]&lt;br /&gt;
A very good read... &lt;/div&gt;&lt;br /&gt;
&lt;a href="http://technorati.com/tag/Imputation + MissingHeritability" rel="tag"&gt;Imputation MissingHeritability&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-4689633851875971395?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/Kf5tI8NTTpGMGC2u4IrDrZxQzF8/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Kf5tI8NTTpGMGC2u4IrDrZxQzF8/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/Kf5tI8NTTpGMGC2u4IrDrZxQzF8/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Kf5tI8NTTpGMGC2u4IrDrZxQzF8/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/Fhu-cyJP0jk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/4689633851875971395/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=4689633851875971395" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/4689633851875971395?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/4689633851875971395?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/Fhu-cyJP0jk/genomics-in-search-of-rare-human.html" title="Genomics: In search of rare human variants" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://lh5.googleusercontent.com/-Q3gnCooKIyg/TWwFEDlQVAI/AAAAAAAABkI/c9t1iPdJ6bs/s72-c/imputation.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/02/genomics-in-search-of-rare-human.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUYCQ3k5eip7ImA9Wx9aEUo.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-7511640012131883506</id><published>2011-02-26T02:01:00.000-08:00</published><updated>2011-03-03T09:26:02.722-08:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-03-03T09:26:02.722-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Contig Assembly" /><category scheme="http://www.blogger.com/atom/ns#" term="N50" /><title>Calculating N50 of contig assembly file</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;N50 is most often used in draft genome assembly. It can be defined as the largest entity size E such that at least half of the total size of the entities is contained in entities at least as large as E. For example, if we have a collection of contigs with sizes 7, 4, 3, 2, 2, 1, and 1 kb (total size = 20 kb), the N50 length is 4 kb because we can cover at least 10 kb (7 + 4 = 11 kb) with contigs of 4 kb or bigger.&lt;br /&gt;
&lt;br /&gt;
Here is a small step by step protocol to calculate N50:&lt;br /&gt;
&lt;br /&gt;
1. Read Fasta file and calculate sequence length.&lt;br /&gt;
2. Sort length on reverse order.&lt;br /&gt;
3. Calculate Total size.&lt;br /&gt;
4. Calculate N50.&lt;br /&gt;
&lt;br /&gt;
## Read Fasta File and compute length ###&lt;br /&gt;
my $length;&lt;br /&gt;
my $totalLength; &lt;br /&gt;
my @arr;&lt;br /&gt;
while(&amp;lt;FH&amp;gt;){&lt;br /&gt;
&amp;nbsp;&amp;nbsp; chomp; &lt;br /&gt;
&amp;nbsp;&amp;nbsp; if(/&amp;gt;/){&lt;br /&gt;
&amp;nbsp;&amp;nbsp; push (@arr, $length);&lt;br /&gt;
&amp;nbsp;&amp;nbsp; $totalLength += $length; &lt;br /&gt;
&amp;nbsp;&amp;nbsp; $length=0;&lt;br /&gt;
&amp;nbsp;&amp;nbsp; next;&lt;br /&gt;
&amp;nbsp; }&lt;br /&gt;
&amp;nbsp; $length += length($_);&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
close(FH);&lt;br /&gt;
&lt;br /&gt;
my @sort = sort {$b &amp;lt;=&amp;gt; $a} @arr;&lt;br /&gt;
my $n50; &lt;br /&gt;
foreach my $val(@sort){&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; $n50+=$val;&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if($n50 &amp;gt;= $totalLength/2){&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; print "N50 length is $n50 and N50 value is: $val\n";&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; last; &lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&lt;br /&gt;
}&lt;br /&gt;
&amp;nbsp; &lt;br /&gt;
If each sequence length is given in fasta header then one can grep '&amp;gt;' inputfile &amp;gt; out and do proper substitution&lt;br /&gt;
to get only the values(see vim editor section of the blog).&lt;br /&gt;
Then do a&lt;br /&gt;
$ sort -g -r inputfile &amp;gt; out&lt;br /&gt;
$ awk '{sum+=$1}END{print "Total:", sum}' out&amp;nbsp; # To calculate total&lt;br /&gt;
$ Total : Number&lt;br /&gt;
&lt;br /&gt;
# Then use second part of the perl subroutine to get the N50 value&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-7511640012131883506?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/mBwxish9kvxd964EC-obl5vE85M/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/mBwxish9kvxd964EC-obl5vE85M/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/mBwxish9kvxd964EC-obl5vE85M/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/mBwxish9kvxd964EC-obl5vE85M/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/sUz9_YdogSI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/7511640012131883506/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=7511640012131883506" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/7511640012131883506?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/7511640012131883506?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/sUz9_YdogSI/calculating-n50-of-contig-assembly-file.html" title="Calculating N50 of contig assembly file" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2011/02/calculating-n50-of-contig-assembly-file.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUYMRHg-eyp7ImA9Wx9aEUo.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-6531555532784245249</id><published>2010-11-29T15:03:00.000-08:00</published><updated>2011-03-03T09:26:25.653-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-03T09:26:25.653-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Bayesian Probability" /><title>Bayesian 
Probability theory and its application</title><content type="html">The two probability theories, that are considered as pillars of all other complicated probabilistic models are; Addition theory and Multiplication theory:&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Addition Theory(OR Theory):&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;If two events are mutually exclusive, then the possibility of occurrence of event 1 or event 2 is the addition of two probabilities.&lt;/b&gt;&lt;br /&gt;
&lt;b&gt; &lt;/b&gt;Example: Possibility of having an ace or a joker from a pack of 54 cards. There are only 2 jokers and 4 aces. So, the probability is: 2/54 + 4/54 = 6/54 = 1/9.&lt;br /&gt;
&lt;br /&gt;
Addition theory is slightly modified where the events are not mutually exclusive. For example, drawing a diamond or a queen from a pack of 52 cards. Symbolically, it is represented as: P(A or B) = P(A) + P(B) - P(A and B). So, the result will be 13/52 + 4/52 - 1/52. (Reason: there are 13 diamonds, 4 queens and only one queen of diamonds in 52 cards.) The result therefore is: 16/52 = 4/13.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Multiplication Theory: (AND Theory)&lt;/b&gt;&lt;br /&gt;
When two events A and B are independent, the probability of both A and B occurring is P(A) X P(B).&lt;br /&gt;
Example: When tossing two coins simultaneously, the probability of obtaining two tails (or, likewise, two heads) is 1/2 X 1/2 = 1/4.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Bayesian Probability:&lt;/b&gt;&lt;br /&gt;
Bayesian statistics or Bayesian probability is also called&lt;b&gt; conditional probability&lt;/b&gt;, where two different conditions are evaluated jointly. A common example is a loaded die in a casino where 99% of the dice are honest but 1% of the dice are loaded. With a loaded die, the probability of getting a 6 is 50%, as against an honest die, where a 6 occurs 1/6th of the time. So, what is the probability that one gets 3 consecutive sixes?&lt;br /&gt;
In theory Bayesian statistics can be applied where:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;   The &lt;a href="http://stattrek.com/Help/Glossary.aspx?Target=Sample_space"&gt;sample  space&lt;/a&gt; is    partitioned into a &lt;a href="http://stattrek.com/Help/Glossary.aspx?Target=Set"&gt;set&lt;/a&gt; of &lt;a href="http://stattrek.com/Help/Glossary.aspx?Target=Mutually_exclusive"&gt;    mutually exclusive&lt;/a&gt; events { A&lt;sub&gt;1&lt;/sub&gt;, A&lt;sub&gt;2&lt;/sub&gt;, . . . ,  A&lt;sub&gt;n&lt;/sub&gt;  }.  &lt;/li&gt;
&lt;li&gt;   Within the sample space, there exists an &lt;a href="http://stattrek.com/Help/Glossary.aspx?Target=Event"&gt;    event&lt;/a&gt;  B, for which P(B) &amp;gt; 0.  &lt;/li&gt;
&lt;li&gt;   The analytical goal is to compute a conditional probability of the  form: P( A&lt;sub&gt;k&lt;/sub&gt;  | B ).  &lt;/li&gt;
&lt;li&gt;   You know at least one of the two sets of probabilities described  below.   &lt;br /&gt;
&lt;/li&gt;
&lt;/ul&gt;&lt;ul&gt;&lt;li&gt;     P( A&lt;sub&gt;k&lt;/sub&gt; ∩      B ) for each A&lt;sub&gt;k&lt;/sub&gt;    &lt;/li&gt;
&lt;li&gt;     P( A&lt;sub&gt;k&lt;/sub&gt; ) and P( B | A&lt;sub&gt;k&lt;/sub&gt; ) for each A&lt;sub&gt;k.&lt;/sub&gt;&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;So, in the above case, we would like to know the probability that the die is loaded, given that 3 consecutive sixes were observed.&lt;br /&gt;
P(D Loaded|3 sixes) =&lt;u&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; P(3 sixes in Loaded die) * P(loaded die) &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/u&gt;&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; P(3 sixes in Loaded die) * P(loaded die) + P(3 sixes in fair die) * P(Fair die)&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; = &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; &lt;u&gt;(0.5)&lt;sup&gt;3&lt;/sup&gt; * (0.01) &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/u&gt;&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; (0.5)&lt;sup&gt;3&lt;/sup&gt; * (0.01) + (1/6)&lt;sup&gt;3&lt;/sup&gt; * 0.99&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; = &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; 0.21&lt;br /&gt;
&lt;br /&gt;
The same can be applied to find the probability that the die is fair, given that 3 consecutive sixes were observed:&lt;br /&gt;
&lt;br /&gt;
(1/6)&lt;sup&gt;3&lt;/sup&gt; * 0.99/ [  (0.5)&lt;sup&gt;3&lt;/sup&gt; * (0.01) + (1/6)&lt;sup&gt;3&lt;/sup&gt; * 0.99 ] = 0.79&lt;br /&gt;
Meaning that, even after 3 consecutive sixes, it is still more likely that the die is fair than loaded, because loaded dice are so rare.&lt;br /&gt;
&lt;br /&gt;
The same probability theory can be applied to a weather forecast. Suppose it rains 5 days in a whole year, the weather channel has predicted rain for a particular day, and the accuracy of the weather channel is about 90%. Then the probability that it actually rains on the given day is:&lt;br /&gt;
(5/365) * 0.9 / [ (5/365) * 0.9 + (360/365) * 0.1] = 0.111&lt;br /&gt;
&lt;br /&gt;
Bayesian statistics is often used in sequence analysis, where we have to judge whether a protein is extracellular or intracellular. These 2 situations are mutually exclusive, i.e., a protein can be either extracellular or intracellular, and having a larger number of cysteine residues at certain locations decides, at a certain confidence level, which is the case.&lt;br /&gt;
Another test case could be:&lt;br /&gt;
&lt;br /&gt;
A rare genetic disease is discovered. Although only one in a million&lt;br /&gt;
people carry it, you consider getting screened. You are told that the genetic&lt;br /&gt;
test is extremely good; it is 100% sensitive (it is always correct if&lt;br /&gt;
you have the disease) and 99.99% specific (it gives a false positive result&lt;br /&gt;
only 0.01 % of the time). Using Bayes' theorem, explain why you might&lt;br /&gt;
decide not to take the test.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
Nature is a tinkerer and not an inventor. New sequences are adapted from pre-existing sequences rather than invented de novo [Jacob 1977].&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-6531555532784245249?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/luUveJjqjQPueUot298wqkIt8m8/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/luUveJjqjQPueUot298wqkIt8m8/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/luUveJjqjQPueUot298wqkIt8m8/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/luUveJjqjQPueUot298wqkIt8m8/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/tYBGNRFLoIM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/6531555532784245249/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=6531555532784245249" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/6531555532784245249?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/6531555532784245249?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/tYBGNRFLoIM/bayesian-probability-theory-and.html" title="Bayesian Probability theory and its application" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2010/11/bayesian-probability-theory-and.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUUFQX4-cCp7ImA9Wx9aEUo.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-3430462682730233128</id><published>2010-11-23T19:13:00.000-08:00</published><updated>2011-03-03T09:26:50.058-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-03T09:26:50.058-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Sequence Alignment" /><category 
scheme="http://www.blogger.com/atom/ns#" term="Dynamic Programming" /><title>Dynamic programming for sequence Alignment</title><content type="html">According to wikipedia:&lt;br /&gt;
In &lt;a href="http://en.wikipedia.org/wiki/Mathematics" title="Mathematics"&gt;mathematics&lt;/a&gt; and &lt;a href="http://en.wikipedia.org/wiki/Computer_science" title="Computer 
science"&gt;computer science&lt;/a&gt;, &lt;b&gt;dynamic programming&lt;/b&gt; is a method  for solving complex problems by breaking them down into simpler steps.  It is applicable to problems exhibiting the properties of &lt;a href="http://en.wikipedia.org/wiki/Overlapping_subproblem" title="Overlapping subproblem"&gt;overlapping subproblems&lt;/a&gt; which are  only slightly smaller&lt;sup class="reference" id="cite_ref-0"&gt;&lt;a href="http://en.wikipedia.org/wiki/Dynamic_programming#cite_note-0"&gt;&lt;span&gt;[&lt;/span&gt;1&lt;span&gt;]&lt;/span&gt;&lt;/a&gt;&lt;/sup&gt;  and &lt;a href="http://en.wikipedia.org/wiki/Optimal_substructure" title="Optimal substructure"&gt;optimal substructure&lt;/a&gt; (described below).  When applicable, the method takes far less time than naïve methods.&lt;br /&gt;
Top-down dynamic programming simply means storing the results of  certain calculations, which are later used again since the completed  calculation is a sub-problem of a larger calculation. Bottom-up dynamic  programming involves formulating a complex calculation as a &lt;a href="http://en.wikipedia.org/wiki/Recursion" title="Recursion"&gt;recursive&lt;/a&gt;  series of simpler calculations.&lt;br /&gt;
&lt;br /&gt;
In biological sequence analysis, dynamic programming is often used for sequence alignment using global sequence alignment(Needleman and Wunsch method).&lt;br /&gt;
Here I present a simplest interpretation of dynamic programming for aligning two sequences using global alignment(N&amp;amp;W method).&lt;br /&gt;
&lt;br /&gt;
We have sequence 1: G A A T T C A G T T A&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sequence 2: G G A T C G A&lt;br /&gt;
&amp;nbsp;Here;&lt;br /&gt;
Length(seq1) = 11 and Length(seq2) = 7; lets call them k and l&lt;br /&gt;
In order to align them globally using dynamic programming method, we have to do the following;&lt;br /&gt;
1. Build a matrix M of size (k+1) X (l+1), putting seq1 in columns and seq2 in rows&lt;br /&gt;
2. Initialize the matrix so that M[0,0..k] and M[0..l,0] are set to zero [Fig - 1].&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/_avzLfLUJNqM/TOwpRbYV6FI/AAAAAAAABiM/Z7YkdRrRS70/s1600/initial.gif" /&gt;&amp;nbsp;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;Fig - 1&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;The rest of the rows and columns can be filled using the following mathematical notation:&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;pre&gt;&lt;b&gt;M&lt;sub&gt;i,j&lt;/sub&gt; = MAXIMUM[
     M&lt;sub&gt;i-1, j-1&lt;/sub&gt; + S&lt;sub&gt;i,j&lt;/sub&gt;&lt;/b&gt; (match/mismatch in the diagonal),
     &lt;b&gt;M&lt;sub&gt;i,j-1&lt;/sub&gt; + w &lt;/b&gt;(gap in sequence #1),
     &lt;b&gt;M&lt;sub&gt;i-1,j&lt;/sub&gt; + w &lt;/b&gt;(gap in sequence #2)&lt;b&gt;]&lt;/b&gt;&lt;/pre&gt;&lt;pre&gt;&lt;b&gt;where &lt;/b&gt;&lt;b&gt;&lt;b&gt;S&lt;sub&gt;i,j&lt;/sub&gt;&lt;/b&gt;&lt;/b&gt; is 1 for a match and 0 for a mismatch&lt;/pre&gt;&lt;pre&gt;w = 0
&lt;b&gt; &lt;/b&gt;&lt;/pre&gt;&lt;pre&gt;&lt;b&gt;Now, lets talk about filling M(1,1) = MAX[ {M(0,0) + S(1,1)},&lt;/b&gt;&lt;/pre&gt;&lt;pre&gt;&lt;b&gt;                                           {M(1,0) + W},&lt;/b&gt;&lt;/pre&gt;&lt;pre&gt;&lt;b&gt;                                           {M(0,1) + W} ] &lt;/b&gt;&lt;/pre&gt;&lt;pre&gt;= MAX[ {0 + 1}, {0 + 0}, {0+0} ]&lt;/pre&gt;&lt;pre&gt;= 1&lt;/pre&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_avzLfLUJNqM/TOxQRDw8kQI/AAAAAAAABiQ/spnO3fvn7Hk/s1600/fig1-1.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/_avzLfLUJNqM/TOxQRDw8kQI/AAAAAAAABiQ/spnO3fvn7Hk/s1600/fig1-1.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;pre&gt;&lt;/pre&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;Going by this trend, M[1,2] is going to be: Max[{1+0},{0+0},{0+0} ] = 1&lt;/pre&gt;&lt;pre&gt;&lt;/pre&gt;&lt;pre&gt;So, we can start filling in the matrix this way:&lt;/pre&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_avzLfLUJNqM/TOxQ17AMPlI/AAAAAAAABiU/v7dErtpuHek/s1600/fig1-2.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/_avzLfLUJNqM/TOxQ17AMPlI/AAAAAAAABiU/v7dErtpuHek/s1600/fig1-2.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;pre&gt;&lt;/pre&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;From this, a common observation can be drawn:&lt;/pre&gt;&lt;pre&gt;1. Increment by one the value of a cell only when there is a perfect match to&amp;nbsp;&lt;/pre&gt;&lt;pre&gt;that of the previous diagonal.&lt;/pre&gt;&lt;pre&gt;2. If there is a mismatch always continue with the value from the previous column/row.&lt;/pre&gt;&lt;pre&gt;&lt;/pre&gt;&lt;pre&gt;Going by this, we can end up filling the matrix with the following values:&lt;/pre&gt;&lt;pre&gt;&lt;/pre&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_avzLfLUJNqM/TOxRunr1dcI/AAAAAAAABiY/uhYm3yA8L8Q/s1600/fig1-3.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/_avzLfLUJNqM/TOxRunr1dcI/AAAAAAAABiY/uhYm3yA8L8Q/s1600/fig1-3.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_avzLfLUJNqM/TOxRxNjM7WI/AAAAAAAABic/yNY50GsLGWA/s1600/fig1-4.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/_avzLfLUJNqM/TOxRxNjM7WI/AAAAAAAABic/yNY50GsLGWA/s1600/fig1-4.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_avzLfLUJNqM/TOxR04-vxYI/AAAAAAAABig/S2TRUJ9quto/s1600/fig1-5.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/_avzLfLUJNqM/TOxR04-vxYI/AAAAAAAABig/S2TRUJ9quto/s1600/fig1-5.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
Now the last phase in dynamic programming:&lt;br /&gt;
3) Tracing back&lt;br /&gt;
&lt;br /&gt;
Tracing back almost always begins with the highest score and looks to  the diagonal up or to the left(gap in sequence-1) or up (gap in seq -2) &lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_avzLfLUJNqM/TOxR3Cpi_jI/AAAAAAAABik/k9NIk9DJZys/s1600/fig1-6.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/_avzLfLUJNqM/TOxR3Cpi_jI/AAAAAAAABik/k9NIk9DJZys/s1600/fig1-6.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_avzLfLUJNqM/TOx9skAOkoI/AAAAAAAABio/BpsukpRf4To/s1600/fig1-7.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/_avzLfLUJNqM/TOx9skAOkoI/AAAAAAAABio/BpsukpRf4To/s1600/fig1-7.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_avzLfLUJNqM/TOx9uETa4MI/AAAAAAAABis/nBsGl_CvQQ8/s1600/fig1-8.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/_avzLfLUJNqM/TOx9uETa4MI/AAAAAAAABis/nBsGl_CvQQ8/s1600/fig1-8.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_avzLfLUJNqM/TOx9wP1c9YI/AAAAAAAABiw/_H5imWUcVmE/s1600/fig1-9.gif" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/_avzLfLUJNqM/TOx9wP1c9YI/AAAAAAAABiw/_H5imWUcVmE/s1600/fig1-9.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&amp;nbsp;This tracing gives an alignment:&lt;br /&gt;
&lt;br /&gt;
G A A T T C A G T T A&lt;br /&gt;
| &amp;nbsp; &amp;nbsp; &amp;nbsp; | &amp;nbsp; | &amp;nbsp; &amp;nbsp;&amp;nbsp; | &amp;nbsp; &amp;nbsp; &amp;nbsp; | &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; | &lt;br /&gt;
G G A T -&amp;nbsp; C -&amp;nbsp; G&amp;nbsp; - -&amp;nbsp; A&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_avzLfLUJNqM/TOx9x-ybSQI/AAAAAAAABi0/PkbTkl13TeM/s1600/fig1-10.gif" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/_avzLfLUJNqM/TOx9x-ybSQI/AAAAAAAABi0/PkbTkl13TeM/s1600/fig1-10.gif" /&gt;&lt;/a&gt;&lt;/div&gt;&amp;nbsp;OR&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
G&amp;nbsp; - A A T T C A G T T A&lt;br /&gt;
| &amp;nbsp; &amp;nbsp; &amp;nbsp; |&amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; | &amp;nbsp; | &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp; | &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; | &amp;nbsp; &amp;nbsp;&amp;nbsp; &lt;br /&gt;
G G A&amp;nbsp; -&amp;nbsp; - T C -&amp;nbsp; G&amp;nbsp; - -&amp;nbsp; A&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
For longer sequences a large number of other combinations are possible, but dynamic programming outputs only one result.&lt;br /&gt;
&lt;br /&gt;
[Ref: Images adapted from &lt;a href="http://www.avatar.se/molbioinfo2001/dynprog/dynamic.html"&gt;Eric C. Rouchka&lt;/a&gt;'s web page]&lt;br /&gt;
&lt;br /&gt;
In real-life examples, different penalty scores are given for a mismatch and for a gap [every time you move down a column or along a row without a match, a gap penalty should be applied].&lt;br /&gt;
&lt;br /&gt;
&lt;pre&gt;&lt;/pre&gt;&lt;pre&gt;&lt;/pre&gt;&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-3430462682730233128?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/qXTJVHAO3Y91P2M9e0TgWF1YhRs/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/qXTJVHAO3Y91P2M9e0TgWF1YhRs/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/qXTJVHAO3Y91P2M9e0TgWF1YhRs/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/qXTJVHAO3Y91P2M9e0TgWF1YhRs/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/w90aEmNbouI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/3430462682730233128/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=3430462682730233128" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/3430462682730233128?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/3430462682730233128?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/w90aEmNbouI/dynamic-programming-for-sequence.html" title="Dynamic programming for sequence Alignment" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/_avzLfLUJNqM/TOwpRbYV6FI/AAAAAAAABiM/Z7YkdRrRS70/s72-c/initial.gif" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2010/11/dynamic-programming-for-sequence.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUUNR3g7fyp7ImA9Wx9aEUo.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-8147611835378416373</id><published>2010-11-05T11:05:00.000-07:00</published><updated>2011-03-03T09:28:16.607-08:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-03-03T09:28:16.607-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Inteins Exeins" /><category scheme="http://www.blogger.com/atom/ns#" term="Random DNA/Protein Sequence permutation" /><title>Generating random DNA/protein sequence permutation</title><content type="html">In general terminology, sequence permutation is defined as shuffling of the bases without disturbing the base composition. In other words, all the permuted sequence should have the same base composition.It is fairly easy to implement such permutations when we talk about single nucleotide composition; and Fisher and Yates 1938, algorithm is a good one for implementing it without introducing much sequence bias. However, in real life situation, it may be necessary to generate a random DNA sequence where there is a need to preserve the di-tri nucleotide frequencies in overlapping windows.Altschul  SF, Erickson  BW., have explained how to generate such a sequence while conserving the di-tri nucleotide composition using a&amp;nbsp; graph theory of generating random Eulerian walks on a  directed multigraph. [&lt;a href="http://mbe.oxfordjournals.org/content/2/6/526.long"&gt;Paper Here&lt;/a&gt;]&lt;br /&gt;
&lt;br /&gt;
R has the Biostrings package with a DNAString function which, although it does not implement the Fisher-Yates algorithm, generates a non-biased permuted DNA string. This method can also be used effectively to preserve dinucleotide/trinucleotide composition, as long as the di/trinucleotides don't overlap:&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Example Code:&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&amp;gt;&lt;/b&gt;library(Biostrings)&lt;br /&gt;
&amp;gt;&amp;nbsp; x &amp;lt;- paste(sample(c("A", "C", "G", "T"), 1000, replace=TRUE,prob=c(0.2, 0.55&lt;br /&gt;
, 0.1, 0.15)), collapse="") &lt;br /&gt;
&amp;gt; x &amp;lt;- DNAString(x)&lt;br /&gt;
&amp;gt;x # will print x&lt;br /&gt;
&amp;gt; alphabetFrequency(x,baseOnly=TRUE)&amp;nbsp;&amp;nbsp; # Will print base composition in numbers&lt;br /&gt;
&lt;br /&gt;
In order to run it for di/tri nucleotide, just change the code slightly into:&lt;br /&gt;
&amp;gt;&amp;nbsp; x &amp;lt;- paste(sample(c("AT", "CC", "GT", "TA", "AA"), 1000,  replace=TRUE,prob=c(0.2, 0.55&lt;br /&gt;
, 0.1, 0.10,0.05)), collapse="")&amp;nbsp; &lt;br /&gt;
&amp;gt; x&amp;lt;-DNAString(x)&lt;br /&gt;
&amp;gt; x&lt;br /&gt;
&amp;nbsp; 2000-letter "DNAString" instance&lt;br /&gt;
seq: CCATCCCCCCCCTAATCCTATACCTATACCTACCGT...CCATATTATATACCTAATAACCCCCCTACCATCCAT&lt;br /&gt;
&lt;b&gt;Please NOTE: Here you will not get any other dinucleotide than specified IN FRAME&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
Same can be done for tri-tetra nucleotides as well.&lt;br /&gt;
&lt;br /&gt;
A very simple unix implementation of random sequence generator could be:&lt;br /&gt;
&lt;br /&gt;
$echo {a,t,g,c}{a,t,g,c}{a,t,g,c}{a,t,g,c} &lt;br /&gt;
This generates a cross product of a,t,g,c to a,t,g,c, and thus has all tetra nucleotides represented. Similar order can be generated for dinucleotides such as:&lt;br /&gt;
$echo {aa,tt,gg,cc}{at,tg,ga,ca}{aa,tc,ag,tc}{aa,at,cg,cc}&lt;br /&gt;
Enjoy generating permuted sequences.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-8147611835378416373?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/YF3yeFaipvxq_9f2eNEneDy9iLQ/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/YF3yeFaipvxq_9f2eNEneDy9iLQ/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/YF3yeFaipvxq_9f2eNEneDy9iLQ/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/YF3yeFaipvxq_9f2eNEneDy9iLQ/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/UTWHWUb3llk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/8147611835378416373/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=8147611835378416373" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/8147611835378416373?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/8147611835378416373?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/UTWHWUb3llk/generating-random-dnaprotein-sequence.html" title="Generating random DNA/protein sequence permutation" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2010/11/generating-random-dnaprotein-sequence.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUAGRncycCp7ImA9WhdXGU0.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-7987568817904487041</id><published>2010-11-03T18:57:00.000-07:00</published><updated>2011-09-01T12:02:07.998-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-01T12:02:07.998-07:00</app:edited><title>Some unix/perl oneliners for Bioinformatics</title><content type="html">&lt;div dir="ltr" 
style="text-align: left;" trbidi="on"&gt;
1&amp;nbsp;&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;&amp;nbsp;&amp;nbsp; File format conversion/line counting/counting number of files etc.&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;1.&amp;nbsp;&amp;nbsp;&amp;nbsp; $ wc -l &amp;lt;filename&amp;gt;&amp;nbsp; : count number of lines in a file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;2.&amp;nbsp;&amp;nbsp;&amp;nbsp; $ ls | wc -l&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; : count number of files in a directory.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;3.&amp;nbsp;&amp;nbsp;&amp;nbsp; $ tac &amp;lt;filename&amp;gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; : print the file in reverse order, i.e., last line first, first line last.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;4.&amp;nbsp;&amp;nbsp;&amp;nbsp; $ rev &amp;lt;filename&amp;gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; : reverse the characters within each line of the file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;5.&amp;nbsp;&amp;nbsp;&amp;nbsp; $ sed 's/.$//' or sed 's/^M$//' or sed 's/\x0D$//' : converts a dos file into unix mode.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;6.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed "s/$/`echo -e \\\r`/" or sed 's/$/\r/' or sed "s/$//": converts a unix newline into a DOS newline.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;7.&amp;nbsp;&amp;nbsp;&amp;nbsp; $ awk '1; { print "" }' : Double space a file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;8.&amp;nbsp;&amp;nbsp;&amp;nbsp; $ awk '{ total = total + NF }; END { print total+0 }' : prints the number of words in a file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;9.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed '/^$/d' or [grep '.'] : Delete all blank lines in a file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;10.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed '/./,$!d' : Delete all blank lines in the beginning of the file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;11.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -e :a -e '/^\n*$/{$d;N;ba' -e '}': Delete all blank lines at the end of the file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;12.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -e :a -e 's/&amp;lt;[^&amp;gt;]*&amp;gt;//g;/&amp;lt;/N;//ba' : Remove most HTML tags (accommodates multiple-line tags).&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;13.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed 's/^[ \t]*//' : deleting all leading white space tabs in a file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;14.&amp;nbsp;&amp;nbsp;&amp;nbsp; $ sed 's/[ \t]*$//' : Delete all trailing white space and tab in a file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;15.&amp;nbsp;&amp;nbsp;&amp;nbsp; $ sed 's/^[ \t]*//;s/[ \t]*$//' : Delete both leading and trailing white space and tab in a file.&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;2.2&amp;nbsp;&amp;nbsp;&amp;nbsp; Working with Patterns/numbers in a sequence file&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;16.&amp;nbsp;&amp;nbsp;&amp;nbsp; $awk '/Pattern/ { n++ }; END { print n+0 }' : print the total number of lines containing the word pattern.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;17.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed 10q : print first 10 lines.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;18.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '/regexp/p' : Print the line that matches the pattern.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;19.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed '/regexp/d' : Deletes the lines that matches the regexp.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;20.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '/regexp/!p' : Print the lines that does not match the pattern.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;21.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed '/regexp/!d' : Deletes the lines that does NOT match the regular expression.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;22.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '/^.\{65\}/p' : print lines that are 65 characters or longer.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;23.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '/^.\{65\}/!p' : print lines that are shorter than 65 characters.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;24.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '/regexp/{g;1!p;};h' : print one line before the pattern match.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;25.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '/regexp/{n;p;}' : print one line after the pattern match.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;26.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '/^.\{65\}/ {g;1!p;};h' &amp;lt; sojae_seq &amp;gt; tmp : print the names of the sequences that are larger than 65 nucleotide long.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;27.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '/regexp/,$p' : Print regular expression to the end of file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;28.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '8,12p' : print line 8 to 12(inclusive)&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;29.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed -n '52p' : print only line number 52.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;30.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed '/pattern1/,/pattern2/d' &amp;lt; inputfile &amp;gt; outfile : will delete all the lines from the one matching pattern1 through the one matching pattern2 (inclusive).&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;31.&amp;nbsp;&amp;nbsp;&amp;nbsp; $sed '20,30d' &amp;lt; inputfile &amp;gt; outfile : will delete lines 20 through 30 (inclusive).&amp;nbsp;&amp;nbsp; OR sed '20,30d' inputfile &amp;gt; output gives the same result with the file passed as an argument.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;32.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '/baz/ { gsub(/foo/, "bar") }; { print }' : Substitute foo with bar in lines that contains ‘baz’.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;33.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '!/baz/ { gsub(/foo/, "bar") }; { print }' : Substitute foo with bar in lines that does not contain ‘baz’.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;34.&amp;nbsp;&amp;nbsp;&amp;nbsp; grep -i -B 1 'pattern' filename &amp;gt; out : Will print the name of the sequence and the sequence having the pattern in a case insensitive way(make sure the sequence name and the sequence each occupy a single line).&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;35.&amp;nbsp;&amp;nbsp;&amp;nbsp; grep -i -A 1 'seqname' filename &amp;gt; out : will print the sequence name as well as the sequence into file ‘out’.&amp;nbsp; &lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;2.3&amp;nbsp;&amp;nbsp;&amp;nbsp; Inserting Data into a file:&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;36.&amp;nbsp;&amp;nbsp;&amp;nbsp; gawk --re-interval 'BEGIN{ while(a++&amp;lt;49) s=s "x" }; { sub(/^.{6}/,"&amp;amp;" s) }; 1' &amp;lt;filename&amp;gt; &amp;gt; fileout : will insert 49 ‘x’ characters after the sixth character of every line.&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;37.&amp;nbsp;&amp;nbsp;&amp;nbsp; gawk --re-interval 'BEGIN{ s="YourName" }; { sub(/^.{6}/,"&amp;amp;" s) }; 1' : Insert your name at the 6 th position in every line.&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;3.&amp;nbsp;&amp;nbsp;&amp;nbsp; Working with Data Files[Tab delimited files]:&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;3.1&amp;nbsp;&amp;nbsp;&amp;nbsp; Error Checking and data handling:&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;38.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '{ print NF ":" $0 } ' : print the number of fields of each line followed by the line.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;39.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '{ print $NF }' : print the last field of each line.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;40.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk 'NF &amp;gt; n' : print every line with more than ‘n’ fields.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;41.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '$NF &amp;gt; n' : print every line where the last field is greater than n.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;42.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '{ print $2, $1 }' &amp;lt;file&amp;gt; : prints just first 2 fields of a data file in reverse order.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;43.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '{ temp = $1; $1 = $2; $2 = temp; print }' : swaps the first 2 fields and prints every line with the remaining fields in their original order.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;44.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '{ for (i=NF; i&amp;gt;0; i--) printf("%s ", $i); printf ("\n") }' : prints all the fields in reverse order.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;45.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '{ $2 = ""; print }' : deletes the 2nd field in each line.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;46.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '$5 == "abc123"' : print each line where the 5th field is equal to ‘abc123’.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;47.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '$5 != "abc123"' : print each line where 5th field is NOT equal to abc123.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;48.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '$7&amp;nbsp; ~ /^[a-f]/' : Print each line whose 7th field matches the regular expression.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;49.&amp;nbsp;&amp;nbsp;&amp;nbsp; awk '$7 !~ /^[a-f]/' : print each line whose 7th field does NOT match the regular expression.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;50.&amp;nbsp;&amp;nbsp;&amp;nbsp; cut -f n1,n2,n3.. &amp;lt;inputfile&amp;gt; &amp;gt; output file : will cut n1,n2,n3 columns(fields) from input file and print the output in output file. If delimiter is other than TAB then give additional argument such as cut -d ',' -f n1,n2.. inputfile &amp;gt; out&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;51.&amp;nbsp;&amp;nbsp;&amp;nbsp; sort -n -k 2,2 -k 4,4 file &amp;gt; fileout : Will conduct a numerical sort of column 2, and then column 4. If -n is not specified, then, sort will do a lexicographical sort(of the ascii value).&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;4.&amp;nbsp;&amp;nbsp;&amp;nbsp; Miscellaneous:&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;52.&amp;nbsp;&amp;nbsp;&amp;nbsp; uniq -u inputfile &amp;gt; out : will print only the unique (non-repeated) lines present in the sorted input file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;53.&amp;nbsp;&amp;nbsp;&amp;nbsp; uniq -d inputfile &amp;gt; out : will print only the lines that are duplicated in the sorted input file.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;54.&amp;nbsp;&amp;nbsp;&amp;nbsp; cat file1 file2 file3 … fileN &amp;gt; outfile : Will concatenate files back to back in outfile.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;55.&amp;nbsp;&amp;nbsp;&amp;nbsp; paste file1 file2 &amp;gt; outfile : will merge two files horizontally. This function is good for merging with same number of rows but different column width.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;56.&amp;nbsp;&amp;nbsp;&amp;nbsp; !&amp;lt;pattern&amp;gt;:p : will print (without executing) the most recent command that starts with ‘pattern’.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;57.&amp;nbsp;&amp;nbsp;&amp;nbsp; !! : repeat the last command entered at the shell.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;58.&amp;nbsp;&amp;nbsp;&amp;nbsp; cd ~ : Go back to home directory&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;59.&amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;echo {a,t,g,c}{a,t,g,c}{a,t,g,c}{a,t,g,c} : will generate all tetramers using ‘atgc’. If you want pentamers/hexamers etc. then just increase the number of bracketed entities.NOTE: This is not a efficient sequence shuffler. If you wish to generate longer sequences then use other means.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;60.&amp;nbsp;&amp;nbsp;&amp;nbsp; kill -HUP ` ps -aef | grep -i firefox | sort -k 2 -r | sed 1d | awk ' { print $2 } ' ` : Kills a hanging firefox process.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;61.&amp;nbsp;&amp;nbsp;&amp;nbsp; csplit -n 7 input.fasta '/&amp;gt;/' '{*}' : will split the file ‘input.fasta’ wherever it encounters delimiter ‘&amp;gt;’. The file names will appear as 7 digit long strings.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;62.&amp;nbsp;&amp;nbsp;&amp;nbsp; find . -name data.txt -print : finds and prints the path for file data.txt.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;Sample Script to make set operations on sequence files:&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;63.&amp;nbsp;&amp;nbsp;&amp;nbsp; grep ‘&amp;gt;’ filenameA &amp;gt; list1&amp;nbsp; # Will list just the sequence names in a file names.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;grep ‘&amp;gt;’ filenameB &amp;gt; list2 # Will list names for file 2&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;cat list1 list2 &amp;gt; tmp # concatenates list1 and list2 into tmp&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;sort tmp &amp;gt; tmp1 # File sorted&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;uniq -u tmp1 &amp;gt; uniq&amp;nbsp;&amp;nbsp;&amp;nbsp; # AUB – A ∩ B (OR (A-B) U (B-A)) &lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;uniq -d tmp1 &amp;gt; double&amp;nbsp; # Is the intersection (A ∩ B)&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;cat uniq double &amp;gt; Union # AUB&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;cat list1 double &amp;gt; tmp&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;sort tmp | uniq -u &amp;gt; list1uniq # A - B&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;cat list2 double &amp;gt; tmp&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;sort tmp | uniq -u &amp;gt; list2uniq # B - A&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;PERL ONELINERS:&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;1.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe '$\="\n"' &amp;lt;filename&amp;gt;&amp;nbsp; : double space a file&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;2.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe '$_ .= "\n" unless /^$/' : double space a file except blank lines&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;3.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe '$_.="\n"x7' : add 7 blank lines after every line.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;4.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -ne 'print unless /^$/' : remove all blank lines&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;5.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -lne 'print if length($_) &amp;lt; 20' : print all lines with length less than 20.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;6.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -00 -pe '' : Collapse every run of consecutive blank lines into a single blank line (make the file a single spaced file).&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;7.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -00 -pe '$_.="\n"x4' : Expand single blank lines into 4 consecutive blank lines&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;8.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe '$_ = "$. $_"': Number all lines in a file&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;9.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe '$_ = ++$a." $_" if /./' : Number only non-empty lines in a file&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;10.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -ne 'print ++$a." $_" if /./' : Number and print only non-empty lines in a file&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;11.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe '$_ = ++$a." $_" if /regex/' : Number only lines that match a pattern&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;12.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -ne 'print ++$a." $_" if /regex/' : Number and print only lines that match a pattern&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;13.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -ne 'printf "%-5d %s", $., $_ if /regex/' : Print each line that matches a pattern, preceded by its line number left-aligned in a 5-character column (perl -ne 'printf "%-5d %s", $., $_' : for all the lines)&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;14.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le 'print scalar(grep{/./}&amp;lt;&amp;gt;)' : prints the total number of non-empty lines in a file&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;15.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -lne '$a++ if /regex/; END {print $a+0}' : print the total number of lines that matches the pattern&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;16.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -alne 'print scalar @F' : print the total number fields(words) in each line.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;17.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -alne '$t += @F; END { print $t}' : Find total number of words in the file&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;18.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -alne 'map { /regex/ &amp;amp;&amp;amp; $t++ } @F; END { print $t }' : find total number of fields that match the pattern&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;19.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -lne '/regex/ &amp;amp;&amp;amp; $t++; END { print $t }' : Find total number of lines that match a pattern&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;20.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le '$n = 20; $m = 35; ($m,$n) = ($n,$m%$n) while $n; print $m' : will calculate the GCD of two numbers.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;21.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le '$a = $n = 20; $b = $m = 35; ($m,$n) = ($n,$m%$n) while $n; print $a*$b/$m' : will calculate the LCM (least common multiple) of 20 and 35.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;22.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le '$n=10; $min=5; $max=15; $, = " "; print map { int(rand($max-$min))+$min } 1..$n' : Generates 10 random integers between 5 and 15 (15 itself is never produced).&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;23.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le 'print map { ("a".."z","0".."9")[rand 36] } 1..8': Generates an 8 character password from a to z and number 0 – 9.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;24.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le 'print map { ("a","t","g","c")[rand 4] } 1..20': Generates a 20 nucleotide long random sequence.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;25.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le 'print "a"x50': generate a string of ‘a’ 50 characters long&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;26.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le 'print join ", ", map { ord } split //, "hello world"': Will print the ascii value of the string hello world.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;27.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le '@ascii = (99, 111, 100, 105, 110, 103); print pack("C*", @ascii)': converts ascii values into character strings.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;28.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le '@odd = grep {$_ % 2 == 1} 1..100; print "@odd"': Generates an array of odd numbers.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;29.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -le '@even = grep {$_ % 2 == 0} 1..100; print "@even"': Generate an array of even numbers&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;30.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -lpe 'y/A-Za-z/N-ZA-Mn-za-m/' file: Convert the entire file into 13 characters offset(ROT13)&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;31.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -nle 'print uc' : Convert all text to uppercase:&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;32.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -nle 'print lc' : Convert text to lowercase:&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;33.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -nle 'print ucfirst lc' : Convert only the first letter of each line to uppercase (the rest of the line is lowercased)&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;34.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -ple 'y/A-Za-z/a-zA-Z/' : Convert upper case to lower case and vice versa&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;35.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -ple 's/(\w+)/\u$1/g' : Camel Casing&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;36.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe 's|\n|\r\n|' : Convert unix new lines into DOS new lines:&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;37.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe 's|\r\n|\n|' : Convert DOS newlines into unix new line&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;38.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe 's|\n|\r|' : Convert unix newlines into MAC newlines:&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace;"&gt;39.&amp;nbsp;&amp;nbsp;&amp;nbsp; perl -pe '/regexp/ &amp;amp;&amp;amp; s/foo/bar/' : Substitute a foo with a bar in a line with a regexp.&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Some other Perl Tricks&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Want to display some progress bars while perl does your job:&lt;br /&gt;
&lt;br /&gt;
For this perl provides a nice utility called "pipe opens" ('perldoc -f open' will provide more info)&lt;br /&gt;
&lt;pre class="lang-perl prettyprint"&gt;&lt;code&gt;&lt;span class="pln"&gt;open&lt;/span&gt;&lt;span class="pun"&gt;(&lt;/span&gt;&lt;span class="kwd"&gt;my&lt;/span&gt;&lt;span class="pln"&gt; $file&lt;/span&gt;&lt;span class="pun"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="str"&gt;'-|'&lt;/span&gt;&lt;span class="pun"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="str"&gt;'command'&lt;/span&gt;&lt;span class="pun"&gt;,'option', 'option',&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun"&gt;...)&lt;/span&gt;&lt;span class="pln"&gt; or &lt;/span&gt;&lt;span class="kwd"&gt;die&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="str"&gt;"Could not run tar ... - $!"&lt;/span&gt;&lt;span class="pun"&gt;;&lt;/span&gt;&lt;span class="pln"&gt;
&amp;nbsp; &lt;/span&gt;&lt;span class="kwd"&gt;while&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun"&gt;(&amp;lt;&lt;/span&gt;&lt;span class="pln"&gt;$file&amp;gt;&lt;/span&gt;&lt;span class="pun"&gt;)&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun"&gt;{&lt;/span&gt;&lt;span class="pln"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;span class="kwd"&gt;print&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="str"&gt;"-"&lt;/span&gt;&lt;span class="pun"&gt;;&lt;/span&gt;&lt;span class="pln"&gt;
&amp;nbsp; &lt;/span&gt;&lt;span class="pun"&gt;}&lt;/span&gt;&lt;span class="pln"&gt;
&amp;nbsp; &lt;/span&gt;&lt;span class="kwd"&gt;print&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="str"&gt;"\n"&lt;/span&gt;&lt;span class="pun"&gt;;&lt;/span&gt;&lt;span class="pln"&gt;
&amp;nbsp; close&lt;/span&gt;&lt;span class="pun"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;$file&lt;/span&gt;&lt;span class="pun"&gt;);&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;
&lt;pre class="lang-perl prettyprint"&gt;&lt;code&gt;&lt;span class="pun"&gt;&amp;nbsp;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;
&lt;pre class="lang-perl prettyprint"&gt;&lt;code&gt;&lt;span class="pun"&gt;Will print - on the screen till the process is completed &lt;/span&gt;&lt;span class="pln"&gt;
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-7987568817904487041?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/qkByMxCyL5bmEejVkSUF6k20BMQ/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/qkByMxCyL5bmEejVkSUF6k20BMQ/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/qkByMxCyL5bmEejVkSUF6k20BMQ/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/qkByMxCyL5bmEejVkSUF6k20BMQ/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/sSOwlf4VuRo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/7987568817904487041/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=7987568817904487041" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/7987568817904487041?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/7987568817904487041?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/sSOwlf4VuRo/some-unixperl-oneliners-for.html" title="Some unix/perl oneliners for Bioinformatics" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2010/11/some-unixperl-oneliners-for.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0cMQXw9eCp7ImA9Wx5bEko.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-4292497891723319088</id><published>2010-10-27T16:40:00.000-07:00</published><updated>2010-10-28T06:04:40.260-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-10-28T06:04:40.260-07:00</app:edited><title>Protein Splicing : Inteins and Exteins</title><content type="html">Protein splicing with inteins(protein introns), 
exteins(protein exons) was discovered some 20 years ago. This process is efficient and autocatalytic: the intein excises itself from the primary protein product (precursor protein) and then catalyzes the joining of the broken ends, forming 2 protein products: 1) the mature protein and 2) the intein itself.&amp;nbsp; So, the protein fragments that are joined together to form the mature protein are called exteins (analogous to RNA exons), and inteins are the protein introns.&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_avzLfLUJNqM/TMi4MJB95eI/AAAAAAAABYg/5jOqiR31r7w/s1600/Slide1.JPG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="240" src="http://3.bp.blogspot.com/_avzLfLUJNqM/TMi4MJB95eI/AAAAAAAABYg/5jOqiR31r7w/s320/Slide1.JPG" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_avzLfLUJNqM/TMi4O1hKouI/AAAAAAAABYk/yAdGwpCv1vU/s1600/Slide2.JPG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="240" src="http://2.bp.blogspot.com/_avzLfLUJNqM/TMi4O1hKouI/AAAAAAAABYk/yAdGwpCv1vU/s320/Slide2.JPG" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_avzLfLUJNqM/TMi2UomrHiI/AAAAAAAABYc/yqxaEk1fshk/s1600/inteins.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;br /&gt;
&lt;/a&gt;&lt;/div&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Fig reference: &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed?term=%22Elleuche%20S%22%5BAuthor%5D"&gt;Elleuche  S&lt;/a&gt;, &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed?term=%22P%C3%B6ggeler%20S%22%5BAuthor%5D"&gt;Pöggeler  S&lt;/a&gt;. Applied Microbial Biotechnology,2010&lt;br /&gt;
&lt;br /&gt;
All these inteins have sequence similarity with homing nucleases. Homing nucleases are a very interesting class of proteins that cleave DNA at a particular recognition site. The recognition site is long enough (12 - 40 bp, as against restriction enzymes that recognize 8 bp or less) that it is very unlikely to occur in a genome by chance. Usually the genes encoding homing nucleases occur inside the DNA element that is the recognition site for cleavage by themselves, thus preventing the cleavage of the DNA sequence that carries them; so they are a class of &lt;b&gt;selfish genes&lt;/b&gt;. It is very interesting how these homing nucleases propagate themselves to alleles that lack them. The allele that has the homing nuclease is called HEG+ and the one not having it is called HEG-. Usually the HEG+ allele cleaves the HEG- allele, thus initiating homologous recombination DNA repair. Once repair is initiated, HEG+ is copied into the HEG- locus, thus propagating it.&lt;br /&gt;
There are currently 4 structural domains of homing endonucleases:&lt;br /&gt;
1)&lt;b&gt;LAGLIDADG: If present alone, needs a homodimer to act against a DNA sequence.&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;2)&lt;/b&gt;&lt;b&gt;GIY-YIG: Acts as a monomer, occurs in the N terminus.&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;3)&lt;/b&gt;&lt;b&gt;His-Cys box: 2 histidines and 3 cysteines, acts as a monomer.&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;4)H-N-H: 2 pairs of histidines flanking one asparagine.&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;A list of intein integration sites is available here [&lt;a href="http://bioinformatics.weizmann.ac.il/%7Epietro/inteins/Intein_alleles.html"&gt;Link&lt;/a&gt;]&amp;nbsp;&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;Intein database is [&lt;a href="http://www.neb.com/neb/inteins.html"&gt;Here&lt;/a&gt;]&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Reference:&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;http://www.ncbi.nlm.nih.gov/pmc/articles/PMC523631/pdf/nar00031-0013.pdf&amp;nbsp;&lt;/b&gt;&lt;b&gt; &lt;/b&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-4292497891723319088?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/OCU826AXsriPwNnY6JIGI2LcVqw/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/OCU826AXsriPwNnY6JIGI2LcVqw/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/OCU826AXsriPwNnY6JIGI2LcVqw/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/OCU826AXsriPwNnY6JIGI2LcVqw/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/QSOZ3Jzgmvk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/4292497891723319088/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=4292497891723319088" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/4292497891723319088?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/4292497891723319088?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/QSOZ3Jzgmvk/protein-splicing-inteins-and-exteins.html" title="Protein Splicing : Inteins and Exteins" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/_avzLfLUJNqM/TMi4MJB95eI/AAAAAAAABYg/5jOqiR31r7w/s72-c/Slide1.JPG" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2010/10/protein-splicing-inteins-and-exteins.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkEHQ3o7fip7ImA9Wx5QGUg.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-1407768034126876468</id><published>2010-09-03T17:04:00.000-07:00</published><updated>2010-09-08T07:03:52.406-07:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2010-09-08T07:03:52.406-07:00</app:edited><title>Color Space to fastq</title><content type="html">As next-generation sequencing is still evolving, so is the myriad of tools built in and around these sequences. Applied Biosystems di-base sequencing, which uses ligation-based chemistry, ensures system accuracy and high throughput.&lt;br /&gt;
&amp;nbsp;&lt;b&gt;What is SOLiD system?&lt;/b&gt;&lt;br /&gt;
SOLiD stands for "&lt;i&gt;Sequencing&lt;/i&gt; by Oligonucleotide Ligation and Detection". Due to its 2 base encoding system, it ensures greater accuracy, e.g. 99.94%.&lt;br /&gt;
The decoding process is kind of tricky, since each color represents a dinucleotide rather than a single base. Altogether there are 4 colors, i.e. 0, 1, 2, 3, each standing for 4 of the 16 possible dinucleotides built from the 4 bases A, C, G, T.&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_avzLfLUJNqM/TIA7pDoehrI/AAAAAAAABW8/FLuaWHtWzeM/s1600/NextGenSequencingPresentation.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="240" src="http://1.bp.blogspot.com/_avzLfLUJNqM/TIA7pDoehrI/AAAAAAAABW8/FLuaWHtWzeM/s320/NextGenSequencingPresentation.png" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Color Decoding&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;So, if a csfasta file has the first base known, then the subsequent bases can be calculated using the decoding table as below:&lt;br /&gt;
&lt;br /&gt;
Example:&lt;br /&gt;
[0 -&amp;gt; AA, GG, CC, TT ; 1 -&amp;gt; CA, AC, TG, GT; 2-&amp;gt; GA, TC, AG, CT; 3 -&amp;gt; TA, GC, CG, AT]&lt;br /&gt;
&amp;gt;44_35_267_F3&lt;br /&gt;
T20220213203000111000122223221121222&lt;br /&gt;
&lt;br /&gt;
T2 -&amp;gt; TC (number 2 can be GA,TC,AG,CT: but only TC starts with T, so the first number is deciphered to 'TC')&lt;br /&gt;
0&amp;nbsp; -&amp;gt;&amp;nbsp; CC&lt;br /&gt;
2&amp;nbsp; -&amp;gt;&amp;nbsp; CT&lt;br /&gt;
2&amp;nbsp; -&amp;gt;&amp;nbsp; TC&lt;br /&gt;
0&amp;nbsp; -&amp;gt;&amp;nbsp; CC&lt;br /&gt;
2&amp;nbsp; -&amp;gt;&amp;nbsp; CT&lt;br /&gt;
1&amp;nbsp; -&amp;gt;&amp;nbsp; TG&lt;br /&gt;
3&amp;nbsp; -&amp;gt;&amp;nbsp; GC&lt;br /&gt;
2&amp;nbsp; -&amp;gt;&amp;nbsp; CT&lt;br /&gt;
0&amp;nbsp; -&amp;gt;&amp;nbsp; TT and so on...&lt;br /&gt;
&lt;br /&gt;
So, the colorspace translates into CCTCCTGCTT......&lt;br /&gt;
&lt;br /&gt;
Now how about an error? If the colorspace is represented by a number &amp;gt;=4 or a "." , how do we decode the rest of the read? I guess, in that case, we can designate the rest of the read as 'N'; this can be done especially because we generate an enormously large number of reads, and ignoring some of them will not matter much.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Converting Quality Scores:&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Now, the next step is to convert the quality scores into sanger fastq format. Sanger Fastq standard was defined by Jim Mullikin, gradually disseminated, but never formally documented. The biggest drawback of numeric phred quality scores is the need to separate the numbers with spaces, which increases storage space; the numbers are also often 2 digits long, which again adds to the space issue.&lt;br /&gt;
&lt;br /&gt;
Phred value is calculated as:&lt;br /&gt;
&lt;br /&gt;
Qphred = -10 X log10(Pe), where Pe stands for the estimated probability that the base call is an error.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;From Phred to Sanger:&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Converting phred quality scores to Sanger Quality score is quite straightforward. Phred values 0 - 93 are represented by ASCII 33 - 126. 33 was used as an offset because ASCII 32 represents a white space.&lt;br /&gt;
The paper describing the details of sanger fastq and colorspace can be found &lt;a href="http://nar.oxfordjournals.org/cgi/content/full/gkp1137v1"&gt;here&lt;/a&gt; :&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
So, in order to convert Phred to Sanger in the Perl language:&lt;br /&gt;
&lt;br /&gt;
$q = chr(($Q&amp;lt;=93? $Q : 93) + 33);&lt;br /&gt;
The paper describing Fastq format can be found here&lt;br /&gt;
Now how to code the colorspace to nucleotide conversion:&lt;br /&gt;
# Generate hash [popular notation]&lt;br /&gt;
my @code = ([0,1,2,3],[1,0,3,2],[2,3,0,1],[3,2,1,0]);&lt;br /&gt;
my @bases = qw(A C G T);&lt;br /&gt;
my %decode = ();&lt;br /&gt;
foreach my $i(0..3)  {&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; foreach my $j(0..3)          {&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; $decode{$code[$i]-&amp;gt;[$j]} -&amp;gt; {$bases[$i]} = $bases[$j];&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
Here $decode hash has values like:&lt;br /&gt;
&lt;br /&gt;
$decode{0}-&amp;gt;A = A;&lt;br /&gt;
$decode{1}-&amp;gt;A = C;&lt;br /&gt;
$decode{2}-&amp;gt;A =G ; &lt;br /&gt;
$decode{3}-&amp;gt;A =T ;  &lt;br /&gt;
$decode{1}-&amp;gt;C =A ; &lt;br /&gt;
$decode{0}-&amp;gt;C =C ;  &lt;br /&gt;
$decode{3}-&amp;gt;C =G ; &lt;br /&gt;
$decode{2}-&amp;gt;C =T ;  &lt;br /&gt;
$decode{2}-&amp;gt;G = A; &lt;br /&gt;
$decode{3}-&amp;gt;G =C ;  &lt;br /&gt;
$decode{0}-&amp;gt;G =G ; &lt;br /&gt;
$decode{1}-&amp;gt;G =T ;  &lt;br /&gt;
$decode{3}-&amp;gt;T =A ; &lt;br /&gt;
$decode{2}-&amp;gt;T =C ;  &lt;br /&gt;
$decode{1}-&amp;gt;T =G ; &lt;br /&gt;
$decode{0}-&amp;gt;T =T ;&lt;br /&gt;
sub decode{&lt;br /&gt;
my $str=shift;&lt;br /&gt;
my @arr = split(//, $str);&lt;br /&gt;
my $seq='';&lt;br /&gt;
my $base='';&lt;br /&gt;
my $anchor = shift(@arr); # The first anchor tag&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; for(my $i=0;$i&amp;lt;@arr;$i++){&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; $base=$decode{$arr[$i]}-&amp;gt;{$anchor};&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; $seq .= $base;&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; $anchor = $base;&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; }&lt;br /&gt;
return $seq;&lt;br /&gt;
&lt;br /&gt;
} # End of subroutine&lt;br /&gt;
[Modified version of the script can be found here]&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-1407768034126876468?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/NrdEm-xyv7z_S_YtBYj0Kljg4JY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/NrdEm-xyv7z_S_YtBYj0Kljg4JY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/NrdEm-xyv7z_S_YtBYj0Kljg4JY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/NrdEm-xyv7z_S_YtBYj0Kljg4JY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/c-2BIev6ZhM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/1407768034126876468/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=1407768034126876468" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/1407768034126876468?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/1407768034126876468?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/c-2BIev6ZhM/color-space-to-fastq.html" title="Color Space to fastq" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_avzLfLUJNqM/TIA7pDoehrI/AAAAAAAABW8/FLuaWHtWzeM/s72-c/NextGenSequencingPresentation.png" height="72" width="72" /><thr:total>2</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2010/09/color-space-to-fastq.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkMCRH05eSp7ImA9Wx5QEUw.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-7247935460192138387</id><published>2010-08-27T14:28:00.000-07:00</published><updated>2010-08-29T14:47:45.321-07:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2010-08-29T14:47:45.321-07:00</app:edited><title>Plotting SAM output</title><content type="html">Output from Nextgeneration sequence alignment to genome assembly comes in SAM(Sequence Alignment Map) format. SAM files  can be large, so a binary format called BAM is used most often for ease  of handling. While full documentation on samtools can be found&lt;a href="http://samtools.sourceforge.net/samtools.shtml"&gt; here&lt;/a&gt; , documentation on SAM format can be found&lt;a href="http://samtools.sourceforge.net/SAM1.pdf"&gt; here&lt;/a&gt;. For quick reference, let me put the SAM alignment format here:&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
[qname][flag][rname][pos][mapq][cigar][mrnm][mpos][isize][seq][qual][tag][vtype]&lt;br /&gt;
&lt;b&gt;[qname]:&lt;/b&gt; Query Name&lt;br /&gt;
&lt;b&gt;[flag]:&lt;/b&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Is a bitwise flag (a set of bits) represented in&amp;nbsp; decimal format, where each bit of the value, when converted into binary, has the following meaning:&lt;br /&gt;
0x0001 the read is paired in sequencing, no matter whether it is mapped in a pair&lt;br /&gt;
0x0002 the read is mapped in a proper pair (depends on the protocol, normally inferred during alignment) [note 1]&lt;br /&gt;
0x0004 the query sequence itself is unmapped&lt;br /&gt;
0x0008 the mate is unmapped [note 1]&lt;br /&gt;
0x0010 strand of the query (0 for forward; 1 for reverse strand)&lt;br /&gt;
0x0020 strand of the mate [note 1]&lt;br /&gt;
0x0040 the read is the first read in a pair [notes 1, 2]&lt;br /&gt;
0x0080 the read is the second read in a pair [notes 1, 2]&lt;br /&gt;
0x0100 the alignment is not primary (a read having split hits may have multiple primary alignment records)&lt;br /&gt;
0x0200 the read fails platform/vendor quality checks&lt;br /&gt;
0x0400 the read is either a PCR duplicate or an optical duplicate&lt;br /&gt;
Where;&lt;br /&gt;
1. Flag 0x02, 0x08, 0x20, 0x40 and 0x80 are only meaningful when flag 0x01 is present.&lt;br /&gt;
2. If in a read pair the information on which read is the first in the pair is lost in the upstream analysis, flag 0x01 should be present and 0x40 and 0x80 are both zero.&lt;br /&gt;
Example: In our case, we mostly get 0 or 16 as the value, where 0 means(000000&lt;b&gt;0&lt;/b&gt;0000) forward strand and 16 means(000000&lt;b&gt;1&lt;/b&gt;0000) reverse strand. We are NOT concerned about rest of the bits because ours is not a paired end alignment. &lt;br /&gt;
CIGAR FORMAT:&lt;br /&gt;
M Alignment match (can be a sequence match or mismatch)&lt;br /&gt;
I Insertion to the reference&lt;br /&gt;
D Deletion from the reference&lt;br /&gt;
N Skipped region from the reference&lt;br /&gt;
S Soft clip on the read (clipped sequence present in &amp;lt;seq&amp;gt;)&lt;br /&gt;
H Hard clip on the read (clipped sequence NOT present in &amp;lt;seq&amp;gt;)&lt;br /&gt;
P Padding (silent deletion from the padded reference sequence)&lt;br /&gt;
Let's not discuss the other fields. &lt;br /&gt;
Samtools view command:&lt;br /&gt;
samtools view&amp;nbsp; $DATAFILE/sorted.bam super_0:1000-30000 | cut -f 2,3,4,10 &amp;gt; tmp2&lt;br /&gt;
[One thing to remember here is that the sorted bam files need to be indexed before using this command. So, in other words, keep the index files (sorted.bam.bai) in the same directory.]&lt;br /&gt;
super_0 25699&amp;nbsp;&amp;nbsp; ATTTAAACTAAGCTACGCTTCCTCACATACACGCGTACACGTGTAAGC&amp;nbsp;&lt;/seq&gt;&lt;/seq&gt;&lt;br /&gt;
OR&lt;br /&gt;
If you want to see it in human readable format use '-X' after 'samtools view'&lt;br /&gt;
The output could be:&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; super_0 35110&amp;nbsp;&amp;nbsp; CGGTTGCTAGCGTTAGTGCTGAGGAAACCCTTTAGATCGTAATCCAGT&lt;br /&gt;
r&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; super_0 41561&amp;nbsp;&amp;nbsp; TGTCGTGTGTACTGAGAAACTTGTATGATGTCTGAATTCTTCAGGCTG&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
Where the first line means the forward strand and the second line means the reverse strand&lt;br /&gt;
&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&lt;br /&gt;
Lets now try to display this information on the browser:&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;my $rowht=0.005; # Or change it according to your need&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;my $rowwidth = 0.005;&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;my $leftedge = #define here;&amp;nbsp;&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;my $leftheight=#define here;&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&lt;br /&gt;
&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;for my $i(0 .. $#points){&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; # Positive strand make it blue &lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if($points[0] == 0){&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; $image-&amp;gt;filledRectangle(($points[2]-$leftedge), $leftheight,($points[2] - $leftedge + $rowwidth), ($leftheight+$rowht), $color1);&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; }&amp;nbsp;&amp;nbsp;&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; els&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;if($points[0] == 16){&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&amp;nbsp;  &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; $image-&amp;gt;filledRectangle(($points[2]-$leftedge),  $leftheight,($points[2] - $leftedge + $rowwidth), ($leftheight+$rowht),  $color2);&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&amp;nbsp;  &amp;nbsp; &amp;nbsp;&amp;nbsp; }&lt;br /&gt;
} &lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&lt;br /&gt;
&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;print FH $image-&amp;gt;png;&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&lt;br /&gt;
&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;br /&gt;
&lt;qname&gt;&lt;flag&gt;&lt;rname&gt;&lt;pos&gt;&lt;mapq&gt;&lt;cigar&gt;&lt;mrnm&gt;&lt;mpos&gt;&lt;isize&gt;&lt;seq&gt;&lt;qual&gt;&lt;tag&gt;&lt;vtype&gt;&lt;value&gt;&lt;br /&gt;
&lt;/value&gt;&lt;/vtype&gt;&lt;/tag&gt;&lt;/qual&gt;&lt;/seq&gt;&lt;/isize&gt;&lt;/mpos&gt;&lt;/mrnm&gt;&lt;/cigar&gt;&lt;/mapq&gt;&lt;/pos&gt;&lt;/rname&gt;&lt;/flag&gt;&lt;/qname&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/28219072-7247935460192138387?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/tc4DWMkgl0PqQJ-Rux30BQQuZkU/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/tc4DWMkgl0PqQJ-Rux30BQQuZkU/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/tc4DWMkgl0PqQJ-Rux30BQQuZkU/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/tc4DWMkgl0PqQJ-Rux30BQQuZkU/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/Nd2I1aNjZV8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/7247935460192138387/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=7247935460192138387" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/7247935460192138387?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/7247935460192138387?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/Nd2I1aNjZV8/plotting-sam-output.html" title="Plotting SAM output" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2010/08/plotting-sam-output.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0YARns6fip7ImA9WxFUEEU.&quot;"><id>tag:blogger.com,1999:blog-28219072.post-5552104236732287818</id><published>2010-06-20T19:59:00.000-07:00</published><updated>2010-06-20T19:59:07.516-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-06-20T19:59:07.516-07:00</app:edited><title>Pneumococcal Fratricides</title><content type="html">&amp;nbsp;Excerpts from &lt;em&gt;&lt;strong&gt;&lt;span style="font-size: 1.1em;"&gt;César Sánchez's 
blog&lt;/span&gt;&lt;/strong&gt;&lt;/em&gt;&lt;br /&gt;
Some bacteria produce substances that kill surrounding microbes, and use the resulting dead bodies as a source of nutrients. Sometimes, killer and victim belong to the same species, or even they are siblings. In these cases, researchers speak of &lt;a href="http://dx.doi.org/10.1038/nrmicro1613"&gt;cannibalism or fratricide&lt;/a&gt;; although if you view microbial populations as coordinated, multicellular entities, then you may prefer to use the term &lt;a href="http://dx.plos.org/10.1371/journal.pgen.0020135"&gt;programmed cell death&lt;/a&gt;.&lt;br /&gt;
Among pneumococci, some cells in a population become &lt;a href="http://en.wikipedia.org/wiki/Competence_%28biology%29"&gt;competent&lt;/a&gt; in response to certain signals; which means that they are able to take up DNA from their surroundings, and incorporate this genetic information into their own chromosome. This way, competent cells can acquire new inheritable abilities—such as production of a new capsule type, or resistance to an antibiotic—that can be very important for their survival. (This was the underlying mechanism in the famous &lt;a href="http://en.wikipedia.org/wiki/Avery%E2%80%93MacLeod%E2%80%93McCarty_experiment"&gt;Avery-MacLeod-McCarty experiment&lt;/a&gt; that helped identify DNA as the hereditary material in cells.)&lt;br /&gt;
But competent pneumococci do something else: they encourage non-competent siblings and other closely-related bacteria to commit suicide. They do this by releasing a particular lytic enzyme, called CbpD, that diffuses through the milieu and—somehow—activates LytC and other lytic enzymes that are already present in the non-competent siblings. Cell wall weakening finally results in a big bang: that is, the explosion of the non-competent pneumococci. The materials released serve not only as nutrients and sources of genetic information (DNA), but also as virulence factors that help competent cells to survive in their human host.&lt;br /&gt;
&lt;div style="float: right; margin: 10px 0px 5px 25px;"&gt;&lt;a href="http://schaechter.asmblog.org/.a/6a00d8341c5e1453ef0134828845a5970c-popup" onclick="window.open( this.href, '_blank', 'width=640,height=480,scrollbars=no,resizable=no,toolbar=no,directories=no,location=no,menubar=no,status=no,left=0,top=0' ); return false"&gt;&lt;img alt="Lytc" at-xid-6a00d8341c5e1453ef0134828845a5970c="" src="http://schaechter.asmblog.org/.a/6a00d8341c5e1453ef0134828845a5970c-200wi" style="width: 190px;" /&gt;&lt;/a&gt;  &lt;span style="font-family: times,'Times New Roman',serif; font-size: 1em; line-height: 1.2;"&gt;&lt;br /&gt;
&lt;br /&gt;
The structure of the pneumococcal&lt;br /&gt;
autolysin, LytC. &lt;a href="http://www.xtal.iqfr.csic.es/projects/bio-cryst/bio-crystallography.html"&gt;Source&lt;/a&gt;.&lt;/span&gt;&lt;/div&gt;&lt;div style="padding-top: 2em;"&gt;The &lt;a href="http://www.nature.com/nsmb/journal/v17/n5/abs/nsmb.1817.html"&gt;3D structure of LytC&lt;/a&gt; now provides the clues to explain the enzyme's peculiar behaviour during pneumococcal fratricide. Have a look at the model of LytC on the right: ain't it a beauty? A substrate-binding module (in blue and green in the image) recognizes and binds the cell wall peptidoglycan, whereas a catalytic module (in red) is responsible for breaking a specific linkage in the substrate. Because of the unusual hook shape of the protein, the substrate-binding module and the catalytic module partially block each other. As a result, LytC cannot bind the highly cross-linked peptidoglycan that is predominant under normal circumstances. Only when CbpD or other lytic enzymes cut specific linkages in the cell wall, LytC is able to bind the 'loosened' peptidoglycan and comes into action—with deleterious consequences for the non-competent pneumococci.&lt;/div&gt;&lt;object height="385" width="640"&gt;&lt;param name="movie" value="http://www.youtube.com/v/meTjfMA3ToU&amp;amp;border=1&amp;amp;color1=0xb1b1b1&amp;amp;color2=0xd0d0d0&amp;amp;hl=en_GB&amp;amp;feature=player_embedded&amp;amp;fs=1"&gt;&lt;/param&gt;&lt;param name="allowFullScreen" value="true"&gt;&lt;/param&gt;&lt;param name="allowScriptAccess" value="always"&gt;&lt;/param&gt;&lt;embed src="http://www.youtube.com/v/meTjfMA3ToU&amp;amp;border=1&amp;amp;color1=0xb1b1b1&amp;amp;color2=0xd0d0d0&amp;amp;hl=en_GB&amp;amp;feature=player_embedded&amp;amp;fs=1" type="application/x-shockwave-flash" allowfullscreen="true" allowScriptAccess="always" width="640" height="385"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' 
src='https://blogger.googleusercontent.com/tracker/28219072-5552104236732287818?l=genomics-array.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/5VgUuITLDSWl7d8fuEFp5peZ8X8/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/5VgUuITLDSWl7d8fuEFp5peZ8X8/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/5VgUuITLDSWl7d8fuEFp5peZ8X8/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/5VgUuITLDSWl7d8fuEFp5peZ8X8/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/BUfNt/~4/nBTqdMuPLOU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://genomics-array.blogspot.com/feeds/5552104236732287818/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=28219072&amp;postID=5552104236732287818" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/5552104236732287818?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/28219072/posts/default/5552104236732287818?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/BUfNt/~3/nBTqdMuPLOU/pneumococcal-fratricides.html" title="Pneumococcal Fratricides" /><author><name>Sucheta</name><uri>http://www.blogger.com/profile/17433426304045795341</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="24" height="32" src="http://3.bp.blogspot.com/_avzLfLUJNqM/S5Vb5iddlnI/AAAAAAAABQE/hNiZxDAYufY/S220/facebook.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://genomics-array.blogspot.com/2010/06/pneumococcal-fratricides.html</feedburner:origLink></entry></feed>

