<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:blogger='http://schemas.google.com/blogger/2008' xmlns:georss='http://www.georss.org/georss' xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-14688252</id><updated>2025-05-25T13:44:41.573+02:00</updated><category term="bioinformatics"/><category term="java"/><category term="code"/><category term="xml"/><category term="ncbi"/><category term="xslt"/><category term="ngs"/><category term="rdf"/><category term="api"/><category term="pubmed"/><category term="source"/><category term="c"/><category term="database"/><category term="javascript"/><category term="snp"/><category term="wikipedia"/><category term="science"/><category term="svg"/><category term="sequence"/><category term="json"/><category term="semantic web"/><category term="tool"/><category term="ucsc"/><category term="parsing"/><category term="visualization"/><category term="network"/><category term="vcf"/><category term="genetics"/><category term="genomics"/><category term="c++"/><category term="history"/><category term="algorithm"/><category term="make"/><category term="protein"/><category term="social"/><category term="google"/><category term="mediawiki"/><category term="mysql"/><category term="drawing"/><category term="graph"/><category term="annotation"/><category term="berkeleydb"/><category term="html"/><category term="mozilla"/><category term="programming"/><category term="wiki"/><category term="extension"/><category term="firefox"/><category term="publication"/><category term="art"/><category term="biology"/><category term="blast"/><category term="generator"/><category term="library"/><category term="ontology"/><category term="tutorial"/><category term="foaf"/><category term="format"/><category 
term="graphics"/><category term="linux"/><category term="script"/><category term="server"/><category term="sql"/><category term="webservices"/><category term="apache"/><category term="go"/><category term="makefile"/><category term="parser"/><category term="samtools"/><category term="tips"/><category term="twitter"/><category term="web2.0"/><category term="article"/><category term="blog"/><category term="freebase"/><category term="job"/><category term="journal"/><category term="nature"/><category term="scientists"/><category term="sparql"/><category term="sqlite"/><category term="bam"/><category term="chart"/><category term="dot"/><category term="flex"/><category term="graphviz"/><category term="jena"/><category term="lsid"/><category term="paper"/><category term="pdf"/><category term="rest"/><category term="timeline"/><category term="tree"/><category term="video"/><category term="wsdl"/><category term="canvas"/><category term="community"/><category term="geneontology"/><category term="gis"/><category term="ibm"/><category term="interaction"/><category term="mongodb"/><category term="presentation"/><category term="r"/><category term="rss"/><category term="sax"/><category term="search"/><category term="soap"/><category term="time"/><category term="velocity"/><category term="workflow"/><category term="xhtml"/><category term="xul"/><category term="BioHackathon"/><category term="browser"/><category term="citation"/><category term="css"/><category term="gatk"/><category term="networking"/><category term="open"/><category term="toolbox"/><category term="xsd"/><category term="bwa"/><category term="chemistry"/><category term="darwin"/><category term="das"/><category term="diagram"/><category term="engine"/><category term="ensembl"/><category term="evolution"/><category term="feeds"/><category term="filter"/><category term="fun"/><category term="hadoop"/><category term="inkscape"/><category term="kml"/><category term="knime"/><category term="knowledge"/><category 
term="pipeline"/><category term="spreadsheet"/><category term="stax"/><category term="systems biology"/><category term="taxonomy"/><category term="treemap"/><category term="variation"/><category term="3D"/><category term="ajax"/><category term="animation"/><category term="arq"/><category term="ceph"/><category term="cgi"/><category term="connotea"/><category term="curl"/><category term="data"/><category term="dbpedia"/><category term="designpatterns"/><category term="disease"/><category term="erlang"/><category term="flash"/><category term="france"/><category term="friendfeed"/><category term="geek"/><category term="gene"/><category term="git"/><category term="hapmap"/><category term="javacc"/><category term="jni"/><category term="jsp"/><category term="linkage"/><category term="mail"/><category term="map"/><category term="mapreduce"/><category term="node.js"/><category term="nosql"/><category term="operon"/><category term="owl"/><category term="pedigree"/><category term="php"/><category term="picard"/><category term="postscript"/><category term="python"/><category term="scifoo"/><category term="taverna"/><category term="tomcat"/><category term="udf"/><category term="xsl"/><category term="access"/><category term="administration"/><category term="adobe"/><category term="ant"/><category term="applet"/><category term="asn1"/><category term="bioformatics"/><category term="biogang"/><category term="chemoinformatics"/><category term="cluster"/><category term="dia"/><category term="education"/><category term="family"/><category term="file"/><category term="flickr"/><category term="fop"/><category term="form"/><category term="framework"/><category term="genealogy"/><category term="greasemonkey"/><category term="gui"/><category term="gwt"/><category term="htsjdk"/><category term="impact"/><category term="indexing"/><category term="information"/><category term="japan"/><category term="javafx"/><category term="jaxws"/><category term="jquery"/><category term="jxb"/><category 
term="keyvalue"/><category term="mesh"/><category term="nar"/><category term="node"/><category term="oreilly"/><category term="perl"/><category term="solr"/><category term="structured"/><category term="ubiquity"/><category term="xquery"/><category term="xsl-fo"/><category term="abstract"/><category term="ancestors"/><category term="aop"/><category term="avro"/><category term="batch"/><category term="batik"/><category term="bees"/><category term="biomoby"/><category term="bionformatics"/><category term="bison"/><category term="boinc"/><category term="boinformatics"/><category term="book;system"/><category term="boston"/><category term="collaboration"/><category term="complexity"/><category term="compression"/><category term="cookie"/><category term="couchdb"/><category term="curses"/><category term="custom"/><category term="cxf"/><category term="del.icio.us"/><category term="doc"/><category term="docker"/><category term="documentation"/><category term="doi"/><category term="drools"/><category term="ebi"/><category term="eclipse"/><category term="ecology"/><category term="elixir"/><category term="emf"/><category term="factor"/><category term="fasta"/><category term="fastq"/><category term="flex2"/><category term="folding"/><category term="formatdb"/><category term="french"/><category term="fuse"/><category term="ga4gh"/><category term="galaxy"/><category term="gdd07"/><category term="gdd07fr"/><category term="gears"/><category term="gedcom"/><category term="geni"/><category term="glassfish"/><category term="grid"/><category term="hdf5"/><category term="health"/><category term="hibernate"/><category term="hts"/><category term="ibd"/><category term="illumina"/><category term="internet"/><category term="interview"/><category term="jjtree"/><category term="latex"/><category term="lims"/><category term="lucene"/><category term="lzw"/><category term="mathematics"/><category term="meta"/><category term="mircorarray"/><category term="mmbean"/><category 
term="modeling"/><category term="molecular biology"/><category term="multithread prime"/><category term="nci"/><category term="nnb"/><category term="nodejs"/><category term="oauth"/><category term="openmpi"/><category term="orcid"/><category term="pcr"/><category term="pig"/><category term="pnas"/><category term="post"/><category term="processing"/><category term="protocols"/><category term="publlicaton"/><category term="pumed"/><category term="quotes"/><category term="reasoner"/><category term="registry"/><category term="report"/><category term="rmi"/><category term="rnaseq"/><category term="rosetta"/><category term="rstats"/><category term="ruby"/><category term="sam"/><category term="sequencing"/><category term="seti"/><category term="sge"/><category term="simile"/><category term="sketch"/><category term="slide"/><category term="slidy"/><category term="slri"/><category term="slurm"/><category term="sping"/><category term="spring"/><category term="swissprot"/><category term="systems"/><category term="thesis"/><category term="uniprot"/><category term="university"/><category term="validation"/><category term="wdl"/><category term="web desktop"/><category term="wereallgoingtodie"/><category term="xforms"/><title type='text'>YOKOFAKUN</title><subtitle type='html'></subtitle><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/posts/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default?alt=atom'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><link rel='next' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default?alt=atom&amp;start-index=26&amp;max-results=25'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>578</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-14688252.post-7351561807092209370</id><published>2017-01-15T13:17:00.000+01:00</published><updated>2017-01-15T13:17:30.130+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="gatk"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="source"/><category scheme="http://www.blogger.com/atom/ns#" term="vcf"/><title type='text'>Creating a custom GATK Walker (GATK 3.6) : my notebook</title><summary type="text">
This is my notebook for creating a custom engine in GATK.
Description
I want to read a VCF file and to get a table of  category/count. Something like this:


HAVE_ID
TYPE
COUNT



YES
SNP
123


NO
SNP
3


NO
INDEL
13


Class Category
I create a class Category describing each row in the table. It&#39;s just a List of Strings
static class Category
        implements Comparable&lt;Category&gt;
        </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/7351561807092209370/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/7351561807092209370' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/7351561807092209370'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/7351561807092209370'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2017/01/creating-custom-gatk-walker-gatk-36-my.html' title='Creating a custom GATK Walker (GATK 3.6) : my notebook'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-7863787922045801396</id><published>2016-10-27T12:40:00.000+02:00</published><updated>2016-10-27T12:40:06.642+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="wdl"/><category scheme="http://www.blogger.com/atom/ns#" term="workflow"/><title type='text'>Hello WDL ( Workflow Description Language )</title><summary type="text">This is a quick note about my first  WDL workflow (Workflow Description Language) https://software.broadinstitute.org/wdl/.

As a Makefile, my workflow would be the following one:

NAME?=world
$(NAME)_sed.txt : $(NAME).txt
 sed &#39;s/Hello/Goodbye/&#39; $&lt; &gt; $@
$(NAME).txt:
 echo &quot;Hello $(NAME)&quot; &gt; $@
Executed as:$ make NAME=WORLD

echo &quot;Hello WORLD&quot; &gt; WORLD.txt
sed &#39;s/Hello/Goodbye/&#39; WORLD.txt </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/7863787922045801396/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/7863787922045801396' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/7863787922045801396'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/7863787922045801396'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2016/10/hello-wdl-workflow-description-language.html' title='Hello WDL ( Workflow Description Language )'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-4844411536179180925</id><published>2016-09-22T11:10:00.000+02:00</published><updated>2016-09-22T11:10:46.526+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="gatk"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="ngs"/><category scheme="http://www.blogger.com/atom/ns#" term="sequence"/><title type='text'>Writing a Custom ReadFilter for the GATK, my notebook.</title><summary type="text">
The GATK contains a set of predefined read filters that &quot;filter or transfer incoming SAM/BAM data files&quot;:BadCigar
BadMate
CountingRead
DuplicateRead
FailsVendorQualityCheck
LibraryRead
MalformedRead
MappingQuality
MappingQualityUnavailable
(...)
With the help of the modular architecture of the GATK, it&#39;s possible to write a custom ReadFilter. In this post I&#39;ll write a ReadFilter that removes the</summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/4844411536179180925/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/4844411536179180925' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/4844411536179180925'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/4844411536179180925'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2016/09/writing-custom-readfilter-for-gatk-my.html' title='Writing a Custom ReadFilter for the GATK, my notebook.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-6869637378863093341</id><published>2016-09-09T16:18:00.001+02:00</published><updated>2016-09-09T16:18:31.394+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="blast"/><category scheme="http://www.blogger.com/atom/ns#" term="bwa"/><category scheme="http://www.blogger.com/atom/ns#" term="hts"/><category scheme="http://www.blogger.com/atom/ns#" term="ncbi"/><category scheme="http://www.blogger.com/atom/ns#" term="ngs"/><title type='text'>Playing with #magicblast, the #NCBI Short read mapper. My notebook</title><summary type="text">NCBI MAGIC Blast was recently mentioned by BioMickWatson on twitter.
Looks pretty cool.  Perhaps once again the answer to all bfx questions will be BLAST RE https://t.co/4D5e9QQnrb pic.twitter.com/bwW3y0yl2n- Mick Watson (@BioMickWatson) September 9, 2016

Here, I&#39;ll be playing with magicblast  and I&#39;ll compare its output with bwa (Makefile below).

First, here is an extract of the manual for </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/6869637378863093341/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/6869637378863093341' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6869637378863093341'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6869637378863093341'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2016/09/playing-with-magicblast-ncbi-short-read.html' title='Playing with #magicblast, the #NCBI Short read mapper. My notebook'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-3987954556411274168</id><published>2016-05-27T15:30:00.000+02:00</published><updated>2016-05-27T15:30:44.179+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="javascript"/><category scheme="http://www.blogger.com/atom/ns#" term="journal"/><category scheme="http://www.blogger.com/atom/ns#" term="pubmed"/><category scheme="http://www.blogger.com/atom/ns#" term="xml"/><title type='text'>pubmed: extracting the 1st authors&#39; gender and location who published in the Bioinformatics journal.</title><summary type="text">In this post I&#39;ll get some statistics about the 1st authors in the &quot;Bioinformatics&quot; journal from pubmed. 
I&#39;ll extract their genders and locations.
I&#39;ll use some tools I&#39;ve already described some years ago but I&#39;ve re-written them.
Downloading the data: To download the papers published in Bioinformatics, the pubmed/entrez query is &#39;&quot;Bioinformatics&quot;[jour]&#39;.
I use pubmeddump to download all those </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/3987954556411274168/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/3987954556411274168' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/3987954556411274168'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/3987954556411274168'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2016/05/pubmed-extracting-1st-authors-gender.html' title='pubmed: extracting the 1st authors&#39; gender and location who published in the Bioinformatics journal.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-7049014703853142032</id><published>2016-05-21T14:52:00.000+02:00</published><updated>2016-05-21T14:52:14.757+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="article"/><category scheme="http://www.blogger.com/atom/ns#" term="bioformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="graph"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="ncbi"/><category scheme="http://www.blogger.com/atom/ns#" term="network"/><category scheme="http://www.blogger.com/atom/ns#" term="orcid"/><category scheme="http://www.blogger.com/atom/ns#" term="pubmed"/><category scheme="http://www.blogger.com/atom/ns#" term="sql"/><category scheme="http://www.blogger.com/atom/ns#" term="sqlite"/><category 
scheme="http://www.blogger.com/atom/ns#" term="xml"/><category scheme="http://www.blogger.com/atom/ns#" term="xslt"/><title type='text'>Playing with the @ORCID_Org / @ncbi_pubmed graph. My notebook.</title><summary type="text">&quot;ORCID provides a persistent digital identifier that distinguishes you from every other researcher and, through integration in key research workflows such as manuscript and grant submission, supports automated linkages between you and your professional activities ensuring that your work is recognized. &quot;I&#39;ve recently discovered that pubmed now integrates ORCID identifiers.
and so it begins ! :-D @</summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/7049014703853142032/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/7049014703853142032' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/7049014703853142032'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/7049014703853142032'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2016/05/playing-with-orcidorg-ncbipubmed-graph.html' title='Playing with the @ORCID_Org / @ncbi_pubmed graph. My notebook.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-5128228804709899142</id><published>2016-05-17T13:29:00.001+02:00</published><updated>2016-05-17T13:29:50.789+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="gene"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="javascript"/><category scheme="http://www.blogger.com/atom/ns#" term="make"/><category scheme="http://www.blogger.com/atom/ns#" term="makefile"/><category scheme="http://www.blogger.com/atom/ns#" term="ngs"/><category scheme="http://www.blogger.com/atom/ns#" term="rnaseq"/><category scheme="http://www.blogger.com/atom/ns#" term="samtools"/><title type='text'>finding new intron-exon junctions using the public Encode RNASeq data</title><summary type="text">I&#39;ve been asked 
to look for some new / suspected / previously uncharacterized intron-exon junctions in public RNASeq data.
I&#39;ve used the BAMs under http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeCaltechRnaSeq/.

The following command is used to build the list of BAMs:

curl -s  &quot;http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeCaltechRnaSeq/&quot; |\
tr &#39; &amp;lt;&amp;gt;&quot;&#39; &quot;</summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/5128228804709899142/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/5128228804709899142' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/5128228804709899142'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/5128228804709899142'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2016/05/finding-new-intron-exon-junctions-using.html' title='finding new intron-exon junctions using the public Encode RNASeq data'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-4962064268613746085</id><published>2016-03-04T17:16:00.000+01:00</published><updated>2016-03-04T17:16:29.570+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bam"/><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="htsjdk"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="javascript"/><category scheme="http://www.blogger.com/atom/ns#" term="ngs"/><category scheme="http://www.blogger.com/atom/ns#" term="picard"/><category scheme="http://www.blogger.com/atom/ns#" term="sam"/><category scheme="http://www.blogger.com/atom/ns#" term="source"/><category scheme="http://www.blogger.com/atom/ns#" 
term="tool"/><category scheme="http://www.blogger.com/atom/ns#" term="vcf"/><title type='text'>Now in picard: two javascript-based tools filtering BAM and VCF files.</title><summary type="text">
SamJS and VCFFilterJS are two tools I wrote for jvarkit. Both tools use the embedded java javascript engine to filter BAM or VCF files.
To get a broader audience, I&#39;ve copied those functionalities to Picard in &#39;FilterSamReads&#39; and &#39;FilterVcf&#39;.

FilterSamReads: FilterSamReads filters a SAM or BAM file with a javascript expression using the java javascript-engine.  
 The script puts the following </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/4962064268613746085/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/4962064268613746085' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/4962064268613746085'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/4962064268613746085'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2016/03/now-in-picard-two-javascript-based.html' title='Now in picard: two javascript-based tools filtering BAM and VCF files.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-1113206358267686142</id><published>2016-03-04T16:41:00.001+01:00</published><updated>2016-03-04T16:41:17.405+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="file"/><category scheme="http://www.blogger.com/atom/ns#" term="htsjdk"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="snp"/><category scheme="http://www.blogger.com/atom/ns#" term="source"/><category scheme="http://www.blogger.com/atom/ns#" term="vcf"/><title type='text'>Reading a VCF file faster with java  8, htsjdk and java.util.stream.Stream</title><summary type="text">java 8 streams &amp;quot;support functional-style operations on streams of 
elements, such as map-reduce transformations on collections&amp;quot;. In this post, I will show how I&#39;ve implemented a java.util.stream.Stream of VCF variants that counts the number of items in dbsnp.This example uses the java htsjdk API for reading variants.When using parallel streams, the main idea is to implement a </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/1113206358267686142/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/1113206358267686142' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/1113206358267686142'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/1113206358267686142'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2016/03/reading-vcf-file-faster-with-java-8.html' title='Reading a VCF file faster with java  8, htsjdk and java.util.stream.Stream'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total><georss:featurename>Centre-Ville, Nantes, France</georss:featurename><georss:point>47.209384873179857 -1.553542617475614</georss:point><georss:box>47.208036373179858 -1.556064117475614 47.210733373179856 -1.5510211174756139</georss:box></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-519504445164483976</id><published>2016-02-24T12:53:00.000+01:00</published><updated>2016-02-24T12:53:36.774+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="api"/><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" 
term="curl"/><category scheme="http://www.blogger.com/atom/ns#" term="elixir"/><category scheme="http://www.blogger.com/atom/ns#" term="json"/><category scheme="http://www.blogger.com/atom/ns#" term="ngs"/><category scheme="http://www.blogger.com/atom/ns#" term="registry"/><category scheme="http://www.blogger.com/atom/ns#" term="tool"/><category scheme="http://www.blogger.com/atom/ns#" term="vcf"/><category scheme="http://www.blogger.com/atom/ns#" term="xml"/><category scheme="http://www.blogger.com/atom/ns#" term="xsl"/><category scheme="http://www.blogger.com/atom/ns#" term="xslt"/><title type='text'>Registering a tool in the @ELIXIREurope regisry using XML, XSLT, JSON and curl. My notebook.</title><summary type="text">The Elixir Registry / pmid:26538599 &quot;A portal to bioinformatics resources world-wide. With community support, the registry can become a standard for dissemination of information about bioinformatics resources: we welcome everyone to join us in this common endeavour. The registry is freely available at https://bio.tools.&quot;In this post, I will describe how I&#39;ve used the bio.tools API to register </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/519504445164483976/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/519504445164483976' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/519504445164483976'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/519504445164483976'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2016/02/registering-tool-in-elixireurope.html' title='Registering a tool in the @ELIXIREurope regisry using XML, XSLT, JSON and curl. 
My notebook.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-5835788102329737595</id><published>2015-12-05T00:18:00.001+01:00</published><updated>2015-12-05T00:18:58.494+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="blog"/><category scheme="http://www.blogger.com/atom/ns#" term="meta"/><title type='text'>Happy birthday my blog. You are now ten-year-old.</title><summary type="text">Happy birthday my blog. You are now 10-year-old.




</summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/5835788102329737595/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/5835788102329737595' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/5835788102329737595'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/5835788102329737595'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/12/happy-birthday-my-blog-you-are-now-ten.html' title='Happy birthday my blog. You are now ten-year-old.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-1838891765495212798</id><published>2015-12-03T10:20:00.000+01:00</published><updated>2015-12-03T10:20:38.973+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="gatk"/><category scheme="http://www.blogger.com/atom/ns#" term="gui"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="json"/><category scheme="http://www.blogger.com/atom/ns#" term="xml"/><category scheme="http://www.blogger.com/atom/ns#" term="xslt"/><title type='text'>GATK-UI : a java-swing interface for the Genome Analysis Toolkit.</title><summary type="text">I&#39;ve just pushed GATK-UI, a java swing interface for the  Genome Analysis Toolkit GATK at https://github.com/lindenb/gatk-ui. 
This tool is also available as a WebStart/JNLP application.

Screenshot
Why did you create this tool ?Some non-bioinformatician collaborators often want some coverage data for a defined set of BAM, for a specific region...Did you test every tool ?NOHow did you create an </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/1838891765495212798/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/1838891765495212798' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/1838891765495212798'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/1838891765495212798'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/12/gatk-ui-java-swing-interface-for-genome.html' title='GATK-UI : a java-swing interface for the Genome Analysis Toolkit.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-3858177302151464936</id><published>2015-07-13T15:44:00.000+02:00</published><updated>2015-07-13T15:44:18.151+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="docker"/><category scheme="http://www.blogger.com/atom/ns#" term="linux"/><title type='text'>Playing with #Docker , my notebook</title><summary type="text">This post is my notebook about  docker after we had a very nice introduction about docker by  François Moreews (INRIA/IRISA, Rennes). I&#39;ve used docker today for the first time, my aim was  just to create an image containing https://github.com/lindenb/verticalize, a small tool I wrote to verticalize text files.
Install docker: you hate running this kind of command line, don&#39;t you?
$ wget -qO- </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/3858177302151464936/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/3858177302151464936' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/3858177302151464936'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/3858177302151464936'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/07/playing-with-docker-my-notebook.html' title='Playing with #Docker , my notebook'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-5536839165764175916</id><published>2015-06-29T11:30:00.000+02:00</published><updated>2015-06-29T11:31:38.952+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="blast"/><category scheme="http://www.blogger.com/atom/ns#" term="c"/><category scheme="http://www.blogger.com/atom/ns#" term="ngs"/><category scheme="http://www.blogger.com/atom/ns#" term="samtools"/><category scheme="http://www.blogger.com/atom/ns#" term="xml"/><title type='text'>A BLAST to SAM converter.</title><summary type="text">Some times ago, I&#39;ve received a set of Ion-Torrent /mate-reads with a poor quality. I wasn&#39;t able to align much things using bwa. I&#39;ve always wondered if I could get better alignments using NCBI-BLASTN (short answer: no) . 
That&#39;s why I asked guyduche, my intership student to write a C program to convert the output of blastn to SAM. His code is available on github at :https://github.com/guyduche/</summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/5536839165764175916/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/5536839165764175916' title='8 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/5536839165764175916'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/5536839165764175916'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/06/a-blast-to-sam-converter.html' title='A BLAST to SAM converter.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>8</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-7443966706774764240</id><published>2015-06-18T13:07:00.000+02:00</published><updated>2015-06-18T13:07:06.424+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="apache"/><category scheme="http://www.blogger.com/atom/ns#" term="api"/><category scheme="http://www.blogger.com/atom/ns#" term="avro"/><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="ga4gh"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="json"/><category scheme="http://www.blogger.com/atom/ns#" term="makefile"/><category 
scheme="http://www.blogger.com/atom/ns#" term="source"/><category scheme="http://www.blogger.com/atom/ns#" term="vcf"/><title type='text'>Playing with the #GA4GH schemas and #Avro : my notebook</title><summary type="text">After watching  David Haussler&#39;s talk &quot;Beacon Project and Data Sharing ApIs&quot;, I wanted to play with Avro and the models and APIs defined by the Global Alliance for Genomics and Health  (ga4gh) coalition Here is my notebook.
  
 (Wikipedia) Avro: &quot;Avro is a remote procedure call and data serialization framework developed within Apache&#39;s Hadoop project. It uses JSON for defining data types and </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/7443966706774764240/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/7443966706774764240' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/7443966706774764240'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/7443966706774764240'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/06/playing-with-ga4gh-schemas-and-avro-my.html' title='Playing with the #GA4GH schemas and #Avro : my notebook'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-6367704022208475665</id><published>2015-05-07T21:47:00.000+02:00</published><updated>2015-05-07T21:47:59.133+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="mmbean"/><category scheme="http://www.blogger.com/atom/ns#" term="source"/><category scheme="http://www.blogger.com/atom/ns#" term="vcf"/><title type='text'>Monitoring a java application with mbeans. An example with samtools/htsjdk.</title><summary type="text">&quot;A MBean is a Java object that follows the JMX specification. 
A MBean can represent a device, an application, or any resource that needs to be managed.  The JConsole graphical user interface is a monitoring tool that complies to the JMX specification.&quot;. In this post I&#39;ll show how I&#39;ve modified the sources of the htsjdk library  to monitor the java program reading a VCF file from the Exac server. </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/6367704022208475665/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/6367704022208475665' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6367704022208475665'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6367704022208475665'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/05/monitoring-java-application-with-mbeans.html' title='Monitoring a java application with mbeans. 
An example with samtools/htsjdk.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-6609243507322494666</id><published>2015-05-05T17:24:00.001+02:00</published><updated>2015-05-05T17:24:16.688+02:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="hadoop"/><category scheme="http://www.blogger.com/atom/ns#" term="mapreduce"/><category scheme="http://www.blogger.com/atom/ns#" term="ngs"/><category scheme="http://www.blogger.com/atom/ns#" term="vcf"/><title type='text'>Playing with hadoop/mapreduce and htsjdk/VCF : my notebook.</title><summary type="text">The aim of this test is to get a count of each type of variant/genotypes in a VCF file using Apache Hadoop and the java library for NGS htsjdk. My source code is available at: https://github.com/lindenb/hadoop-sandbox/blob/master/src/main/java/com/github/lindenb/hadoop/Test.java.

First, and this is my main problem, I needed to create a class &#39;VcfRow&#39; that would contains the whole data about a </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/6609243507322494666/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/6609243507322494666' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6609243507322494666'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6609243507322494666'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/05/playing-with-hadoopmapreduce-and.html' title='Playing with hadoop/mapreduce and htsjdk/VCF : my notebook.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-9140008001319321202</id><published>2015-02-28T17:01:00.000+01:00</published><updated>2015-02-28T17:01:22.611+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="galaxy"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="ngs"/><category scheme="http://www.blogger.com/atom/ns#" term="vcf"/><category scheme="http://www.blogger.com/atom/ns#" term="xml"/><title type='text'>Integrating a java program in #usegalaxy.</title><summary type="text">This is my notebook for the integration of java programs in https://usegalaxy.org/ .
create a directory for your tools under ${galaxy-root}/tools 
mkdir ${galaxy-root}/tools/jvarkit

put all the required jar files and the XML files describing your tools (see below) in this new directory: $ ls ${galaxy-root}/tools/jvarkit/
commons-jexl-2.1.1.jar
groupbygene.jar
htsjdk-1.128.jar
vcffilterjs.jar
</summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/9140008001319321202/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/9140008001319321202' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/9140008001319321202'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/9140008001319321202'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/02/integrating-java-program-in-usegalaxy.html' title='Integrating a java program in #usegalaxy.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-6932318720570976215</id><published>2015-02-22T17:54:00.000+01:00</published><updated>2015-02-22T17:54:01.196+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="genetics"/><category scheme="http://www.blogger.com/atom/ns#" term="visualization"/><category scheme="http://www.blogger.com/atom/ns#" term="xml"/><category scheme="http://www.blogger.com/atom/ns#" term="xslt"/><title type='text'>Drawing a Manhattan plot in SVG using a GWAS+XML model.</title><summary type="text">On friday, I saw my colleague  @b_l_k starting writing SVG+XML code to draw a Manhattan plot. I told him that a better idea would be to  describe the data using XML and to transform the XML to SVG using XSLT. 
So, let&#39;s do this. I put the XSLT stylesheet on github at https://github.com/lindenb/xslt-sandbox/blob/master/stylesheets/bio/manhattan.xsl . And the model of data would look like this (I </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/6932318720570976215/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/6932318720570976215' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6932318720570976215'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6932318720570976215'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/02/drawing-manhattan-plot-in-svg-using.html' title='Drawing a Manhattan plot in SVG using a GWAS+XML model.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-5038838376828092282</id><published>2015-02-18T20:26:00.000+01:00</published><updated>2015-02-18T20:26:46.127+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bionformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="fasta"/><category scheme="http://www.blogger.com/atom/ns#" term="generator"/><category scheme="http://www.blogger.com/atom/ns#" term="java"/><category scheme="http://www.blogger.com/atom/ns#" term="knime"/><category scheme="http://www.blogger.com/atom/ns#" term="xml"/><category scheme="http://www.blogger.com/atom/ns#" term="xslt"/><title type='text'>Automatic code generation for 
@knime with XSLT: An example with two nodes: fasta reader and writer.</title><summary type="text">

KNIME is a java+eclipse-based graphical workflow-manager.
Biologists in my lab often use this tool to filter VCFs or other tabular data. A software Development kit (SDK) is provided to build new nodes. My main problem with this SDK is, that you need to write a large number of similar files and you also have to interact with a graphical interface. I wanted to automatize the generation of java </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/5038838376828092282/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/5038838376828092282' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/5038838376828092282'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/5038838376828092282'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/02/automatic-code-generation-for-knime.html' title='Automatic code generation for @knime with XSLT: An example with two nodes: fasta reader and writer.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-3547965617912537958</id><published>2015-02-02T12:34:00.000+01:00</published><updated>2015-02-02T12:34:50.604+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="blast"/><category scheme="http://www.blogger.com/atom/ns#" term="c++"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="database"/><category scheme="http://www.blogger.com/atom/ns#" 
term="ncbi"/><category scheme="http://www.blogger.com/atom/ns#" term="sequence"/><category scheme="http://www.blogger.com/atom/ns#" term="toolbox"/><title type='text'>Listing the &#39;Subject&#39; Sequences in a BLAST database using the NCBI C++ toolbox. My notebook.</title><summary type="text">In my previous post (http://plindenbaum.blogspot.com/2015/01/filtering-fasta-sequences-using-ncbi-c.html) I&#39;ve built an application filtering FASTA sequences using theNCBI C++ toolbox (http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/). Here, I&#39;m gonna write a tool listing the &#39;subject&#39; sequences in a BLAST database.This new application ListBlastDatabaseContent takes only one argument &#39;-db&#39;, the </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/3547965617912537958/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/3547965617912537958' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/3547965617912537958'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/3547965617912537958'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/02/listing-subject-sequences-in-blast.html' title='Listing the &#39;Subject&#39; Sequences in a BLAST database using the NCBI C++ toolbox. 
My notebook.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-6398289360263454492</id><published>2015-01-30T11:07:00.000+01:00</published><updated>2015-01-30T11:07:27.296+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="api"/><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="c"/><category scheme="http://www.blogger.com/atom/ns#" term="c++"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="ncbi"/><category scheme="http://www.blogger.com/atom/ns#" term="sequence"/><category scheme="http://www.blogger.com/atom/ns#" term="toolbox"/><title type='text'>Filtering Fasta Sequences using the #NCBI C++ API. My notebook.</title><summary type="text">In my previous post (http://plindenbaum.blogspot.com/2015/01/compiling-c-hello-world-program-using.html) I&#39;ve built a simple &amp;quot;Hello World&amp;quot; application using theNCBI C++ toolbox (http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/). 
Here, I&#39;m gonna to extend the code in order to create a program filtering FASTA sequences on their sizes.This new application FastaFilterSize needs three new </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/6398289360263454492/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/6398289360263454492' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6398289360263454492'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/6398289360263454492'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/01/filtering-fasta-sequences-using-ncbi-c.html' title='Filtering Fasta Sequences using the #NCBI C++ API. My notebook.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-8852659460633145940</id><published>2015-01-29T13:09:00.000+01:00</published><updated>2015-01-29T13:09:00.313+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="c++"/><category scheme="http://www.blogger.com/atom/ns#" term="code"/><category scheme="http://www.blogger.com/atom/ns#" term="ncbi"/><category scheme="http://www.blogger.com/atom/ns#" term="programming"/><category scheme="http://www.blogger.com/atom/ns#" term="toolbox"/><title type='text'>Compiling a C++ &#39;Hello world&#39;  program using the #NCBI C++ toolbox: my notebook.</title><summary type="text">This post is my notebook for compiling a simple C++ 
application using the NCBI C++ toolbox (http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/). This application prints &#39;Hello world&#39; and takes two arguments: &#39;-o&#39; to specify the output filename (default is standard output)
&#39;-n&#39; to set the name to be printed (default: &quot;Word !&quot;)
The code I used is the one containing in the distribution of </summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/8852659460633145940/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/8852659460633145940' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/8852659460633145940'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/8852659460633145940'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2015/01/compiling-c-hello-world-program-using.html' title='Compiling a C++ &#39;Hello world&#39;  program using the #NCBI C++ toolbox: my notebook.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-1694145637851943881</id><published>2014-12-05T21:08:00.000+01:00</published><updated>2014-12-05T21:08:00.352+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="algorithm"/><category scheme="http://www.blogger.com/atom/ns#" term="boinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="make"/><category scheme="http://www.blogger.com/atom/ns#" term="makefile"/><category scheme="http://www.blogger.com/atom/ns#" term="pipeline"/><category scheme="http://www.blogger.com/atom/ns#" term="workflow"/><title type='text'>Divide-and-conquer in a #Makefile : recursivity and #parallelism.</title><summary type="text">This post is my notebook about implementing a divide-and-conquer strategy in GNU make.Say you have a list of &#39;N&#39; VCFs files. 
You want to create a list of: common SNPs in vcf1 and vcf2
common SNPs in vcf3 and the previous list
common SNPs in vcf4 and the previous list 
(...)
common SNPs in vcfN and the previous list  
Yes, I know I can do this using: grep -v &#39;^#&#39; f.vcf|cut -f 1,2,4,5 | sort | uniq

</summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/1694145637851943881/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/1694145637851943881' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/1694145637851943881'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/1694145637851943881'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2014/12/divide-and-conquer-in-makefile.html' title='Divide-and-conquer in a #Makefile : recursivity and #parallelism.'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-14688252.post-3131621193245656041</id><published>2014-12-04T20:40:00.001+01:00</published><updated>2014-12-04T20:40:39.565+01:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bioinformatics"/><category scheme="http://www.blogger.com/atom/ns#" term="make"/><category scheme="http://www.blogger.com/atom/ns#" term="makefile"/><category scheme="http://www.blogger.com/atom/ns#" term="ngs"/><category scheme="http://www.blogger.com/atom/ns#" term="samtools"/><category scheme="http://www.blogger.com/atom/ns#" term="xml"/><category scheme="http://www.blogger.com/atom/ns#" term="xslt"/><title type='text'>XML+XSLT = #Makefile -based #workflows for #bioinformatics</title><summary type="text">I&#39;ve recently read some conversations on Twitter about Makefile-based bioinformatics workflows. 
I&#39;ve suggested on biostars.org (Standard simple format to describe a bioinformatics analysis pipeline) that a XML file could be used to describe a model of data and XSLT could transform this model to a Makefile-based workflow. I&#39;ve already explored this idea in a previous post (Generating a pipeline of</summary><link rel='replies' type='application/atom+xml' href='http://plindenbaum.blogspot.com/feeds/3131621193245656041/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/14688252/3131621193245656041' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/3131621193245656041'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/14688252/posts/default/3131621193245656041'/><link rel='alternate' type='text/html' href='http://plindenbaum.blogspot.com/2014/12/xmlxslt-makefile-based-workflows-for.html' title='XML+XSLT = #Makefile -based #workflows for #bioinformatics'/><author><name>Pierre Lindenbaum</name><uri>http://www.blogger.com/profile/13765837643388003852</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>1</thr:total></entry></feed>