<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0">

<channel>
	<title>Prashanth Ellina</title>
	
	<link>http://blog.prashanthellina.com</link>
	<description>In Pursuit of Truth</description>
	<lastBuildDate>Sun, 28 Nov 2010 09:35:27 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.2.1</generator>
		<atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/rss+xml" href="http://feeds.feedburner.com/prashanthellina" /><feedburner:info uri="prashanthellina" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><item>
		<title>Notemonk – An exciting new way to learn your textbooks</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/BAyGB7sXIUU/</link>
		<comments>http://blog.prashanthellina.com/2010/04/03/notemonk-an-exciting-new-way-to-learn-your-textbooks/#comments</comments>
		<pubDate>Fri, 02 Apr 2010 21:06:24 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[web]]></category>
		<category><![CDATA[education]]></category>
		<category><![CDATA[headrun]]></category>
		<category><![CDATA[notemonk]]></category>
		<category><![CDATA[text books]]></category>
		<category><![CDATA[website]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=151</guid>
		<description><![CDATA[People, I&#8217;ve just launched http://www.notemonk.com, an education portal. At notemonk you can Download books Share and read notes on text book topics Watch relevant videos Notemonk aims to create a rich and fun learning experience around text books. For launch, we&#8217;ve included the higher standards in NCERT. Please take a look and spread the word. [...]]]></description>
			<content:encoded><![CDATA[<p>People, I&#8217;ve just launched <a href="http://www.notemonk.com">http://www.notemonk.com</a>, an education portal. At notemonk you can</p>
<p><center><br />
<img src="http://www.notemonk.com/static/images/notemonk_mascot_medium.png" alt="Notemonk Mascot" /><br />
<img src="http://www.notemonk.com/static/images/notemonk_logo.png" alt="Notemonk Logo" /><br />
</center></p>
<ul>
<li> Download books
<li> Share and read notes on text book topics
<li> Watch relevant videos
</ul>
<p>Notemonk aims to create a rich and fun learning experience around text books. For launch, we&#8217;ve included the higher standards in NCERT. Please take a look and spread the word. Your feedback is most welcome!</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=BAyGB7sXIUU:ATh4HQ0YvYk:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=BAyGB7sXIUU:ATh4HQ0YvYk:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=BAyGB7sXIUU:ATh4HQ0YvYk:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=BAyGB7sXIUU:ATh4HQ0YvYk:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=BAyGB7sXIUU:ATh4HQ0YvYk:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=BAyGB7sXIUU:ATh4HQ0YvYk:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=BAyGB7sXIUU:ATh4HQ0YvYk:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=BAyGB7sXIUU:ATh4HQ0YvYk:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/BAyGB7sXIUU" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2010/04/03/notemonk-an-exciting-new-way-to-learn-your-textbooks/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2010/04/03/notemonk-an-exciting-new-way-to-learn-your-textbooks/</feedburner:origLink></item>
		<item>
		<title>A new chapter</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/WiN8jlZcvhw/</link>
		<comments>http://blog.prashanthellina.com/2010/01/28/a-new-chapter/#comments</comments>
		<pubDate>Thu, 28 Jan 2010 08:28:44 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[adventure]]></category>
		<category><![CDATA[life]]></category>
		<category><![CDATA[startup]]></category>
		<category><![CDATA[veveo]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=140</guid>
		<description><![CDATA[Looking back at early accidents When I look back at my short life I can see distinct periods or chapters. The time when I moved to Chennai and joined a new school in 3rd standard was the beginning of discovery that I could do well at studies and be competitive. During that stage I found [...]]]></description>
			<content:encoded><![CDATA[<p><center><img src="http://www.prashanthellina.com/images/new_horizons.gif" alt="New Horizons" /></center></p>
<p><strong>Looking back at early accidents</strong><br />
When I look back at my short life I can see distinct periods or chapters. The time when I moved to Chennai and joined a new school in 3rd standard was the beginning of discovery that I could do well at studies and be competitive. During that stage I found my interest in science and technology and aptitude for various extra-curricular activities like quizzing, painting, essay-writing and such. Another parallel phase started in 5th standard when I stumbled upon my interest for computers and programming. In 6th standard I realized my potential in being a leader and continued to be the class representative for most of school life capping it with my role as a School Pupil leader in 12th std. All these phases happened more or less without any conscious effort on my part. They just happened. A lot of credit goes to my family and teachers for guiding me through these. The point however is that the phase transitions were by and large accidental from my point of view.</p>
<p><strong>Hmm&#8230; Can we make accidents happen?</strong><br />
The first time when I consciously ushered in a new phase was in 11th std. An incident at school made me introspect and got me to start the process of understanding myself and people around me. What motivates us? What do we all want? What elevates us into happiness? What drops us into the abyss of sorrow? I learnt to control my anger and to think more rationally. I learnt to observe people around me and look behind the eyes. It taught me to empathize and connect. That was when I realized that I could decide what I could do and be &#8211; that future phases in life need not be accidental.</p>
<p>Life is a powerful force that keeps pushing you around. I have not met anyone who is in complete control. I don&#8217;t even believe complete control makes sense in the framework of life. At best we can attempt to understand the forces behind life and learn to go with the flow. Whether or not we can influence the direction of it, we can at least benefit from the interaction. We can learn and become better at it &#8211; at understanding. The one thing I decided not to do was to get too comfortable. Learning happens when there is a differential current in the river of life.</p>
<p>Undergraduation was a new phase filled with freedom, learning and fun. As opposed to school where your peers come from your immediate locality, in college, you get to meet a more diverse set. You meet people who think differently and are motivated by various ends. In understanding them there was a great opportunity to discover more of myself which I did.</p>
<p><strong>Veveo &#8211; my first workplace</strong><br />
After college I was confused about what to do. Do I pursue higher education or should I get a job? The companies coming to my campus for recruitment were Wipro, TCS, CTS, Infosys and so on. The work profile being offered did not excite me. Pursuing MS was the option I was considering and started the application process. At that time I came to know of a startup in Bangalore who were looking to hire freshers. Having set my mind on MS, I applied to this company half-heartedly, more to experience the interview process than anything else. I was interviewed by two people from the company one of them a co-founder and the other a VP. What hooked me was the way they interacted with me. From my point of view they were experienced professionals working in the industry having etched out successful careers and I was a little nobody &#8211; a fresher from a private college. For the first time I felt I belonged somewhere and could establish a relationship based on mutual respect. I had always detested the authoritarian nature of our society. In schools and colleges I saw a lack of reciprocative respect from the faculty. Our society has a lot to learn in terms of treating each other humanely. Anyway, that&#8217;s that. I got hooked and told my family. Despite their concerns about this being a &#8220;small&#8221; company without &#8220;security&#8221;, I decided to dive in headlong to discover the &#8220;insecurity&#8221; of being in a puny startup. Thus I joined Veveo.</p>
<p>Veveo was a very productive experience. I discovered startup culture and rediscovered myself. I walked up to the co-founder who interviewed me after getting my first paycheck and thanked him for paying me for having fun at work. That is a double scoop. I could not believe that was happening to me. For the first time in my life, I felt I belonged in a group. I found people who thought like me with whom I could share my ideas and I found other smart people who were so different that I would spend countless hours bludgeoning their logic (and getting bludgeoned too <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_wink.gif' alt=';)' class='wp-smiley' />). The 4.5 years of experience at Veveo feels more than the sum of what I had experienced before. Phew! What a ride it has been. The day I quit Veveo, I found it difficult to justify my decision to do so. Even today the thought is not extinct.</p>
<p><strong>What next?</strong><br />
Looking back I realize that I was getting too comfortable at Veveo. A good work environment, nice people and wonderful pay &#8212; yummy! Time for an adventure. I came up with an idea for making a physics simulation engine to aid game devs in creating awesome destructible environments. That was slightly more than a year back. I worked on it part-time in the evenings and weekends and then in March last year I requested my superior to release me so I could do it full-time. Much to my surprise he offered a part-time schedule where I could work at Veveo three days a week and spend the rest working on my project. This opportunity was god-sent and I lapped it up. A friend of mine from college who was also my roomie quit his job and joined me to work on this full-time.</p>
<p>The period since March last year is easily the most productive and exciting phase of my life. I have learnt immensely on a multitude of subjects &#8211; technology, project management, startup fundamentals, spirituality and philosophy, business development etc. The experience has altered me as a person and opened up avenues of my brain which I thought inexistent. I decided to take a plunge into this world full-time and took that step. I quit Veveo to work on this full-time from Jan this year. The journey so far would have been but a dream had it not been for support from my family, friends and colleagues at Veveo. Thank you all for being there.</p>
<p>Startup businesses are difficult beasts to tame and most people get flattened. The promise of glory (and $&#8217;s <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_wink.gif' alt=';)' class='wp-smiley' />  ) is a worthy lure for a persistent one. I intend to succeed doing this but failure is no less a success considering the path you tread and the things you experience. I will keep you posted in the forthcoming articles on this blog. A new chapter awaits.</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=WiN8jlZcvhw:uNmrItaQxcU:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=WiN8jlZcvhw:uNmrItaQxcU:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=WiN8jlZcvhw:uNmrItaQxcU:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=WiN8jlZcvhw:uNmrItaQxcU:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=WiN8jlZcvhw:uNmrItaQxcU:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=WiN8jlZcvhw:uNmrItaQxcU:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=WiN8jlZcvhw:uNmrItaQxcU:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=WiN8jlZcvhw:uNmrItaQxcU:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/WiN8jlZcvhw" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2010/01/28/a-new-chapter/feed/</wfw:commentRss>
		<slash:comments>12</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2010/01/28/a-new-chapter/</feedburner:origLink></item>
		<item>
		<title>All your aliases are belong to you</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/8CCPZxIN0IY/</link>
		<comments>http://blog.prashanthellina.com/2009/08/28/all-your-aliases-are-belong-to-you/#comments</comments>
		<pubDate>Fri, 28 Aug 2009 02:47:06 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[linux]]></category>
		<category><![CDATA[programming]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[text processing]]></category>
		<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[veveo]]></category>
		<category><![CDATA[bash]]></category>
		<category><![CDATA[productivity]]></category>
		<category><![CDATA[script]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=129</guid>
		<description><![CDATA[I like setting up shortcuts to frequently used commands whether I used Windows or Linux. I use the terminal often and create shortcuts to frequently used commands using &#8220;alias&#8221; feature of BASH. This has saved me considerable time in the past. However, I recently felt that if I could have a helper tool to monitor [...]]]></description>
			<content:encoded><![CDATA[<p>I like setting up shortcuts to frequently used commands whether I use Windows or Linux. I use the terminal often and create shortcuts to frequently used commands using the &#8220;alias&#8221; feature of BASH. This has saved me considerable time in the past. However, I recently felt that if I could have a helper tool to monitor my usage of commands and automatically suggest candidates for aliasing, that would be useful. The output of that is Aliaser.</p>
<p>Aliaser works by monitoring your bash history. It analyses command frequency and suggests candidates for aliasing. It manages aliases so created. The feature I like most in Aliaser is that it reminds you to use the aliases you created by showing tips on opening a new terminal session.</p>
<p>Download Aliaser from <a href="http://aliaser.googlecode.com">http://aliaser.googlecode.com</a>.</p>
<p><a href="http://aliaser.googlecode.com"><br />
<img align="center" src="http://aliaser.googlecode.com/files/aliaser_tips.png"/><br />
</a></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=8CCPZxIN0IY:mM-Lqrjvofg:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=8CCPZxIN0IY:mM-Lqrjvofg:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=8CCPZxIN0IY:mM-Lqrjvofg:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=8CCPZxIN0IY:mM-Lqrjvofg:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=8CCPZxIN0IY:mM-Lqrjvofg:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=8CCPZxIN0IY:mM-Lqrjvofg:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=8CCPZxIN0IY:mM-Lqrjvofg:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=8CCPZxIN0IY:mM-Lqrjvofg:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/8CCPZxIN0IY" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2009/08/28/all-your-aliases-are-belong-to-you/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2009/08/28/all-your-aliases-are-belong-to-you/</feedburner:origLink></item>
		<item>
		<title>Query Wikipedia from your terminal</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/vIORJi32Pjc/</link>
		<comments>http://blog.prashanthellina.com/2009/08/23/query-wikipedia-from-your-terminal/#comments</comments>
		<pubDate>Sun, 23 Aug 2009 05:34:23 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[linux]]></category>
		<category><![CDATA[web]]></category>
		<category><![CDATA[wikipedia]]></category>
		<category><![CDATA[bash]]></category>
		<category><![CDATA[code]]></category>
		<category><![CDATA[function]]></category>
		<category><![CDATA[productivity]]></category>
		<category><![CDATA[terminal]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=122</guid>
		<description><![CDATA[I refer Wikipedia frequently. I use this BASH function to help me do that from the terminal. For explanation of how this works head over here. BASH function # wiki # eg: wiki India # wiki Apple_Inc # wiki Anglo_Saxon wiki() { dig +short txt $1.wp.dg.cx } Example usage prashanth@prashanth-desktop:~$ wiki India "India, officially the [...]]]></description>
			<content:encoded><![CDATA[<p>I refer to Wikipedia frequently. I use this BASH function to help me do that from the terminal. For an explanation of how this works, head over <a href="http://www.commandlinefu.com/commands/view/2829/query-wikipedia-via-console-over-dns">here</a>.</p>
<p><strong> BASH function </strong></p>
<pre lang="bash">
# wiki &lt;page&gt;
# eg: wiki India
#     wiki Apple_Inc
#     wiki Anglo_Saxon
# Looks up the TXT record of <page name>.wp.dg.cx and prints it.
# "$1" is quoted so the page name always reaches dig as a single word.
wiki()
{
    dig +short txt "$1.wp.dg.cx"
}
</pre>
<p><br/></p>
<p><strong> Example usage </strong></p>
<pre lang="bash">
prashanth@prashanth-desktop:~$ wiki India
"India, officially the Republic of India ( '\; see also other Indian languages), is a country in South Asia.
It is the seventh-largest country by geographical area, the second-most populous country, and the most
populous democracy in the world. Bounded by t" "he Indian Ocean on the south, the Arabian Sea on
the west, and the Bay of Bengal on the east, India has a coastline of ... http://a.vu/w:India"

prashanth@prashanth-desktop:~$ wiki Anglo_Saxon
"Anglo-Saxons (or Anglo-Saxon) is the term usually used to describe the invading tribes in the south
and east of Great Britain starting from the early 5th century AD, and their creation of the English
nation, lasting until the Norman conquest of 1066. The " "Benedictine monk, Bede, identified
them as the descendants of three Germanic tribes: http://a.vu/w:Anglo-Saxons"
</pre>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=vIORJi32Pjc:a8TLpIqmWhI:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=vIORJi32Pjc:a8TLpIqmWhI:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=vIORJi32Pjc:a8TLpIqmWhI:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=vIORJi32Pjc:a8TLpIqmWhI:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=vIORJi32Pjc:a8TLpIqmWhI:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=vIORJi32Pjc:a8TLpIqmWhI:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=vIORJi32Pjc:a8TLpIqmWhI:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=vIORJi32Pjc:a8TLpIqmWhI:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/vIORJi32Pjc" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2009/08/23/query-wikipedia-from-your-terminal/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2009/08/23/query-wikipedia-from-your-terminal/</feedburner:origLink></item>
		<item>
		<title>Command-line language translation</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/_8w6JTbJiXA/</link>
		<comments>http://blog.prashanthellina.com/2009/08/18/command-line-language-translation/#comments</comments>
		<pubDate>Tue, 18 Aug 2009 14:15:42 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[programming]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[web]]></category>
		<category><![CDATA[google]]></category>
		<category><![CDATA[language]]></category>
		<category><![CDATA[script]]></category>
		<category><![CDATA[tool]]></category>
		<category><![CDATA[translation]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=117</guid>
		<description><![CDATA[Here is a simple utility created using Python for translating text from various languages into English. It uses the Google AJAX API to do this. Usage prashanth@prashanth-desktop:~$ translate bonjour hello prashanth@prashanth-desktop:~$ translate guten morgen Good morning Code #!/usr/bin/env python ''' Translates text into english using Google Translate. Usage: python translate.py (or) echo &#124; python translate.py [...]]]></description>
			<content:encoded><![CDATA[<p>Here is a simple utility created using Python for translating text from various languages into English. It uses the Google AJAX API to do this.</p>
<p><strong>Usage</strong></p>
<pre lang="bash">
prashanth@prashanth-desktop:~$ translate bonjour
hello
prashanth@prashanth-desktop:~$ translate guten morgen
Good morning
</pre>
<p><br/></p>
<p><strong>Code</strong></p>
<pre lang="python">
#!/usr/bin/env python
'''
Translates text into english using Google Translate.
Usage: python translate.py &lt;text&gt;
        (or)
       echo &lt;text&gt; | python translate.py

For convenience, make a symlink to this file from /usr/bin/translate.
'''
# derived from : http://code.google.com/p/py-gtranslate/source/browse/trunk/gtrans.py

import sys
import urllib2
import urllib
import simplejson as json

# Language codes that translate() joins into the "langpair" request parameter.
# NOTE(review): an empty FROM_LANGUAGE presumably lets the service detect the
# source language - confirm against the API documentation.
FROM_LANGUAGE = ''
TO_LANGUAGE = 'en'
# Endpoint that translate() appends its query string to.
BASE_URL = 'http://ajax.googleapis.com/ajax/services/language/translate'

def translate(from_language, to_language, text):
    '''
    Translate text by querying the service at BASE_URL.

    from_language -- source language code (this script passes an
                     empty string)
    to_language   -- target language code, e.g. en
    text          -- the text to translate

    Returns the translated text, or the input text unchanged when the
    response carries no translation.
    '''
    langpair = '%s|%s' % (from_language, to_language)

    # urlencode() quotes every key and value (including the "|" in
    # langpair) and joins the pairs, so no manual escaping is needed.
    params = urllib.urlencode({'v': '1.0', 'langpair': langpair, 'q': text})

    url = '%s?%s' % (BASE_URL, params)
    resp = json.load(urllib2.urlopen(url))
    try:
        return resp['responseData']['translatedText']
    except (KeyError, TypeError):
        # KeyError: an expected field is missing from the response.
        # TypeError: responseData is null or not a mapping.
        # Either way fall back to echoing the input; other errors
        # (and Ctrl-C) are no longer silently swallowed.
        return text

def main(text):
    if text:
        print translate(FROM_LANGUAGE, TO_LANGUAGE, text)

    else:
        lines = [l.strip() for l in sys.stdin.readlines()]
        for line in lines:
            if line:
                text = translate(FROM_LANGUAGE, TO_LANGUAGE, line)
                print '[%s]' % line
                print text
            else:
                print

if __name__ == '__main__':
    # Everything after the script name is the text to translate. With
    # no arguments text is empty and main() reads from stdin instead.
    args = sys.argv[1:]
    text = ' '.join(args)
    main(text)
</pre>
<p><br/></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=_8w6JTbJiXA:q2hUYUT1I9g:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=_8w6JTbJiXA:q2hUYUT1I9g:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=_8w6JTbJiXA:q2hUYUT1I9g:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=_8w6JTbJiXA:q2hUYUT1I9g:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=_8w6JTbJiXA:q2hUYUT1I9g:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=_8w6JTbJiXA:q2hUYUT1I9g:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=_8w6JTbJiXA:q2hUYUT1I9g:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=_8w6JTbJiXA:q2hUYUT1I9g:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/_8w6JTbJiXA" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2009/08/18/command-line-language-translation/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2009/08/18/command-line-language-translation/</feedburner:origLink></item>
		<item>
		<title>On setting up USB RAID</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/Ucx4mBdOnn0/</link>
		<comments>http://blog.prashanthellina.com/2009/08/07/on-setting-up-usb-raid/#comments</comments>
		<pubDate>Fri, 07 Aug 2009 12:09:16 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[computer hardware]]></category>
		<category><![CDATA[linux]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=102</guid>
		<description><![CDATA[I bought two Dane-Elec 8GB USB drives recently. Flash memory (as opposed to Hard disk storage) has faster &#8220;seek&#8221; capability. This is inherent in the design as flash memory is solid state whereas hard disks are electro-mechanical with a &#8220;head&#8221; that needs to be moved around using a &#8220;drive&#8221; mechanism. Since seek times are better [...]]]></description>
			<content:encoded><![CDATA[<p>I bought two Dane-Elec 8GB USB drives recently. Flash memory (as opposed to Hard disk storage) has faster &#8220;seek&#8221; capability. This is inherent in the design as flash memory is solid state whereas hard disks are electro-mechanical with a &#8220;head&#8221; that needs to be moved around using a &#8220;drive&#8221; mechanism. Since seek times are better on flash drives, they are faster when you are reading or writing a lot of small files.</p>
<p>However flash drives do not have sustained data transfer rates that hard disks have (i.e. throughput). My thought process was that the throughput can be made up by slapping together two or more USB drives and applying software RAID 0 over them. Below are some performance results and they look encouraging.</p>
<p>Note that the timings are in seconds.</p>
<p><strong>Single 8GB Dane-Elec USB drive</strong><br />
131.11 for 683MB (write)<br />
44.62 for 683MB (read)</p>
<p><strong>Single 2GB Transcend USB drive</strong><br />
204.50 for 683MB (write)<br />
63.26 for 683MB (read)</p>
<p><strong>Single 8GB Sandisk drive</strong><br />
197.61 for 683MB (write)<br />
29.73 for 683MB (read)</p>
<p><strong>RAID0 (two Dane-Elec 8GB USB drives)</strong><br />
61.177 for 683MB (write)<br />
17.9 for 683MB (read)</p>
<p>I created a test file with 683 MB of data by funnelling /dev/urandom into it and then copied it to the USB drive(s) to measure write performance. Then I unmounted the USB drive(s) to make sure the buffer cache is emptied. On remounting I measured read performance by copying the test file back to the harddisk. The hard disk used was a Seagate Barracuda 320GB (7200rpm).</p>
<p>If four USB drives (say Sandisk 8GB) can be RAID&#8217;ed together on level 0 we would have a cheap SSD. One thing that prevents me from trying this out is the question of reliability. I have not used this setup long enough or stressed it hard enough to be comfortable using this for normal use.</p>
<p>To rig something like this yourself, read this <a href="http://linuxgazette.net/151/weiner.html">article</a>.</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=Ucx4mBdOnn0:RXAfFkNsZ0w:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=Ucx4mBdOnn0:RXAfFkNsZ0w:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=Ucx4mBdOnn0:RXAfFkNsZ0w:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=Ucx4mBdOnn0:RXAfFkNsZ0w:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=Ucx4mBdOnn0:RXAfFkNsZ0w:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=Ucx4mBdOnn0:RXAfFkNsZ0w:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=Ucx4mBdOnn0:RXAfFkNsZ0w:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=Ucx4mBdOnn0:RXAfFkNsZ0w:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/Ucx4mBdOnn0" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2009/08/07/on-setting-up-usb-raid/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2009/08/07/on-setting-up-usb-raid/</feedburner:origLink></item>
		<item>
		<title>Extracting relevant text from HTML pages</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/giNhCHZ-D2A/</link>
		<comments>http://blog.prashanthellina.com/2009/07/27/extracting-relevant-text-from-html-pages/#comments</comments>
		<pubDate>Mon, 27 Jul 2009 11:28:09 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[programming]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[text processing]]></category>
		<category><![CDATA[web]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=99</guid>
		<description><![CDATA[Some time back I had done some work on extracting topics from an arbitrary piece of text using Wikipedia data. Recently I thought of a concept to put that algorithm to work. As a part of this project, I need to extract relevant text from an arbitrary HTML page. By relevant I mean the &#8220;meat&#8221; [...]]]></description>
			<content:encoded><![CDATA[<p>Some time back I had done some work on <a href="http://blog.prashanthellina.com/2007/12/21/topic-extraction-using-wikipedia-data/">extracting topics</a> from an arbitrary piece of text using Wikipedia data. Recently I thought of a concept to put that algorithm to work. As a part of this project, I need to extract <strong>relevant</strong> text from an arbitrary HTML page. By relevant I mean the &#8220;meat&#8221; of the page devoid of navigation links and side-content.</p>
<p>This algorithm has the following <strong>steps</strong>:</p>
<ul>
<li>make doc from html data (clean html)
<li>identify content nodes (nodes having substantial content)
<li>prune xml tree to remove irrelevant nodes
<li>get the most linked node from pruned tree (subtree contains relevant text)
<li>make the dot graph
</ul>
<p>I&#8217;ve pasted the relevant python module below for easy reading. However, if you want to download the code and hack it, you can get all the files from <a href="http://code.prashanthellina.com/code/content_extraction">here</a>.</p>
<p><strong>Code files</strong></p>
<ul>
<li>content_extract.py &#8211; actual work gets done here (file pasted below)
<li>cextract.py &#8211; cgi front-end which fetches url content and feeds it to the above script.
<li>cextract_config.py &#8211; cgi script configuration file. You have to adjust this to your environment.
</ul>
<p><strong>Try it right here and right now</strong></p>
<form action="http://www.prashanthellina.com/cgi-bin/cextract.py" method="GET">
url:<br />
<input type="text" name="url" size="60"/>
<input type="submit" value="extract text"/>
</form>
<p><strong>Some samples</strong></p>
<ul>
<li><a href="http://www.prashanthellina.com/cextract_data/510fbe51d89334aecb70d9b1d1635711.html">http://news.bbc.co.uk/sport2/hi/motorsport/formula_one/8169436.stm</a>
<li><a href="http://www.prashanthellina.com/cextract_data/640ca0b7c6e818b2b1bf952a206a6388.html">http://www.prashanthellina.com/cextract_data/640ca0b7c6e818b2b1bf952a206a6388.html</a>
<li><a href="http://www.prashanthellina.com/cextract_data/8408efd51b4f1f6b91650d4ea3ce8924.html">http://www.telegraph.co.uk/news/worldnews/europe/france/5913494/Nicolas-Sarkozy-to-slow-down-after-collapsing-while-jogging.html</a>
</ul>
<p>Please let me know if you find cases for which the algorithm does not work. Even better would be to download the code and hack it up and post back. I am eager to see what you can come up with.</p>
<pre lang="python">
#!/usr/bin/env python

import sys
from cStringIO import StringIO

from lxml import etree #http://codespeak.net/lxml/

# Tags whose elements are never considered content nodes (see get_content_nodes).
IGNORABLE_TAGS = set(['script', 'a'])
# Minimum length of a node's directly held text (as returned by get_text)
# for the node to count as a content node.
MIN_TEXT_LEN = 50

def get_text(node):
    '''
    Return the text held directly by an XML node: its own leading
    text followed by the tail text that trails each direct child,
    joined with newlines and stripped of surrounding whitespace.
    Text inside the child elements themselves is not included.
    '''
    pieces = [node.text or '']
    pieces.extend(child.tail
                  for child in node.getchildren()
                  if child.tail is not None)

    return '\n'.join(pieces).strip()

def get_xml(node):
    '''
    Serialize node, together with everything beneath it,
    to its XML string form.
    '''
    xml = etree.tostring(node)
    return xml

def create_doc(data):
    '''
    Parse a string of HTML markup with lxml's HTMLParser and
    return the resulting document tree.
    '''
    return etree.parse(StringIO(data), etree.HTMLParser())

def get_content_nodes(doc):
    '''
    Collect, in document order, every element whose directly held
    text (see get_text) is at least MIN_TEXT_LEN characters long.
    Elements whose tag is listed in IGNORABLE_TAGS are skipped.
    '''
    def is_content(elem):
        # ignorable tags are rejected before their text is even looked at
        if elem.tag.lower() in IGNORABLE_TAGS:
            return False

        text = get_text(elem)
        return bool(text) and len(text) >= MIN_TEXT_LEN

    return [elem for elem in doc.xpath('//*') if is_content(elem)]

def make_pruned_tree(content_nodes):
    '''
    Reduce the XML tree to the content nodes and their ancestors.

    Returns a (nodes, links) pair of dicts, both keyed by id():
    nodes maps id(node) to the node itself, and links maps
    id(node) to id(parent) for every kept node that has a parent.
    '''
    nodes = {}
    links = {}

    for content_node in content_nodes:
        nodes[id(content_node)] = content_node

        # walk from the content node itself up to the root
        lineage = [content_node]
        lineage.extend(content_node.iterancestors())

        for member in lineage:
            member_id = id(member)

            parent = member.getparent()
            if parent is not None:
                links[member_id] = id(parent)

            nodes.setdefault(member_id, member)

    return nodes, links

def get_inlink_counts(links):
    '''
    Count the incoming links of every link target.

    links maps a node id to the id of the node it points at
    (make_pruned_tree builds it as child id -> parent id).

    Returns a dict mapping each target id to the number of entries
    in links that point at it. Ids that are never a target are
    absent from the result; an empty links dict gives an empty dict.
    '''
    counts = {}

    # only the targets matter here, so the source ids are not needed
    for to_id in links.values():
        counts[to_id] = counts.get(to_id, 0) + 1

    return counts

def get_most_linked_node(nodes, links):
    '''
    Return the node with the largest number of inlinks.

    When several nodes share the highest count, the one with the
    largest id wins. Raises ValueError if links is empty, because
    there is then nothing to take the maximum of.
    '''
    inlink_counts = get_inlink_counts(links)

    # rank by (count, id) so that ties are settled by the id
    best_id, best_count = max(inlink_counts.iteritems(),
                              key=lambda item: (item[1], item[0]))
    return nodes[best_id]

def make_dot_graph(nodes, links, chosen_node, stream):
    '''
    Write the tree to stream in the dot graph format,
    ready to be rendered by graphviz for visualization.

    nodes maps id to node, links maps a node id to the id it points
    at, and chosen_node is the id of the node to highlight.
    Every node is labelled with its tag, followed by the length of
    its directly held text in parentheses when that is non-zero.
    '''
    print >> stream, "digraph G {"

    for _id, node in nodes.iteritems():

        text_len = len(get_text(node))

        label = node.tag
        if text_len:
            label = '%s (%d)' % (label, text_len)

        # only the chosen node gets the highlight attributes
        attrs = ''
        if _id == chosen_node:
            attrs = 'style=filled color=lightblue'

        print >> stream, "%s [label=\"%s\" %s];" % (_id, label, attrs)

    for from_id, to_id in links.iteritems():
        print >> stream, "%d -> %d;" % (from_id, to_id)

    print >> stream, "}"

def main():
    '''
    Read an HTML page from stdin and write a dot graph of its
    pruned content tree to stdout, with the most linked node
    highlighted.
    '''
    # step 1: make doc from html data (cleans html)
    html = sys.stdin.read()
    doc = create_doc(html)

    # step 2: identify content nodes
    content_nodes = get_content_nodes(doc)

    # step 3: prune xml tree down to content nodes and their ancestors
    nodes, links = make_pruned_tree(content_nodes)

    # step 4: the most linked node is the one to highlight
    chosen_id = id(get_most_linked_node(nodes, links))

    # step 5: make the dot graph
    make_dot_graph(nodes, links, chosen_id, sys.stdout)

if __name__ == '__main__':
    #Eg: wget "http://blog.prashanthellina.com" -O - | python thisscript.py | dot -Tpng -o /tmp/test.png ; eog /tmp/test.png
    main()
</pre>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=giNhCHZ-D2A:XDXDj4VqFmI:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=giNhCHZ-D2A:XDXDj4VqFmI:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=giNhCHZ-D2A:XDXDj4VqFmI:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=giNhCHZ-D2A:XDXDj4VqFmI:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=giNhCHZ-D2A:XDXDj4VqFmI:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=giNhCHZ-D2A:XDXDj4VqFmI:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=giNhCHZ-D2A:XDXDj4VqFmI:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=giNhCHZ-D2A:XDXDj4VqFmI:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/giNhCHZ-D2A" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2009/07/27/extracting-relevant-text-from-html-pages/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2009/07/27/extracting-relevant-text-from-html-pages/</feedburner:origLink></item>
		<item>
		<title>Clustering Data using Python</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/qRwHYHgMmeo/</link>
		<comments>http://blog.prashanthellina.com/2009/07/25/clustering-data-using-python/#comments</comments>
		<pubDate>Sat, 25 Jul 2009 04:06:43 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[programming]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[text processing]]></category>
		<category><![CDATA[clustering]]></category>
		<category><![CDATA[script]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=93</guid>
		<description><![CDATA[As a part of a project I am working on, I had to cluster urls on a page. After some light googling I found, python-cluster. You can find below a simple python script to illustrate the usage of python-cluster library. Code import pprint from difflib import SequenceMatcher # http://python-cluster.sourceforge.net/ from cluster import HierarchicalClustering # input [...]]]></description>
			<content:encoded><![CDATA[<p>As a part of a project I am working on, I had to cluster urls on a page. After some light googling I found, <a href="http://python-cluster.sourceforge.net/">python-cluster</a>. You can find below a simple python script to illustrate the usage of python-cluster library.</p>
<p><strong>Code</strong></p>
<pre lang="python">
# Example script: group a list of urls into clusters of similar urls
# using python-cluster's HierarchicalClustering with a difflib based
# distance function, then pretty-print the clusters.
import pprint
from difflib import SequenceMatcher

# http://python-cluster.sourceforge.net/
from cluster import HierarchicalClustering

# input urls to be clustered
urls = [
    'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28814385',
    '#articles',
    'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28814335',
    'http://yro.slashdot.org/~drDugan/',
    'http://web.sourceforge.com/privacy.php',
    'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28815123',
    'http://slashdot.org//slashdot.org/~Darkness404',
    'http://slashdot.org//radio.slashdot.org',
    'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;op=Reply&#038;threshold=1&#038;commentsort=0&#038;mode=thread&#038;pid=28814429',
    'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;op=Reply&#038;threshold=1&#038;commentsort=0&#038;mode=thread&#038;pid=28814457',
    'http://slashdot.org//slashdot.org/article.pl?sid=09/07/24/1545238',
    'http://slashdot.org//slashdot.org/comments.pl?sid=09/07/24/1545238&#038;cid=28810581',
    'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28815269',
    'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28814657',
    # NOTE(review): there is no comma after the next entry, so Python joins it
    # with the string that follows into one single url. The output listed
    # below the script shows the joined value. Add the comma to cluster the
    # two urls separately (the output will then differ from the one shown).
    'http://web.sourceforge.com/terms.php'
    'http://slashdot.org//it.slashdot.org/search',
    'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28814581',
    'http://xkcd.com/612/',
    'http://web.sourceforge.com/advertising',
    'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;op=Reply&#038;threshold=1&#038;commentsort=0&#038;mode=thread&#038;pid=28814785',
]

# distance function compares two urls and finds the distance
# uses SequenceMatcher from python standard module difflib
# ratio() is 1.0 for identical strings, so identical urls are at distance 0.0
def distance(url1, url2):
    ratio = SequenceMatcher(None, url1, url2).ratio()
    return 1.0 - ratio

# Perform clustering
hc = HierarchicalClustering(urls, distance)
# presumably 0.2 is the distance threshold at which the cluster hierarchy
# is cut - TODO confirm against the python-cluster documentation
clusters = hc.getlevel(0.2)

pprint.pprint(clusters)
</pre>
<p><br/></p>
<p><strong> Output </strong></p>
<pre lang="python">
[['#articles'],
 ['http://xkcd.com/612/'],
 ['http://web.sourceforge.com/privacy.php'],
 ['http://web.sourceforge.com/advertising'],
 ['http://web.sourceforge.com/terms.phphttp://slashdot.org//it.slashdot.org/search'],
 ['http://yro.slashdot.org/~drDugan/'],
 ['http://slashdot.org//slashdot.org/~Darkness404'],
 ['http://slashdot.org//radio.slashdot.org'],
 ['http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;op=Reply&#038;threshold=1&#038;commentsort=0&#038;mode=thread&#038;pid=28814785',
  'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;op=Reply&#038;threshold=1&#038;commentsort=0&#038;mode=thread&#038;pid=28814429',
  'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;op=Reply&#038;threshold=1&#038;commentsort=0&#038;mode=thread&#038;pid=28814457'],
 ['http://slashdot.org//slashdot.org/article.pl?sid=09/07/24/1545238',
  'http://slashdot.org//slashdot.org/comments.pl?sid=09/07/24/1545238&#038;cid=28810581',
  'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28815123',
  'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28815269',
  'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28814385',
  'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28814335',
  'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28814657',
  'http://slashdot.org//it.slashdot.org/comments.pl?sid=1314601&#038;cid=28814581']]
</pre>
<p><br/></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=qRwHYHgMmeo:Yhj_oUWRUuA:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=qRwHYHgMmeo:Yhj_oUWRUuA:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=qRwHYHgMmeo:Yhj_oUWRUuA:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=qRwHYHgMmeo:Yhj_oUWRUuA:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=qRwHYHgMmeo:Yhj_oUWRUuA:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=qRwHYHgMmeo:Yhj_oUWRUuA:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=qRwHYHgMmeo:Yhj_oUWRUuA:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=qRwHYHgMmeo:Yhj_oUWRUuA:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/qRwHYHgMmeo" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2009/07/25/clustering-data-using-python/feed/</wfw:commentRss>
		<slash:comments>5</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2009/07/25/clustering-data-using-python/</feedburner:origLink></item>
		<item>
		<title>XMonad: A Window Manager for “real” people :)</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/iB6fqUW9uog/</link>
		<comments>http://blog.prashanthellina.com/2009/04/26/xmonad/#comments</comments>
		<pubDate>Sun, 26 Apr 2009 16:16:27 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[gnome]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[gui]]></category>
		<category><![CDATA[tiling window manager]]></category>
		<category><![CDATA[ubuntu]]></category>
		<category><![CDATA[xmonad]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=79</guid>
		<description><![CDATA[I have been a happy Gnome user for many years now and only recently started thinking about switching to KDE 4.2 when Ubuntu 9.04 (Jaunty Jackalope) comes out. However, it so happened that I bought two new widescreen monitors and setup a dual-monitor environment. This is when I started realizing the Gnome was clumsy at [...]]]></description>
			<content:encoded><![CDATA[<p><img src="http://www.prashanthellina.com/images/xmonad_1.png" alt="XMonad in Gnome" align="left"/> I have been a happy Gnome user for many years now and only recently started thinking about switching to KDE 4.2 when Ubuntu 9.04 (Jaunty Jackalope) comes out. However, it so happened that I bought two new widescreen monitors and setup a dual-monitor environment. This is when I started realizing that Gnome was clumsy at best when it comes to managing windows across monitors.</p>
<p>The reason I bought multiple monitors is to maximize my work area so I do not have to keep switching between overlapping windows. Gnome it seems is ill-suited to effectively and effortlessly managing space.</p>
<p>When I maximize a window on a widescreen monitor, the window takes up the entire space on the screen. Although this is the expected behavior, I could not rest looking at the huge amount of space being wasted. What I would like better is having two windows occupying the height of the screen but sitting right next to each other (i.e. tiled). This way less space is wasted and you get to see both windows at the same time. I could do this in gnome, but with considerable effort, as closing a window and re-opening another requires another round of frantic window arrangement.</p>
<p>In comes a tiling window manager &#8230; Tiling window managers do not allow overlapping windows (except for dialogs). They are a good solution when you need to tile windows to occupy all available screen space. Also, most tiling window managers emphasize keyboard control over mouse control. If you are a productivity freak you will love the keyboard shortcuts that do away with a painful context switch required in navigating with the mouse.</p>
<p>There are many Tiling Window Managers out there. Rat Poison, Stump WM, Ion, dwm, wmii and XMonad are a few. I chose XMonad because I was able to figure how to integrate XMonad into the Gnome desktop environment. Most of these window managers don&#8217;t play well with Gnome. I use a number of Gnome apps and I would hate to not be able to use them.</p>
<p>Okay, here is how you <strong>Install XMonad on Ubuntu to work within Gnome</strong> &#8230;</p>
<pre lang="bash">
mkdir ~/.xmonad
cd ~/.xmonad
vim xmonad.hs
</pre>
<p><br/></p>
<p>Then paste the following into the xmonad.hs file and save it.</p>
<pre lang="haskell">
-- xmonad configuration that builds on the stock gnomeConfig.
import XMonad
import XMonad.Config.Gnome
import XMonad.ManageHook

-- Extra window rules: a window whose resource equals "Do" gets doIgnore
-- (presumably the GNOME Do launcher, left unmanaged by xmonad - confirm).
myManageHook :: [ManageHook]
myManageHook =
    [ resource  =? "Do"   --> doIgnore ]

-- Run xmonad with gnomeConfig, overriding only manageHook so that the
-- rules above are combined with the manageHook gnomeConfig already has.
main = xmonad gnomeConfig
    { manageHook = manageHook gnomeConfig <+> composeAll myManageHook}
</pre>
<p><br/></p>
<pre lang="bash">
sudo apt-get install xmonad libghc6-xmonad-dev
</pre>
<p><br/><br />
Note that libghc6-xmonad-dev is a 200MB dependency!</p>
<pre lang="bash">
vim ~/.gnomerc
</pre>
<p><br/></p>
<p>Add this line to the .gnomerc file and save.</p>
<pre lang="bash">
export WINDOW_MANAGER=xmonad
</pre>
<p>The installation is done. Now to experience your new Window Manager, log out and then log back in. You will see your usual desktop with Gnome panels.</p>
<p>To learn more about XMonad and how to use it, follow these links:</p>
<p><a href="http://ubuntu-snippets.blogspot.com/2008/08/xmonad-tiling-window-manager.html">http://ubuntu-snippets.blogspot.com/2008/08/xmonad-tiling-window-manager.html</a><br />
<a href="http://tombuntu.com/index.php/2009/03/17/introduction-to-the-xmonad-tiling-window-manager/">http://tombuntu.com/index.php/2009/03/17/introduction-to-the-xmonad-tiling-window-manager/</a><br />
<a href="https://www.haskell.org/haskellwiki/Xmonad/Using_xmonad_in_Gnome/0.5">https://www.haskell.org/haskellwiki/Xmonad/Using_xmonad_in_Gnome/0.5</a></p>
<p><img src="http://www.prashanthellina.com/images/xmonad_2.jpg" alt="XMonad in Gnome" align="left"/></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=iB6fqUW9uog:8CLtCW29h7E:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=iB6fqUW9uog:8CLtCW29h7E:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=iB6fqUW9uog:8CLtCW29h7E:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=iB6fqUW9uog:8CLtCW29h7E:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=iB6fqUW9uog:8CLtCW29h7E:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=iB6fqUW9uog:8CLtCW29h7E:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=iB6fqUW9uog:8CLtCW29h7E:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=iB6fqUW9uog:8CLtCW29h7E:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/iB6fqUW9uog" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2009/04/26/xmonad/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2009/04/26/xmonad/</feedburner:origLink></item>
		<item>
		<title>Microsoft Surface: Some Videos</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/TynwURN2MPE/</link>
		<comments>http://blog.prashanthellina.com/2009/04/22/microsoft-surface-some-videos/#comments</comments>
		<pubDate>Wed, 22 Apr 2009 15:15:10 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[computer hardware]]></category>
		<category><![CDATA[microsoft]]></category>
		<category><![CDATA[surface]]></category>
		<category><![CDATA[touch]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=78</guid>
		<description><![CDATA[I had written earlier about my experience with Microsoft Surface. I&#8217;ve captured some videos of me using it. Here they are &#8230;]]></description>
			<content:encoded><![CDATA[<p>I had written earlier about <a href="/2008/12/30/microsoft-surface-unboxing/">my experience with Microsoft Surface</a>. I&#8217;ve captured some videos of me using it. Here they are &#8230;</p>
<p><object width="425" height="344"><param name="movie" value="http://www.youtube.com/v/_GVSZUHt1UI&#038;hl=en&#038;fs=1&#038;color1=0x2b405b&#038;color2=0x6b8ab6"></param><param name="allowFullScreen" value="true"></param><param name="allowscriptaccess" value="always"></param><embed src="http://www.youtube.com/v/_GVSZUHt1UI&#038;hl=en&#038;fs=1&#038;color1=0x2b405b&#038;color2=0x6b8ab6" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="344"></embed></object></p>
<p><br/></p>
<p><object width="425" height="344"><param name="movie" value="http://www.youtube.com/v/sDlQoSelwZo&#038;hl=en&#038;fs=1&#038;color1=0x2b405b&#038;color2=0x6b8ab6"></param><param name="allowFullScreen" value="true"></param><param name="allowscriptaccess" value="always"></param><embed src="http://www.youtube.com/v/sDlQoSelwZo&#038;hl=en&#038;fs=1&#038;color1=0x2b405b&#038;color2=0x6b8ab6" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="344"></embed></object></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=TynwURN2MPE:gnd8PjH0ioI:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=TynwURN2MPE:gnd8PjH0ioI:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=TynwURN2MPE:gnd8PjH0ioI:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=TynwURN2MPE:gnd8PjH0ioI:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=TynwURN2MPE:gnd8PjH0ioI:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=TynwURN2MPE:gnd8PjH0ioI:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=TynwURN2MPE:gnd8PjH0ioI:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=TynwURN2MPE:gnd8PjH0ioI:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/TynwURN2MPE" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2009/04/22/microsoft-surface-some-videos/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2009/04/22/microsoft-surface-some-videos/</feedburner:origLink></item>
		<item>
		<title>My Dual Monitor Setup</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/nlH9vMj1YTc/</link>
		<comments>http://blog.prashanthellina.com/2009/04/21/my-dual-monitor-setup/#comments</comments>
		<pubDate>Tue, 21 Apr 2009 17:30:26 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[math]]></category>
		<category><![CDATA[dual monitor]]></category>
		<category><![CDATA[dual-head]]></category>
		<category><![CDATA[geforce]]></category>
		<category><![CDATA[nvidia]]></category>
		<category><![CDATA[samsung]]></category>
		<category><![CDATA[syncmaster]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=77</guid>
		<description><![CDATA[Greetings webizen, I tried hard to get back to my blogging schedule but my laziness got the better of me. I am back truly with a new batch of posts which I will publish over the next few days. Recently, I went to Veveo&#8217;s main office near Boston, USA and had the privilege of experiencing [...]]]></description>
			<content:encoded><![CDATA[<p>Greetings webizen, I tried hard to get back to my blogging schedule but my laziness got the better of me. I am back truly with a new batch of posts which I will publish over the next few days.</p>
<p>Recently, I went to Veveo&#8217;s main office near Boston, USA and had the privilege of experiencing a finger freezing winter! (Not to mention a three day power-cut which I spent under multiple layers of blankets).</p>
<p>A few weeks before I was going to return to India, I started shopping. One of the things I bought was a Refurbished NVidia GeForce 9800 GTX+ by EVGA. It was the best deal I could find on NewEgg in terms of value for money. I bought it primarily to have a better experience when flying over Paris &#8230; in Google Earth.</p>
<p>This is the first graphics card I bought ever and was very excited when I received it. I was surprised to see how big graphics cards have become nowadays (compared to S3 cards of the decade gone past). While ogling at the card I noticed that it had two video outputs. I had heard of dual-head setups but always considered them a distant reality considering my cash-strappedness as a college student. This sparked off an urge to finally setup my own Dual monitor setup back in Bangalore.</p>
<p>I got back to India and promptly headed to SP Road in Bangalore. After some asking around, I figured that the best choice at the moment was two Samsung SyncMaster 2233SW&#8217;s each capable of 1920&#215;1080 (Yes, HD). I bought both for about 22,000 Rupees (about USD 420).</p>
<p>I set them up in Linux and upgraded to the latest (at the time) Nvidia drivers (180.x). I spent a few hours trying to hide a satisfied smile lest people think I had gone nuts.</p>
<p>Dual monitors give you more screen space to work with. They will help you reduce Window-Switching and Workspace-Switching which does save time as I have observed. Now that I have experienced the dual-head setup, I am <strong>never</strong> going back to using a single monitor. It is painful <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_wink.gif' alt=';)' class='wp-smiley' />  So much did I feel this that I bought a low-end Nvidia GeForce card (7200) with dual-head support and stuck it into my computer at work <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> </p>
<p>If you do not already have a dual-head setup, definitely consider an upgrade. You are worth it:)</p>
<p><center><br />
<object width="400" height="300"><param name="flashvars" value="offsite=true&#038;lang=en-us&#038;page_show_url=%2Fphotos%2Fprashanthellina%2Fsets%2F72157617045645747%2Fshow%2F&#038;page_show_back_url=%2Fphotos%2Fprashanthellina%2Fsets%2F72157617045645747%2F&#038;set_id=72157617045645747&#038;jump_to="></param><param name="movie" value="http://www.flickr.com/apps/slideshow/show.swf?v=70933"></param><param name="allowFullScreen" value="true"></param><embed type="application/x-shockwave-flash" src="http://www.flickr.com/apps/slideshow/show.swf?v=70933" allowFullScreen="true" flashvars="offsite=true&#038;lang=en-us&#038;page_show_url=%2Fphotos%2Fprashanthellina%2Fsets%2F72157617045645747%2Fshow%2F&#038;page_show_back_url=%2Fphotos%2Fprashanthellina%2Fsets%2F72157617045645747%2F&#038;set_id=72157617045645747&#038;jump_to=" width="400" height="300"></embed></object></p>
<p><a href="http://www.flickr.com/photos/prashanthellina/sets/72157617045645747/">Go to flickr set for this</a><br />
</center></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=nlH9vMj1YTc:eLcJtZDuPzE:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=nlH9vMj1YTc:eLcJtZDuPzE:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=nlH9vMj1YTc:eLcJtZDuPzE:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=nlH9vMj1YTc:eLcJtZDuPzE:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=nlH9vMj1YTc:eLcJtZDuPzE:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=nlH9vMj1YTc:eLcJtZDuPzE:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=nlH9vMj1YTc:eLcJtZDuPzE:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=nlH9vMj1YTc:eLcJtZDuPzE:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/nlH9vMj1YTc" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2009/04/21/my-dual-monitor-setup/feed/</wfw:commentRss>
		<slash:comments>6</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2009/04/21/my-dual-monitor-setup/</feedburner:origLink></item>
		<item>
		<title>Microsoft Surface Unboxing</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/zRYa-fslO14/</link>
		<comments>http://blog.prashanthellina.com/2008/12/30/microsoft-surface-unboxing/#comments</comments>
		<pubDate>Mon, 29 Dec 2008 23:58:11 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[computer hardware]]></category>
		<category><![CDATA[text processing]]></category>
		<category><![CDATA[veveo]]></category>
		<category><![CDATA[computer]]></category>
		<category><![CDATA[gadget]]></category>
		<category><![CDATA[interface]]></category>
		<category><![CDATA[microsoft]]></category>
		<category><![CDATA[surface]]></category>
		<category><![CDATA[touch]]></category>
		<category><![CDATA[unboxing]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=74</guid>
		<description><![CDATA[Today, we received the shipment from Microsoft at Veveo. If you have not heard of Microsoft Surface before, It is a touch screen based computer embedded in a table. The surface of table is illuminated from underneath by a projector (rear-projection) and touch input is implemented by reflecting IR radiation off the fingers and then [...]]]></description>
			<content:encoded><![CDATA[<p>Today, we received the shipment from Microsoft at Veveo. If you have not heard of Microsoft Surface before, It is a touch screen based computer embedded in a table. The surface of table is illuminated from underneath by a projector (rear-projection) and touch input is implemented by reflecting IR radiation off the fingers and then being captured by five IR cameras hidden inside the unit.</p>
<p>To learn more about Microsoft Surface head over to:</p>
<ul>
<li> <a href="http://www.microsoft.com/SURFACE/index.html">Microsoft&#8217;s page on Surface</a>
<li> <a href="http://en.wikipedia.org/wiki/Microsoft_Surface">Wikipedia article on Microsoft Surface</a>
<li> <a href="http://www.youtube.com/watch?v=rP5y7yp06n0">Watch a Youtube video on Microsoft Surface</a>
</ul>
<p><strong>Unboxing Pictures</strong><br />
<center><iframe align="center" src="http://www.flickr.com/slideShow/index.gne?user_id=prashanthellina&#038;set_id=72157611858989460" frameBorder="0" width="500" scrolling="no" height="500"></iframe><br />
<a href="http://www.flickr.com/photos/prashanthellina/sets/72157611858989460/">flickr set on microsoft surface unboxing</a><br />
</center></p>
<p><strong>Some observations:</strong></p>
<ul>
<li> It is very heavy!
<li> and expensive (around $15,000)
<li> The power socket is hidden underneath and is very difficult to access. The power button is equally well hidden and difficult to find.
<li> Installation was non-trivial. The touch input did not start working out of the box. We had to use the bundled mouse to perform the initial installation steps.
<li> The &#8220;surface shell&#8221; with the ripples in the water is a great way to understand the potential of this device. It feels like you are touching water! and your brain expects that water will drip when you lift your fingers up. I think it is more realistic (compared to devices with smaller touch screens) because of the size of the display and the fact that it is aligned horizontally making it more natural.
<li> Since rear-projection is used for the display, the viewing angle is very wide (nearly 180 degrees)
<li> The matte finish on the touch surface has a good feel (almost like paper).
</ul>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=zRYa-fslO14:12S8vL1dq2s:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=zRYa-fslO14:12S8vL1dq2s:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=zRYa-fslO14:12S8vL1dq2s:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=zRYa-fslO14:12S8vL1dq2s:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=zRYa-fslO14:12S8vL1dq2s:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=zRYa-fslO14:12S8vL1dq2s:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=zRYa-fslO14:12S8vL1dq2s:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=zRYa-fslO14:12S8vL1dq2s:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/zRYa-fslO14" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2008/12/30/microsoft-surface-unboxing/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2008/12/30/microsoft-surface-unboxing/</feedburner:origLink></item>
		<item>
		<title>Determining the difficulty of Arithmetic Operations</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/rL4RQ58ArN8/</link>
		<comments>http://blog.prashanthellina.com/2008/07/27/determining-the-difficulty-of-arithmetic-operations/#comments</comments>
		<pubDate>Sun, 27 Jul 2008 16:51:22 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[math]]></category>
		<category><![CDATA[programming]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[arithmetic]]></category>
		<category><![CDATA[arithmetic algorithms]]></category>
		<category><![CDATA[borrow]]></category>
		<category><![CDATA[carry]]></category>
		<category><![CDATA[long division]]></category>
		<category><![CDATA[problem difficulty]]></category>
		<category><![CDATA[problem generator]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=69</guid>
		<description><![CDATA[I am trying to write a program to test my arithmetic skills. The program should pose arithmetic problems involving the four basic operations &#8211; addition, subtraction, multiplication and division. When the testing session starts, the program should issue problems of less difficulty and the difficulty should be ramped up gradually. A score should be computed [...]]]></description>
			<content:encoded><![CDATA[<p><img src="http://www.prashanthellina.com/images/kid_math_problem.gif" align="left" alt="kid math problem" padding="5"/>I am trying to write a program to test my arithmetic skills. The program should pose arithmetic problems involving the four basic operations &#8211; addition, subtraction, multiplication and division. When the testing session starts, the program should issue problems of less difficulty and the difficulty should be ramped up gradually. A score should be computed based on number of questions and the difficulty of questions. The hope is that if I keep using this program for a little time every day, I&#8217;ll be able to improve my abysmal arithmetic performance <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> </p>
<p>Note that you will need to have <a href="http://www.python.org">Python</a> installed to try out the script and the examples in this article. I believe that the code is simple enough to be understood with minimal or no understanding of the Python language. If any part of the article is not clear, feel free to point it out to me. Thank you.</p>
<h2>Estimating difficulty</h2>
<p>Let us take a look at the following problems.</p>
<p><strong>Problem 1</strong></p>
<pre lang="python">
5 +
3
</pre>
<p><br/></p>
<p><strong>Problem 2</strong></p>
<pre lang="python">
99999 +
12345
</pre>
<p><br/></p>
<p>Now, which problem is more difficult? It is quite obvious that Problem 2 is more difficult. But why? One answer would be &#8211; &#8220;because Problem 2 involves the addition of two large numbers&#8221;. Fine. Consider this now &#8230;</p>
<p><strong>Problem 3</strong></p>
<pre lang="python">
1000000 +
1000001
</pre>
<p><br/></p>
<p>Is Problem 3 more difficult than Problem 2 because the numbers undergoing addition are bigger? The intuitive answer is &#8220;No&#8221;. But why? Because it takes less time to compute the result. Why? Because fewer operations are performed in the course of solving Problem 3 than in solving Problem 2? No.</p>
<p>Problem 3 is easier than Problem 2 because of the nature of the operations performed in Problem 3. The operations there are simpler.</p>
<pre lang="python">
0 + 1,
0 + 0,
1 + 1
</pre>
<p><br/></p>
<p>are arguably simpler operations than</p>
<pre lang="python">
9 + 5,
9 + 2,
4 + 9
</pre>
<p><br/></p>
<p>I&#8217;ve attempted to capture the basic operations we perform while doing arithmetic and assign a level of difficulty to each. I&#8217;ve then emulated the algorithms we follow to compute solutions to such problems. By doing so, it becomes possible to estimate the &#8220;difficulty&#8221; of an arithmetic problem (involving the basic operations).</p>
<p><center><img src="http://www.prashanthellina.com/images/math_cartoon_yesterday_x.jpg" alt="math cartoon yesterday x"/></center></p>
<h2>Operation difficulties</h2>
<p><br/></p>
<h3>Addition</h3>
<pre lang="python">
addition_difficulties = dict(
    digit_zero=1,   # any digit added to zero
    even_even=2,    # two even digits added together
    odd_odd=2,      # two odd digits added together
    even_odd=3,     # an even and an odd digit added together
    carry=2,        # extra cost of a carry (remembering it, then adding it)
)
</pre>
<p><br/></p>
<p>Here is the function which computes the addition difficulty. It is a recursive function that can compute the addition difficulty for n numbers. It computes the sum and difficulty of the first two numbers and then inserts the sum at the head of the list of n numbers in place of the two numbers just summed. It then calls itself with the modified list.</p>
<pre lang="python">
def compute_addition_difficulty(*numbers):
    '''
    Add the given integers the way it is done on paper and rate how
    hard that was.

    The first two numbers are added column by column, right to left.
    Every column is charged according to addition_difficulties and
    every carry is charged on top of that. The partial sum then takes
    the place of those two numbers and the function calls itself until
    a single number is left.

    *numbers - integers
    returns: (sum, difficulty)

    >>> compute_addition_difficulty(0, 0)
    (0, 1)

    >>> compute_addition_difficulty(0, 1)
    (1, 1)

    >>> compute_addition_difficulty(1, 1)
    (2, 2)

    >>> compute_addition_difficulty(1, 2)
    (3, 3)

    >>> compute_addition_difficulty(2, 2)
    (4, 2)

    >>> compute_addition_difficulty(19, 9)
    (28, 5)

    >>> compute_addition_difficulty(99999, 12345)
    (112344, 22)
    '''

    operands = list(numbers)

    # nothing (or nothing more) to add
    if len(operands) == 0:
        return 0, 0
    if len(operands) == 1:
        return operands[0], 0

    # pad with leading zeros so that the columns of both numbers line up
    first = str(operands[0])
    second = str(operands[1])
    width = max(len(first), len(second))
    first = first.rjust(width, '0')
    second = second.rjust(width, '0')

    costs = addition_difficulties
    difficulty = 0
    carry = 0
    digits = []   # digits of the result, least significant digit first

    for c1, c2 in izip(reversed(first), reversed(second)):
        d1 = int(c1)
        d2 = int(c2)
        e1 = is_even(d1)
        e2 = is_even(d2)

        # charge the column according to the kind of digits in it
        if not (d1 and d2):
            difficulty += costs['digit_zero']
        elif e1 and e2:
            difficulty += costs['even_even']
        elif not (e1 or e2):
            difficulty += costs['odd_odd']
        elif e1 != e2:
            difficulty += costs['even_odd']

        # a column adds up to 19 at most, so the carry is either 0 or 1
        carry, digit = divmod(d1 + d2 + carry, 10)
        if carry:
            difficulty += costs['carry']

        digits.append(str(digit))

    # a carry out of the leftmost column becomes the leading digit
    if carry:
        digits.append(str(carry))

    total = int(''.join(reversed(digits)))

    # the partial sum replaces the two numbers just added
    total, rest_difficulty = compute_addition_difficulty(total, *operands[2:])

    return total, difficulty + rest_difficulty
</pre>
<p><br/></p>
<h3>Subtraction</h3>
<pre lang="python">
subtraction_difficulties = dict(
    digit_zero=1,       # difference of zero and any digit
    same_digits=1,      # difference of two identical digits
    even_even=2,        # difference of two even digits
    odd_odd=2,          # difference of two odd digits
    even_odd=3,         # difference of an even and an odd digit
    borrow=2,           # doing a borrow
    twodigit_digit=4,   # difference of a two-digit number and a digit
)
</pre>
<p><br/></p>
<p>The subtraction function is also recursive and operates similar to addition in that respect. Before looking at the subtraction algorithm, take a look at how &#8220;borrowing&#8221; is implemented. Let us say we are trying to perform the following,</p>
<pre lang="python">
200005 -
     6
</pre>
<p><br/></p>
<p>After borrowing, the number would be 19999(1)5. We would then subtract 6 from 15 and proceed. The do_borrow() function captures this aspect of subtraction.</p>
<pre lang="python">
def do_borrow(num, index):
    '''
    Borrow 1 for the digit at position index from the digits after it.

    num   - list of single-digit strings, least significant digit
            first; it is modified in place
    index - position of the digit that needs the borrow
    returns: number of digits that had to be changed

    The digit following index is decreased by one. A 0 has nothing to
    give away, so it becomes 9 and the borrow moves on to the digit
    after it, and so on.

    raises: Exception when the borrow runs past the last digit of num
    (digits changed up to that point stay changed)

    >>> x = ['5', '0', '0', '0', '0', '2']
    >>> do_borrow(x, 1)
    4
    >>> x
    ['5', '0', '9', '9', '9', '1']
    '''
    borrows = 0
    pos = index

    while True:
        # there is no further digit left to borrow from
        if pos == len(num) - 1:
            raise Exception('cannot perform borrow')

        borrows += 1
        lender = int(num[pos + 1])

        if lender > 0:
            num[pos + 1] = str(lender - 1)
            return borrows

        # a zero turns into 9 and has to borrow from its own neighbour
        num[pos + 1] = str(9)
        pos += 1
</pre>
<p><br/></p>
<p>This implementation is a bit confusing so check out this example (do_borrow() applied to the problem mentioned above).</p>
<pre lang="python">
>>> x = list('200005')
>>> x.reverse()
>>> x
['5', '0', '0', '0', '0', '2']
>>> do_borrow(x, 1)
4
>>> ''.join(x)
'509991'
>>> x.reverse()
>>> x
['1', '9', '9', '9', '0', '5']
</pre>
<p><br/></p>
<p>Note that do_borrow returned 4: that is the number of individual borrows that were needed, and the subtraction function charges the cost of one borrow for each of them. Also note that do_borrow takes a list of digits in reversed order. It takes a list because the result is provided in-place (remember that Python strings are immutable). Now that you have seen how the borrowing works, I&#8217;ll introduce the subtraction function.</p>
<pre lang="python">
def compute_subtraction_difficulty(*numbers):
    '''
    Generates a difficulty number and result for given
    list of numbers for the subtraction operation

    The first two numbers are subtracted column by column, right to
    left. The smaller of the two is always taken away from the larger
    one, so the result of a step is never negative. The difference
    then takes the place of those two numbers and the function calls
    itself until a single number is left.

    *args (tuple) - integers
    returns: (difference, difficulty)

    >>> compute_subtraction_difficulty(0,0)
    (0, 1)

    >>> compute_subtraction_difficulty(1,0)
    (1, 1)

    >>> compute_subtraction_difficulty(1,1)
    (0, 1)

    >>> compute_subtraction_difficulty(2,1)
    (1, 3)

    >>> compute_subtraction_difficulty(3,2)
    (1, 3)

    >>> compute_subtraction_difficulty(4,2)
    (2, 2)

    >>> compute_subtraction_difficulty(4,3)
    (1, 3)

    >>> compute_subtraction_difficulty(100,1)
    (99, 10)

    >>> compute_subtraction_difficulty(5000007,9)
    (4999998, 22)
    '''

    numbers = list(numbers)
    if len(numbers) == 0: return 0, 0
    elif len(numbers) == 1: return numbers[0], 0

    num1, num2 = numbers[:2]
    # always subtract the smaller number from the larger one
    if num1 < num2: num1, num2 = num2, num1

    num1 = str(num1)
    num2 = str(num2)

    # pad with leading zeros so that the columns of both numbers line up
    max_length = max([len(num1), len(num2)])
    num1 = list(num1.rjust(max_length, '0'))
    num2 = list(num2.rjust(max_length, '0'))

    difficulty = 0
    sd = subtraction_difficulties
    difference = []

    # least significant digit first, as do_borrow() expects
    num1.reverse()
    num2.reverse()

    for index, d2 in enumerate(num2):
        # num1 is read only now, column by column: do_borrow() may have
        # changed this digit while an earlier column was processed
        d1 = int(num1[index])
        d2 = int(d2)

        d1_is_even = is_even(d1)
        d2_is_even = is_even(d2)

        if d1 > d2:
            if not d1 or not d2: difficulty += sd['digit_zero']
            elif d1_is_even and d2_is_even: difficulty += sd['even_even']
            # both digits have to be odd; testing d1 twice made
            # "odd minus even" count as odd_odd
            elif not d1_is_even and not d2_is_even: difficulty += sd['odd_odd']
            elif d1_is_even != d2_is_even: difficulty += sd['even_odd']
            ddiff = d1 - d2

        elif d1 < d2:
            # every digit touched by the borrow is charged separately
            num_borrows = do_borrow(num1, index)
            difficulty += sd['borrow']*num_borrows
            ddiff = 10 + d1 - d2
            difficulty += sd['twodigit_digit']

        elif d1 == d2:
            difficulty += sd['same_digits']
            ddiff = 0

        difference.append( str(ddiff) )

    difference.reverse()
    difference = ''.join(difference)
    difference = int (difference)

    # the difference replaces the two numbers just subtracted
    numbers = [difference] + numbers[2:]

    difference, sub_difficulty = compute_subtraction_difficulty(*numbers)

    difficulty += sub_difficulty

    return difference, difficulty
</pre>
<p><br/></p>
<h3>Multiplication</h3>
<pre lang="python">
multiplication_difficulties = dict(
    carry=1,    # handling a carry; the cost of adding it in is counted separately
    offset=1,   # shifting a partial product to the left (multiplying it by a power of ten)
)
</pre>
<p><br/></p>
<p>The basic operation of multiplying big numbers is one digit by one digit multiplication. I've tried to capture the difficulty of this operation as follows</p>
<pre lang="python">
def get_single_digit_multiplication_difficulty(d1, d2):
    '''
    Multiply two single digits and rate how hard that was.

    Max value of multiplication of two digits is 81 (9*9).
    The base difficulty of d1*d2 increases as the resulting value
    of the multiplication increases: the product is scaled by 4/81
    and rounded up, which gives a value from 1 to 4. Anything
    multiplied by 0 is the easiest case and always costs 1.

    The difficulty is incremented by 1 when at least one operand is
    odd, unless one of the operands is 1 or 5 (because those are
    arguably easier to multiply by). The final difficulty is therefore
    a value from 1 to 5 (inclusive).

    d1, d2 - digits
    returns: (product, difficulty)

    >>> get_single_digit_multiplication_difficulty(0, 1)
    (0, 1)
    >>> get_single_digit_multiplication_difficulty(1, 1)
    (1, 1)
    >>> get_single_digit_multiplication_difficulty(2, 5)
    (10, 1)
    >>> get_single_digit_multiplication_difficulty(2, 7)
    (14, 2)
    >>> get_single_digit_multiplication_difficulty(2, 6)
    (12, 1)
    >>> get_single_digit_multiplication_difficulty(8, 9)
    (72, 5)
    >>> get_single_digit_multiplication_difficulty(8, 8)
    (64, 4)
    '''

    # anything times zero is zero
    if not d1 or not d2:
        return 0, 1

    res = d1 * d2

    # scale the product onto 1..4, rounding up
    difficulty = int(math.ceil((4 / 81.) * res))

    # odd numbers are harder to multiply, except when a 1 or a 5 is involved
    if d1 not in (1, 5) and d2 not in (1, 5) and (is_odd(d1) or is_odd(d2)):
        difficulty += 1

    return res, difficulty
</pre>
<p><br/></p>
<p>At the next level, the operation would be multiplying a multi-digit number by a single digit, which I call simple multiplication. Over here I take into account the difficulty of carry. I hope the following code is self explanatory.</p>
<pre lang="python">
def compute_simple_multiplication_difficulty(num, digit):
    '''
    Multiply a multi-digit number by a single digit the way it is done
    on paper and rate how hard that was.

    The digits of num are multiplied by digit one at a time, right to
    left. Each of these costs whatever
    get_single_digit_multiplication_difficulty() reports. Whenever a
    carry has to be added in, the cost of that addition plus
    multiplication_difficulties['carry'] is charged as well.

    num   - non-negative integer
    digit - single digit to multiply num by
    returns: (product, difficulty)

    >>> compute_simple_multiplication_difficulty(1, 0)
    (0, 1)
    >>> compute_simple_multiplication_difficulty(1, 1)
    (1, 1)
    >>> compute_simple_multiplication_difficulty(2, 1)
    (2, 1)
    >>> compute_simple_multiplication_difficulty(10, 1)
    (10, 2)
    >>> compute_simple_multiplication_difficulty(15, 1)
    (15, 2)
    >>> compute_simple_multiplication_difficulty(15, 5)
    (75, 7)
    >>> compute_simple_multiplication_difficulty(999, 7)
    (6993, 25)
    '''
    num = str(num)

    result = []   # digits of the product, least significant digit first
    md = multiplication_difficulties
    carry = 0
    difficulty = 0

    for d in reversed(num):
        d = int(d)
        res, s_diff = get_single_digit_multiplication_difficulty(d, digit)
        difficulty += s_diff

        if carry:
            # adding the carry is an addition problem of its own
            res, carry_sum_difficulty = compute_addition_difficulty(res, carry)
            difficulty += carry_sum_difficulty + md['carry']

        # divmod() keeps the carry an integer; a plain "/" yields a
        # float as soon as true division is in effect
        carry, result_digit = divmod(res, 10)
        result.append( str(result_digit) )

    # a carry out of the leftmost digit becomes the leading digit(s)
    if carry:
        result.append( str(carry) )

    result.reverse()
    result = ''.join(result)
    result = int(result)

    return result, difficulty
</pre>
<p><br/></p>
<p>Simple multiplication is a basic operation of complex multiplication (multi-digit multiplied by multi-digit). Let us see an example of "complex multiplication" which the multiplication code follows.</p>
<pre lang="python">
    345 x
    123
  -----
   1035
   690x  # result is "offset" or multiplied by 10
  345xx  # result is "offset" or multiplied by 100
  -----
  42435  # result of summation of above numbers
  -----
</pre>
<p><br/></p>
<p>The first part of the multiplication is to perform simple multiplications - 345x3, 345x2, 345x1 and to offset the numbers accordingly. The second part of the operation is to sum the numbers. I used the addition algorithm to achieve the second part. Check out the multiplication function.</p>
<pre lang="python">
def compute_multiplication_difficulty(*numbers):
    '''
    Generates a difficulty number and result for given
    list of numbers for the multiplication operation

    The first two numbers are multiplied the way it is done on paper:
    the larger number is multiplied by every digit of the smaller one
    (see compute_simple_multiplication_difficulty), every partial
    product after the first is offset by a power of ten, and the
    partial products are summed with compute_addition_difficulty().
    The product then takes the place of those two numbers and the
    function calls itself until a single number is left.

    *args (tuple) - integers
    returns: (product, difficulty); (1, 0) when no numbers are given

    >>> compute_multiplication_difficulty(0,0)
    (0, 1)
    >>> compute_multiplication_difficulty(1,0)
    (0, 1)
    >>> compute_multiplication_difficulty(1,1)
    (1, 1)
    >>> compute_multiplication_difficulty(2,1)
    (2, 1)
    >>> compute_multiplication_difficulty(5,1)
    (5, 1)
    >>> compute_multiplication_difficulty(5,2)
    (10, 1)
    >>> compute_multiplication_difficulty(17,2)
    (34, 7)
    >>> compute_multiplication_difficulty(17,29)
    (493, 20)
    >>> compute_multiplication_difficulty(17,29,3)
    (1479, 31)
    >>> compute_multiplication_difficulty(1776,29,3)
    (154512, 77)
    '''

    numbers = list(numbers)
    if len(numbers) == 0: return 1, 0
    elif len(numbers) == 1: return numbers[0], 0

    num1, num2 = numbers[:2]
    # the smaller number supplies the digits to multiply by
    if num1 < num2: num1, num2 = num2, num1

    num2 = str(num2)

    difficulty = 0
    md = multiplication_difficulties
    m_numbers = []   # partial products, already offset

    for index, d in enumerate(reversed(num2)):

        d = int(d)

        m_number, m_diff = compute_simple_multiplication_difficulty(num1, d)
        difficulty += m_diff

        if index:
            # exact integer arithmetic: math.pow() works in floating
            # point and silently rounds large partial products
            m_number = m_number * 10 ** index
            difficulty += md['offset']

        m_numbers.append(m_number)

    m_numbers_sum, m_numbers_diff = compute_addition_difficulty(*m_numbers)
    difficulty += m_numbers_diff

    # the product replaces the two numbers just multiplied
    numbers = [m_numbers_sum] + numbers[2:]
    product, m_diff = compute_multiplication_difficulty(*numbers)

    difficulty += m_diff

    return product, difficulty
</pre>
<p><br/></p>
<h3>Division</h3>
<pre lang="python">
division_difficulties = dict(
    # long division
    use_digit=1,          # bringing down a digit from the dividend
    multiple_lookup=1,    # looking up a precomputed multiple of the divisor
    quotient_update=1,    # updating the quotient with a digit or the decimal point
)
</pre>
<p><br/></p>
<p>Implementing long division posed a problem because my understanding of the mechanics behind the long division algorithm was minimal if not nonexistent. I spent some time trying to "reverse-engineer" it. I came across "Egyptian Division" which made things clearer. With a little help, I managed to implement the following division algorithm. Please let me know if you come up with a better approach.</p>
<pre lang="python">
def compute_division_difficulty(dividend, divisor, precision):
    '''
    Generates a difficulty number, quotient and remainder
    for dividend / divisor

    The division is carried out as long division: digits of the
    dividend are brought down one at a time, the matching multiple of
    the divisor is determined and subtracted, and the quotient grows
    by one digit per step.

    @precision (int) -- 0 stops the division at the last digit of the
    dividend; otherwise the division continues past the decimal point
    until precision + 1 digits have been produced after it
    returns: (quotient, remainder, difficulty); remainder is the
    working number left over when the division stops
    raises: Exception when divisor is zero

    >>> compute_division_difficulty(54, 5, 0)
    (10.0, 4, 6)
    >>> compute_division_difficulty(50, 5, 0)
    (10.0, 0, 6)
    >>> compute_division_difficulty(575, 6, 0)
    (95.0, 5, 60)
    >>> compute_division_difficulty(575, 6, 1)
    (95.829999999999998, 20, 112)
    >>> compute_division_difficulty(6, 9, 1)
    (0.66000000000000003, 60, 50)
    >>> compute_division_difficulty(410, 2, 1)
    (205.0, 0, 54)
    '''

    if not divisor: raise Exception('Division by Zero')
    if not dividend: return 0, 0, 1

    dividend = str(dividend)

    divisor  = str(divisor)

    difficulty = 0
    previous_multiples_difficulty = 0
    precision_reached = 0    # quotient digits produced after the decimal point
    decimal_point_used = 0
    dd = division_difficulties
    quotient = []

    # start with the leftmost digit of the dividend
    num = dividend[0]
    difficulty += dd['use_digit']
    index = 0

    while 1:
        # floor division keeps the quotient digit an integer; a plain
        # "/" yields a float as soon as true division is in effect
        q_digit = int(num) // int(divisor)

        # only the difference to the costliest multiple worked out so
        # far is charged, plus a lookup once any multiple has been charged
        multiple, multiple_difficulty = compute_multiples_difficulty(int(divisor), q_digit)
        difficulty += int(math.fabs(multiple_difficulty - previous_multiples_difficulty))
        if previous_multiples_difficulty: difficulty += dd['multiple_lookup']
        previous_multiples_difficulty = max(multiple_difficulty, previous_multiples_difficulty)

        quotient.append( str(q_digit) )
        difficulty += dd['quotient_update']

        if decimal_point_used:
            precision_reached += 1

        num, sub_difficulty = compute_subtraction_difficulty(int(num), multiple)
        difficulty += sub_difficulty
        num = str(num)

        index += 1

        # all digits of the dividend are used up
        if index == len(dividend):
            if precision == 0: break
            quotient.append('.')
            difficulty += dd['quotient_update']
            decimal_point_used = 1

        # bring down the next digit; past the decimal point that is a zero
        if not decimal_point_used:
            num += dividend[index]
        else:
            num += '0'

        difficulty += dd['use_digit']

        # NOTE(review): num has just had a digit appended, so it holds at
        # least two characters and the len(num) == 1 clause looks
        # unreachable -- confirm the intended early exit
        if (len(num) == 1 and not int(num)) or precision_reached >= precision + 1:
            break

    remainder = int(num)
    quotient = float(''.join(quotient))

    return quotient, remainder, difficulty
</pre>
<p><br/></p>
<p>You must've noticed the usage of compute_multiples_difficulty(). In long division, at every step, you try to find the largest multiple of the divisor that is less than or equal to the number in hand. Multiples that were computed earlier in the division can be reused: if you've already computed, say, the 5th multiple of the divisor with difficulty N, then computing the 6th multiple at a later point costs only difficulty(lookup of 5th multiple) + difficulty(going from the 5th to the 6th multiple).</p>
<h2>Results</h2>
<pre lang="python">
#Problem 1
>>> compute_addition_difficulty(5, 3)
(8, 2)

#Problem 2
>>> compute_addition_difficulty(99999, 12345)
(112344, 22)

#Problem 3
>>> compute_addition_difficulty(1000000, 1000001)
(2000001, 8)
</pre>
<p><br/></p>
<p>Problem 1: <strong>2</strong><br />
Problem 2: <strong>22</strong><br />
Problem 3: <strong>8</strong></p>
<h2>The End</h2>
<p>I wish I could have written a more detailed explanation instead of sprinkling this article with code. Time however prevents me from doing so <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_sad.gif' alt=':(' class='wp-smiley' /> . It is some relief that Python is a <a href="/2008/07/11/even-a-python-can-be-abused/">wonderful language for readability</a> and the code samples above are pretty close to pseudo-code.</p>
<p>I will post back when I complete the program to generate arithmetic problems by order of difficulty. Until then, Adios amigos. Almost forgot! Here is the <a href="http://code.prashanthellina.com/code/atrainer.py">code</a>.</p>
<p><center><img src="http://www.prashanthellina.com/images/meaningoflife.gif" alt="meaning of life math cartoon"/></center></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=rL4RQ58ArN8:aFay9DWLh7c:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=rL4RQ58ArN8:aFay9DWLh7c:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=rL4RQ58ArN8:aFay9DWLh7c:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=rL4RQ58ArN8:aFay9DWLh7c:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=rL4RQ58ArN8:aFay9DWLh7c:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=rL4RQ58ArN8:aFay9DWLh7c:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=rL4RQ58ArN8:aFay9DWLh7c:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=rL4RQ58ArN8:aFay9DWLh7c:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/rL4RQ58ArN8" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2008/07/27/determining-the-difficulty-of-arithmetic-operations/feed/</wfw:commentRss>
		<slash:comments>5</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2008/07/27/determining-the-difficulty-of-arithmetic-operations/</feedburner:origLink></item>
		<item>
		<title>Even a python can be abused</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/NTYrgkQsnaw/</link>
		<comments>http://blog.prashanthellina.com/2008/07/11/even-a-python-can-be-abused/#comments</comments>
		<pubDate>Fri, 11 Jul 2008 03:16:47 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[programming]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[ubuntu]]></category>
		<category><![CDATA[veveo]]></category>
		<category><![CDATA[C]]></category>
		<category><![CDATA[coding]]></category>
		<category><![CDATA[java]]></category>
		<category><![CDATA[perl]]></category>
		<category><![CDATA[readability]]></category>
		<category><![CDATA[VB]]></category>
		<category><![CDATA[visual basic]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=67</guid>
		<description><![CDATA[The first programming language I coded in is QuickBasic. I loved the simplicity and especially the IDE. It made things simple for a starter. Later I discovered Visual Basic which extended the same simplicity and added the &#8220;Visual&#8221; element with a splendid editor for GUI. In between I did some projects using Java, C#, C, [...]]]></description>
			<content:encoded><![CDATA[<p><img src="http://www.prashanthellina.com/images/python_abuse.jpg" alt="python abuse" align="left"/>The first programming language I coded in is <strong>QuickBasic</strong>. I loved the simplicity and especially the IDE. It made things simple for a starter. Later I discovered <strong>Visual Basic</strong> which extended the same simplicity and added the &#8220;Visual&#8221; element with a splendid editor for GUI.</p>
<p>In between I did some projects using Java, C#, C, C++. None of these impressed me too much. I hated Java&#8217;s imposition of stiff rules and its dogged adherence to &#8220;everything in a class&#8221; attitude. C# was better. C++ just turned me off because of the monster it is. I did not like C at all because of its total lack of automated memory handling (like GC). I&#8217;ve been doing a lot of coding in C now-a-days as part of my job and I must admit that I like it a lot for its simplicity in primitives and promise of &#8220;closeness to hardware&#8221; and hence the predictability and performance.</p>
<p>I did a small part of my final year project using Python. However, for some unfathomable reason, Python did not impress me at all then. When I started working at Veveo I used Python for a project and got hooked. Its simplicity and &#8220;readability&#8221; got me. The power of wielding this tool got me drunk <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> </p>
<p>Python was designed from the beginning to be an &#8220;easy to read&#8221; language. Most, if not all, of the syntax is intuitive. The indentation adds to the readability aspect. The policy of &#8220;only one way to do a thing&#8221; does wonders for readability. Everyone does a certain thing only the &#8220;one&#8221; way. If you are wondering why the heck I am talking so much about readability, you should consider the fact that an average programmer spends <strong>most</strong> of his time &#8220;reading&#8221; code. You have to read your code after you&#8217;ve just written it. You&#8217;ve to read your code the next day when you resume work. You&#8217;ve to read your code the moment a bug is found. You have to read your code when someone asks you how some aspect of it works a couple of months later. You have to read your code when making a teeny-weeny feature addition. I just cannot emphasize enough how much time is spent just reading. There have been times when I would spend a whole day just reading code and finally making &#8220;a single line of code change&#8221; at the end of the day!</p>
<p>So there it is. Python makes it possible to write readable code and that does wonders to programmer productivity. Maintaining your code becomes easier. Understanding your colleagues&#8217; code becomes easier and most of all understanding code written by someone across the world becomes easier &#8211; so you can start reusing components more quickly and with more confidence than ever before.</p>
<p><strong><big>I simply love Python.</big></strong></p>
<p>You must be wondering where I am taking this discussion&#8230; because the title says something about a Python being abused. Who is abusing the Python anyway?</p>
<p>I&#8217;ve noticed how newbies code in Python and found it particularly intriguing. What is interesting here is that every person comes from a certain programming background and is used to doing things in a certain way. When they are thrown into a situation where they have to learn a new language and write some code in it, they invariably apply the vast experience they have in their language of choice <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> </p>
<p>I&#8217;ve had the opportunity of observing people from Java, C, Perl, VB (yes, Perl) backgrounds writing code in Python. The Java guys stress test multiple inheritance in Python and bring the much cherished &#8220;everything in a class&#8221; practice to the Python. The C guys who are more often than not obsessed with performance and optimization put their brains to work and implement a strcpy using a &#8220;for loop&#8221; and insist on doing a &#8220;shift&#8221; instead of &#8220;division/multiplication&#8221;. The Perl guys just don&#8217;t seem to like the alphanumerals. They craft Python code with ingenious application making it look very concise. The more characters in one line the better the code. The more non-alphanumerals the better coder you are. That&#8217;s the way of the &#8220;Perl&#8217;ies&#8221;. The VB guys languish for a while complaining constantly about the lack of a proper IDE and after trying out various Python editors, decide to call it quits and go home to comfortable VB. I know, I know &#8230;.. I was a VB guy too and I did search for IDE&#8217;s too &#8230; But then I found VIM and everything was good <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> </p>
<p>I&#8217;ve had the good (snigger) fortune of maintaining some of these brilliant artifacts and had my share of nightmares and laugh-outs. I thought I had seen it all, until I saw something today. I told myself &#8212; &#8220;Never underestimate a brilliant C programmer who has found exec and eval in Python&#8221; &#8230; Yes, you can quote me on this <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> </p>
<pre lang="Python">
    guido = "is speechless"
    larry = "went nuts"
    sergey = "has seen it all now"
    ...
    ...
    for idx in ['guido','larry','sergey']:
        idxv=eval(idx)
        if(not idxv and dd.has_key(idx)):
            idxv='%s="%s"' %(idx,dd[idx])
            exec(idxv)

    ...
    ...
</pre>
<p><br/><br />
<strong>No&#8230;.. I don&#8217;t think I&#8217;ve seen it all&#8230;. <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_wink.gif' alt=';)' class='wp-smiley' /> </strong></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=NTYrgkQsnaw:wYoUZ5jnVxs:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=NTYrgkQsnaw:wYoUZ5jnVxs:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=NTYrgkQsnaw:wYoUZ5jnVxs:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=NTYrgkQsnaw:wYoUZ5jnVxs:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=NTYrgkQsnaw:wYoUZ5jnVxs:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=NTYrgkQsnaw:wYoUZ5jnVxs:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=NTYrgkQsnaw:wYoUZ5jnVxs:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=NTYrgkQsnaw:wYoUZ5jnVxs:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/NTYrgkQsnaw" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2008/07/11/even-a-python-can-be-abused/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2008/07/11/even-a-python-can-be-abused/</feedburner:origLink></item>
		<item>
		<title>Look who’s downloaded Firefox 3!</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/6jezq9R3PtE/</link>
		<comments>http://blog.prashanthellina.com/2008/06/18/look-whos-downloaded-firefox-3/#comments</comments>
		<pubDate>Wed, 18 Jun 2008 13:41:08 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[web]]></category>
		<category><![CDATA[download day]]></category>
		<category><![CDATA[firefox]]></category>
		<category><![CDATA[firefox3]]></category>
		<category><![CDATA[guiness book]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=66</guid>
		<description><![CDATA[Firefox 3 has been getting rave reviews ever since it got in to beta. The blogosphere was abuzz with reports on how much more efficient and snappier FF3 is compared to its earlier incarnations and more so with respect to the competition (Opera, IE7, Safari). Features like &#8220;Places&#8221; (Bookmarks on steroids), Cairo for rendering and [...]]]></description>
			<content:encoded><![CDATA[<p>Firefox 3 has been getting rave reviews ever since it got in to beta. The blogosphere was abuzz with reports on how much more efficient and snappier FF3 is compared to its earlier incarnations and more so with respect to the competition (Opera, IE7, Safari).</p>
<p>Features like &#8220;Places&#8221; (Bookmarks on steroids), Cairo for rendering and OS specific widgets have made the best browser better. At the time of writing of this blog post 6 million plus downloads from around the world have already happened.</p>
<p>Everywhere I look at work, I see the &#8220;Download Day&#8221; certificate from Mozilla corp &#8230; I got one myself too <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> </p>
<p>But the question is &#8230;.</p>
<blockquote><p>
<font size="+3"><br />
<strong><br />
Do you know who else got the certificate ???<br />
</strong><br />
</font>
</p></blockquote>
<p>Who else but Billy and Ballmy <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> </p>
<p><img src="http://www.prashanthellina.com/images/billgates_ff3_cert.jpg" alt="Bill gates firefox 3 certificate"/></p>
<p><img src="http://www.prashanthellina.com/images/steveballmer_ff3_cert.jpg" alt="Steve Ballmer firefox 3 certificate"/></p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=6jezq9R3PtE:LKFGYd37y2s:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=6jezq9R3PtE:LKFGYd37y2s:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=6jezq9R3PtE:LKFGYd37y2s:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=6jezq9R3PtE:LKFGYd37y2s:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=6jezq9R3PtE:LKFGYd37y2s:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=6jezq9R3PtE:LKFGYd37y2s:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=6jezq9R3PtE:LKFGYd37y2s:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=6jezq9R3PtE:LKFGYd37y2s:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/6jezq9R3PtE" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2008/06/18/look-whos-downloaded-firefox-3/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2008/06/18/look-whos-downloaded-firefox-3/</feedburner:origLink></item>
		<item>
		<title>Nose – TDD – Python</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/w1gh0WIb6m8/</link>
		<comments>http://blog.prashanthellina.com/2008/05/22/nose-tdd-python/#comments</comments>
		<pubDate>Thu, 22 May 2008 15:02:08 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[linux]]></category>
		<category><![CDATA[programming]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[nose]]></category>
		<category><![CDATA[tdd]]></category>
		<category><![CDATA[testing]]></category>
		<category><![CDATA[tools]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=65</guid>
		<description><![CDATA[What, why I&#8217;ve been reading up on TDD and it has struck me as particularly useful methodology to achieve &#8220;clean code that works&#8221;. TDD encourages writing unit tests to cover all the code (because by definition, you write a test before a line of code is written). Because all your code is covered you are [...]]]></description>
			<content:encoded><![CDATA[<h3>What, why</h3>
<p>I&#8217;ve been reading up on <a href="http://en.wikipedia.org/wiki/Test-driven_development">TDD</a> and it has struck me as a particularly useful methodology to achieve &#8220;clean code that works&#8221;. TDD encourages writing unit tests to cover all the code (because by definition, you write a test before a line of code is written). Because all your code is covered you are freed from the fear of breakage due to change and can instantly be more confident and productive. Also, the test cases act as a specification in code &#8211; very useful.</p>
<p>Python has standard modules, <a href="http://docs.python.org/lib/module-unittest.html">unittest</a> and <a href="http://docs.python.org/lib/module-doctest.html">doctest</a> to help you write test cases. I simply love doctest. It alleviates much of the pain of writing a test case (setup and all) besides acting as &#8220;executable documentation&#8221;. The unittest module has a Java legacy and is not to my taste. Also, I wanted to find a solution that would help in automated test enumeration (discovery) in my source directories without having to write any &#8220;infrastructure&#8221; code. One more thing I was looking for was a way to run both unit tests and doc tests together.</p>
<p>After a bit of searching, I found &#8220;<a href="http://somethingaboutorange.com/mrl/projects/nose/">Nose</a>&#8221;. Nose is a clone of &#8220;<a href="http://codespeak.net/py/dist/test.html">py.test</a>&#8221; which I liked better than the original (subjectively). To get a feel of &#8220;Nose&#8221;, I set up some python test files.</p>
<p>The following is the directory structure and the contents of the files. I&#8217;ve put in both unit tests and doc tests in the files to see how &#8220;Nose&#8221; handles them. Also, the tests are spread across directories. Note that I had to put an &#8220;__init__.py&#8221; to allow &#8220;Nose&#8221; to import tests in a subdirectory.</p>
<h3>The setup</h3>
<p><strong>The directory structure</strong></p>
<pre lang="bash">
prashanth@prashanth-desktop:~/tmp$ tree
.
|-- bingo.py
|-- somedir
|   |-- __init__.py
|   `-- test_another.py
`-- test_prashanth.py

1 directory, 4 files
</pre>
<p><br/></p>
<p><strong>bingo.py</strong></p>
<pre lang="python">
def boing(a, b):
    '''
    >>> boing(10, 20)
    30
    '''
    return a+b

def boing1(a, b):
    '''
    >>> boing1(10, 20)
    40
    '''
    return a+b
</pre>
<p><br/></p>
<p><strong>test_prashanth.py</strong></p>
<pre lang="python">
def test_a():
    assert 1

def test_b():
    print "hello"
    assert 0
</pre>
<p><br/></p>
<p><strong>somedir/test_another.py</strong></p>
<pre lang="python">
def test_bingo():
    raise Exception('hgello')
</pre>
<p><br/></p>
<h3>Installing &#8220;Nose&#8221;</h3>
<pre lang="bash">
sudo easy_install nose
</pre>
<p><br/></p>
<p>If you don&#8217;t have easy_install, head over <a href="http://somethingaboutorange.com/mrl/projects/nose/">here</a> to get information on installation.</p>
<h3>Running the tests</h3>
<p>Now that &#8220;Nose&#8221; is installed, let us run the tests,</p>
<pre lang="bash">
nosetests --with-doctest
</pre>
<p><br/></p>
<p>The output is</p>
<pre lang="bash">
..E.F
======================================================================
ERROR: somedir.test_another.test_bingo
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/lib/python2.5/site-packages/nose-0.10.2-py2.5.egg/nose/case.py", line 182, in runTest
    self.test(*self.arg)
  File "/home/prashanth/tmp/somedir/test_another.py", line 2, in test_bingo
    raise Exception('hgello')
Exception: hgello

======================================================================
FAIL: test_prashanth.test_b
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/lib/python2.5/site-packages/nose-0.10.2-py2.5.egg/nose/case.py", line 182, in runTest
    self.test(*self.arg)
  File "/home/prashanth/tmp/test_prashanth.py", line 7, in test_b
    assert 0
AssertionError:
-------------------- >> begin captured stdout << ---------------------
hello

--------------------- >> end captured stdout << ----------------------

----------------------------------------------------------------------
Ran 5 tests in 0.057s

FAILED (errors=1, failures=1)
</pre>
<p><br/></p>
<p>The first line in the output is the "test progress" indication (..E.F). When a test succeeds, a '.' is written. When a test fails, an 'F' is written. When a test throws an Exception, an 'E' is written. Very useful to get a sense of progress as a huge test suite is being executed.</p>
<p>"Nose" captures the stdout and stderr when a test case fails to help you debug the issue. To <a href="http://ivory.idyll.org/articles/nose-intro.html">learn more about using "Nose" go here</a>.</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=w1gh0WIb6m8:kOABjSedVzk:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=w1gh0WIb6m8:kOABjSedVzk:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=w1gh0WIb6m8:kOABjSedVzk:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=w1gh0WIb6m8:kOABjSedVzk:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=w1gh0WIb6m8:kOABjSedVzk:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=w1gh0WIb6m8:kOABjSedVzk:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=w1gh0WIb6m8:kOABjSedVzk:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=w1gh0WIb6m8:kOABjSedVzk:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/w1gh0WIb6m8" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2008/05/22/nose-tdd-python/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2008/05/22/nose-tdd-python/</feedburner:origLink></item>
		<item>
		<title>Project Gutenberg Ngram data: English only</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/dh5eJIb4xA0/</link>
		<comments>http://blog.prashanthellina.com/2008/05/13/project-gutenberg-ngram-data-english-only/#comments</comments>
		<pubDate>Tue, 13 May 2008 16:36:05 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[data mining]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[text processing]]></category>
		<category><![CDATA[ngram]]></category>
		<category><![CDATA[project gutenberg]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=64</guid>
		<description><![CDATA[In my earlier post, I&#8217;d posted links to the Project Gutenberg Ngram data I had computed for e-books of all languages. If you are interested in only the English data, get these files instead. These two files are splits of a compressed file which contains all of the Project Gutenberg English e-books downloaded about a [...]]]></description>
			<content:encoded><![CDATA[<p>
In my <a href="/2008/05/04/n-gram-data-from-project-gutenberg/">earlier post</a>, I&#8217;d posted links to the <a href="http://www.gutenberg.org">Project Gutenberg</a> Ngram data I had computed for e-books of all languages. If you are interested in only the English data, get these files instead.
</p>
<p>
These two files are splits of a compressed file which contains all of the Project Gutenberg English e-books downloaded about a week before the date of this post.<br/><br />
<a href="http://www.prashanthellina.com/docs/gutenberg_data/gutenberg_en_files.tar.bz2.0">gutenberg_en_files.tar.bz2.0</a> (<strong>2.0GB</strong>) <br/></p>
<p><a href="http://www.prashanthellina.com/docs/gutenberg_data/gutenberg_en_files.tar.bz2.1">gutenberg_en_files.tar.bz2.1</a> (<strong>1.4GB</strong>) <br/></p>
<p>Unigrams along with frequency count from the text data above<br/><br />
<a href="http://www.prashanthellina.com/docs/gutenberg_data/gutenberg_en_unigrams.tar.gz">gutenberg_en_unigrams.tar.gz</a> (<strong>7.4MB</strong>) <br/></p>
<p>Bi-grams and Tri-grams along with frequency count from the text data above<br/><br />
<a href="http://www.prashanthellina.com/docs/gutenberg_data/gutenberg_en_bi_tri_grams.tar.gz">gutenberg_en_bi_tri_grams.tar.gz</a> (<strong>493MB</strong>) <br/>
</p>
<p>I had to split the files because my webserver has a limitation in serving out files larger than 2GB. After downloading the files, do this</p>
<pre lang="BASH">
mv gutenberg_en_files.tar.bz2.0 gutenberg_en_files.tar.bz2
cat gutenberg_en_files.tar.bz2.1 >> gutenberg_en_files.tar.bz2
rm gutenberg_en_files.tar.bz2.1
</pre>
<p><br/></p>
<p>If you find the data useful, I&#8217;d be delighted to hear the context in which you made use of it.</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=dh5eJIb4xA0:PQX4gVFZeN4:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=dh5eJIb4xA0:PQX4gVFZeN4:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=dh5eJIb4xA0:PQX4gVFZeN4:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=dh5eJIb4xA0:PQX4gVFZeN4:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=dh5eJIb4xA0:PQX4gVFZeN4:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=dh5eJIb4xA0:PQX4gVFZeN4:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=dh5eJIb4xA0:PQX4gVFZeN4:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=dh5eJIb4xA0:PQX4gVFZeN4:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/dh5eJIb4xA0" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2008/05/13/project-gutenberg-ngram-data-english-only/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2008/05/13/project-gutenberg-ngram-data-english-only/</feedburner:origLink></item>
		<item>
		<title>N-gram data from Project Gutenberg</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/JpXduZCi1fk/</link>
		<comments>http://blog.prashanthellina.com/2008/05/04/n-gram-data-from-project-gutenberg/#comments</comments>
		<pubDate>Sun, 04 May 2008 16:40:14 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[data mining]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[programming]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[text processing]]></category>
		<category><![CDATA[gutenberg]]></category>
		<category><![CDATA[ngrams]]></category>
		<category><![CDATA[project gutenberg]]></category>
		<category><![CDATA[text parsing]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=63</guid>
		<description><![CDATA[I&#8217;ve been working on Wordza.com for which I needed Ngram data from a sufficiently large corpus. Initially, I thought of using Wikipedia data which I already have on my disk, but decided on using Project Gutenberg data as it is more representative of the general usage of English language. Get Project Gutenberg Ngram data The [...]]]></description>
			<content:encoded><![CDATA[<p>
  I&#8217;ve been working on <A href="http://www.wordza.com" name="Wordza">Wordza.com</A> for which I needed Ngram data from a sufficiently large corpus. Initially,  I thought of using Wikipedia data which I already <A href="/2007/12/21/topic-extraction-using-wikipedia-data/">have on my disk</A>, but decided on using <A href="http://www.gutenberg.org">Project Gutenberg</A> data as it is more representative of the general usage of English language.
</p>
<h2>Get Project Gutenberg Ngram data</h2>
<p>
The Ngram data contains bi-grams and tri-grams for now. I plan to generate uni-grams soon. I&#8217;ve made the data available here so you can download and use it! This data contains all of the e-books hosted by Project Gutenberg (which means the data contains English, French, German and other languages). If you want an English only dataset, check back in a week or two. I am in the process of generating the same.
</p>
<p>
  The Ngram data, containing bi-grams and tri-grams. Each line is prepended with the occurrence count.<br/><br />
<A href="http://www.prashanthellina.com/docs/gutenberg_data/gutenberg_ngrams.tar.bz2">gutenberg_ngrams.tar.bz2</A> (<strong>624 MB</strong>)<br/></p>
<p><br/></p>
<p>This is the compressed tarball of all the txt files in Project Gutenberg (as of a week before this blog post). Note that you don&#8217;t need this file unless you want to generate the Ngrams yourself using the scripts provided below.<br/><br />
<A href="http://www.prashanthellina.com/docs/gutenberg_data/gutenberg_files.tar.bz2.0">gutenberg_files.tar.bz2.0</A>,<br />
<A href="http://www.prashanthellina.com/docs/gutenberg_data/gutenberg_files.tar.bz2.1">gutenberg_files.tar.bz2.1</A>,<br />
<A href="http://www.prashanthellina.com/docs/gutenberg_data/gutenberg_files.tar.bz2.2">gutenberg_files.tar.bz2.2</A> (<strong>5.3 GB</strong>)<br/></p>
<p>My webserver (Apache) has a problem serving out files bigger than 2GB, so I had to split the file up. After you download the splits, you have to join them like this.</p>
<pre lang="BASH">
mv gutenberg_files.tar.bz2.0 gutenberg_files.tar.bz2
cat gutenberg_files.tar.bz2.1 >> gutenberg_files.tar.bz2
cat gutenberg_files.tar.bz2.2 >> gutenberg_files.tar.bz2
</pre>
<p><br/></p>
<p>To decompress the files, you will need bunzip2 on *nix/Cygwin. On Windows, use 7zip.
</p>
<h2>Generate the data yourself</h2>
<p>
In case you want to generate the Ngrams yourself by processing the Project Gutenberg data files, follow these instructions. You will have to get the Project Gutenberg data files. Use the following command to get all the English language files in txt format.</p>
<pre lang="bash">
mkdir gutenberg
cd gutenberg
wget -w 2 -m "http://www.gutenberg.org/robot/harvest?filetypes[]=txt&#038;langs[]=en"
</pre>
<p><br/></p>
<p>The txt files are compressed and stored in files ending with .zip extension. These zip files are spread across multiple directories. The following command will move the zip files into the &#8220;gutenberg&#8221; directory you created in the above step.</p>
<pre lang="BASH">
for i in `find . -name "*.zip"`; do mv $i . ; done;
</pre>
<p><br/></p>
<p>Now that all the zip files are in the same directory, unzip the zip files. Some zip files may contain files other than .txt&#8217;s. The following command extracts only .txt&#8217;s in the zip files.</p>
<pre lang="BASH">
cd ..
mkdir gutenberg_txt
for i in `find gutenberg -name "*.zip"`; do unzip $i \*.txt -d gutenberg_txt/ ; done;
cd gutenberg_txt
for i in `find . -name "*.txt"`; do mv $i . ; done;
cd ..
</pre>
<p><br/></p>
<p>The gutenberg txt files have gutenberg headers and footers which should be removed lest they skew the frequency of Ngrams. The script &#8220;remove_gutenberg_text.py&#8221; does exactly this. The &#8220;generate_ngrams.py&#8221; script creates uni, bi and tri-grams of whatever text is piped into it. The following command pipes all the txt files through both the scripts to create the ngrams file.</p>
<pre lang="BASH">
for i in `find gutenberg_txt/ -name "*.txt"`; \
do cat $i | python remove_gutenberg_text.py | \
grep -i -v "project gutenberg" |\
 python generate_ngrams.py >> gutenberg_ngrams; done;
</pre>
<p><br/></p>
<p>Now you have to count the number of times an ngram occurs. The following sequence of commands processes the ngrams file generated above and produces a file with the frequency counts of the ngrams. Note that the &#8220;512K&#8221; option to sort is there because I had to run these scripts on my host which kills processes that take too much memory. If you have a machine with a lot of memory, sorting can be significantly faster if you use a higher value, say &#8220;1G&#8221;.</p>
<pre lang="BASH">
sort -S 512K -T tmp_sort/ gutenberg_ngrams > gutenberg_ngrams.sorted
uniq -c gutenberg_ngrams.sorted > gutenberg_ngrams.counted
sort -S 512K -T tmp_sort/ gutenberg_ngrams.counted > gutenberg_ngrams.counted.sorted
</pre>
<p><br/>
</p>
<h3>Gutenberg data processing scripts</h3>
<ul>
<li><A href="http://code.prashanthellina.com/code/remove_gutenberg_text.py">remove_gutenberg_text.py</A> &#8212; removes Project Gutenberg header and footer from txt files</li>
<li><A href="http://code.prashanthellina.com/code/generate_ngrams.py">generate_ngrams.py</A> &#8212; generate uni, bi and tri-grams for any text</li>
</ul>
<h2>Do get back</h2>
<p>If you use this data, I would really appreciate it if you get back with details about how you used it in the context of your project.</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=JpXduZCi1fk:P_rNrdn_ArM:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=JpXduZCi1fk:P_rNrdn_ArM:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=JpXduZCi1fk:P_rNrdn_ArM:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=JpXduZCi1fk:P_rNrdn_ArM:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=JpXduZCi1fk:P_rNrdn_ArM:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=JpXduZCi1fk:P_rNrdn_ArM:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=JpXduZCi1fk:P_rNrdn_ArM:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=JpXduZCi1fk:P_rNrdn_ArM:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/JpXduZCi1fk" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2008/05/04/n-gram-data-from-project-gutenberg/feed/</wfw:commentRss>
		<slash:comments>5</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2008/05/04/n-gram-data-from-project-gutenberg/</feedburner:origLink></item>
		<item>
		<title>Wordza – A Smart Word Quizzer</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/2xBxtHdfmFw/</link>
		<comments>http://blog.prashanthellina.com/2008/04/27/wordza-a-smart-word-quizzer/#comments</comments>
		<pubDate>Sun, 27 Apr 2008 08:36:49 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[web]]></category>
		<category><![CDATA[quiz]]></category>
		<category><![CDATA[vocabulary]]></category>
		<category><![CDATA[word]]></category>
		<category><![CDATA[wordza]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=62</guid>
		<description><![CDATA[I&#8217;d thought of making a word quizzer as a web application to improve my vocabulary when I took the GRE test a couple of years back. I&#8217;d written one in Visual Basic 6 when I wrote SAT , but desktop applications are boring! I got inspired to bring my long standing idea to fruition and [...]]]></description>
			<content:encoded><![CDATA[<p>I&#8217;d thought of making a word quizzer as a web application to improve my vocabulary when I took the GRE test a couple of years back. I&#8217;d written one in Visual Basic 6 when I wrote SAT <img src='http://blog.prashanthellina.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> , but desktop applications are boring!</p>
<p>I got inspired to bring my long standing idea to fruition and the outcome is <a href="http://www.wordza.com">Wordza</a>.</p>
<p><a href="http://www.wordza.com"><img src="http://www.prashanthellina.com/images/wordza_logo.gif" alt="wordza"/></a></p>
<blockquote><p>
Wordza is a smart word quizzing service designed to help you rapidly learn new words. It adapts to your level and continually challenges you to get better. Preparing for GRE/SAT/CAT? Use Wordza 15 minutes a day.</p>
<p>Questions have difficulty ratings ranging between 1 and 100. The ratings are derived from user activity. A question is easy if a lot of people get it right and difficult when very few do!</p>
<p>You start at difficulty level 50. If you get three questions right in a row, you drive up the difficulty level a notch. On the other hand, answering a question wrong will reduce the difficulty level by one.
</p></blockquote>
<p><img src="http://www.prashanthellina.com/images/wordza_screenshot.gif" alt="wordza screenshot"/></p>
<p>If you are interested in the implementation &#8230; I used Python, MySQL behind Apache to get this up. When I started making it, I wanted to keep it simple and make it work fast. I hope I&#8217;ve met both these requirements. Do give it a try and let me know what you think.</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=2xBxtHdfmFw:C9eMsFtk7bY:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=2xBxtHdfmFw:C9eMsFtk7bY:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=2xBxtHdfmFw:C9eMsFtk7bY:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=2xBxtHdfmFw:C9eMsFtk7bY:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=2xBxtHdfmFw:C9eMsFtk7bY:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=2xBxtHdfmFw:C9eMsFtk7bY:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=2xBxtHdfmFw:C9eMsFtk7bY:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=2xBxtHdfmFw:C9eMsFtk7bY:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/2xBxtHdfmFw" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2008/04/27/wordza-a-smart-word-quizzer/feed/</wfw:commentRss>
		<slash:comments>7</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2008/04/27/wordza-a-smart-word-quizzer/</feedburner:origLink></item>
		<item>
		<title>Alexa rank: A script to get the rank for any site</title>
		<link>http://feedproxy.google.com/~r/prashanthellina/~3/tNZOfg8P3e8/</link>
		<comments>http://blog.prashanthellina.com/2008/04/22/alexa-rank-a-script-to-get-the-rank-for-any-site/#comments</comments>
		<pubDate>Tue, 22 Apr 2008 17:49:55 +0000</pubDate>
		<dc:creator>prashanthellina</dc:creator>
				<category><![CDATA[programming]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[web]]></category>
		<category><![CDATA[alexa]]></category>
		<category><![CDATA[alexa rank]]></category>
		<category><![CDATA[programmatically]]></category>
		<category><![CDATA[script]]></category>
		<category><![CDATA[xml]]></category>

		<guid isPermaLink="false">http://blog.prashanthellina.com/?p=61</guid>
		<description><![CDATA[What is Alexa rank? Alexa collects statistics about visits by internet users to websites through the Alexa Toolbar. Based on the collected data, Alexa computes site ranking. By examining the Alexa rank of a site, you can get a rough idea of how popular the site is. Many argue that Alexa rank is misleading but [...]]]></description>
			<content:encoded><![CDATA[<h2>What is Alexa rank?</h2>
<p><a href="http://www.alexa.com">Alexa</a> collects statistics about visits by internet users to websites through the <a href="http://download.alexa.com/">Alexa Toolbar</a>. Based on the collected data, Alexa computes site ranking. By examining the Alexa rank of a site, you can get a rough idea of how popular the site is. Many argue that <a href="http://www.mattcutts.com/blog/thoughts-on-alexa-data/">Alexa rank is misleading</a> but it has its uses.</p>
<h2>The Alexa rank script</h2>
<p>You can find out the Alexa rank for any site by using this page. However, if you want to programmatically get the Alexa rank, you can do it using <a href="http://code.prashanthellina.com/code/get_alexa_rank.py">this script</a>.</p>
<p><strong><a href="http://code.prashanthellina.com/code/get_alexa_rank.py">Get the Alexa rank script</a></strong></p>
<h2>Using the script</h2>
<p>After downloading the script, give it execute permission by doing this. You will need to have Python installed.</p>
<pre lang="BASH">
chmod +x get_alexa_rank.py
</pre>
<p><br/></p>
<pre lang="BASH">
$ ./get_alexa_rank.py google.com
popularity rank = 2
reach_rank = 1

$ ./get_alexa_rank.py wikipedia.com
popularity rank = 7
reach_rank = 6

$ ./get_alexa_rank.py blog.prashanthellina.com
popularity rank = 557287
reach_rank = 482289

$ ./get_alexa_rank.py www.inexistantsite.com
popularity rank = -1
reach_rank = -1
</pre>
<p><br/></p>
<h2>How does the script work?</h2>
<p>If you make an HTTP request for the following URL,</p>
<blockquote><p>http://data.alexa.com/data?cli=10&#038;dat=snbamz&#038;url=$URL</p></blockquote>
<p>after replacing $URL with the URL of the site for which you need the Alexa rank, the following XML response is sent out. I tried with &#8220;http://blog.prashanthellina.com&#8221;.</p>
<pre lang="xml">
<ALEXA VER="0.9" URL="blog.prashanthellina.com/" HOME="0" AID="=">
<RLS PREFIX="http://" more="0">
</RLS>
	<SD TITLE="A" FLAGS="">
<POPULARITY URL="prashanthellina.com/" TEXT="557287"/>
<RANK DELTA="+70225"/>
<REACH RANK="482289"/>
</SD>
</ALEXA>
</pre>
<p><br/></p>
<p>The script parses the XML response and extracts POPULARITY/@TEXT and REACH/@RANK.</p>
<p>If you are looking for a PHP script for doing the same, <a href="http://googlepagerankin.wordpress.com/2008/02/01/alexa-rank-checking-scriptalexa-rank-checker-script/">check this out</a>.</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~ff/prashanthellina?a=tNZOfg8P3e8:ONrDgyM3QWc:yIl2AUoC8zA"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=yIl2AUoC8zA" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=tNZOfg8P3e8:ONrDgyM3QWc:D7DqB2pKExk"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=tNZOfg8P3e8:ONrDgyM3QWc:D7DqB2pKExk" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=tNZOfg8P3e8:ONrDgyM3QWc:V_sGLiPBpWU"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=tNZOfg8P3e8:ONrDgyM3QWc:V_sGLiPBpWU" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=tNZOfg8P3e8:ONrDgyM3QWc:F7zBnMyn0Lo"><img src="http://feeds.feedburner.com/~ff/prashanthellina?i=tNZOfg8P3e8:ONrDgyM3QWc:F7zBnMyn0Lo" border="0"></img></a> <a href="http://feeds.feedburner.com/~ff/prashanthellina?a=tNZOfg8P3e8:ONrDgyM3QWc:dnMXMwOfBR0"><img src="http://feeds.feedburner.com/~ff/prashanthellina?d=dnMXMwOfBR0" border="0"></img></a>
</div><img src="http://feeds.feedburner.com/~r/prashanthellina/~4/tNZOfg8P3e8" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://blog.prashanthellina.com/2008/04/22/alexa-rank-a-script-to-get-the-rank-for-any-site/feed/</wfw:commentRss>
		<slash:comments>9</slash:comments>
		<feedburner:origLink>http://blog.prashanthellina.com/2008/04/22/alexa-rank-a-script-to-get-the-rank-for-any-site/</feedburner:origLink></item>
	</channel>
</rss>

