<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10titles.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemtitles.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;A04CR3k9eSp7ImA9WhRXFUs.&quot;"><id>tag:blogger.com,1999:blog-9749960</id><updated>2011-12-22T16:06:06.761Z</updated><title>Research on Search</title><subtitle type="html">My study of machine learning, data mining, computational linguistics and information retrieval, towards the grand goal of developing the "perfect search engine" that "understands exactly what you mean and gives you back exactly what you want" (Larry Page).</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://researchonsearch.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>133</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" 
href="http://feeds.feedburner.com/researchonsearch" /><feedburner:info uri="researchonsearch" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><feedburner:feedFlare href="http://add.my.yahoo.com/rss?url=http%3A%2F%2Ffeeds.feedburner.com%2Fresearchonsearch" src="http://us.i1.yimg.com/us.yimg.com/i/us/my/addtomyyahoo4.gif">Subscribe with My Yahoo!</feedburner:feedFlare><feedburner:feedFlare href="http://www.newsgator.com/ngs/subscriber/subext.aspx?url=http%3A%2F%2Ffeeds.feedburner.com%2Fresearchonsearch" src="http://www.newsgator.com/images/ngsub1.gif">Subscribe with NewsGator</feedburner:feedFlare><feedburner:feedFlare href="http://feeds.my.aol.com/add.jsp?url=http%3A%2F%2Ffeeds.feedburner.com%2Fresearchonsearch" src="http://o.aolcdn.com/favorites.my.aol.com/webmaster/ffclient/webroot/locale/en-US/images/myAOLButtonSmall.gif">Subscribe with My AOL</feedburner:feedFlare><feedburner:feedFlare href="http://www.bloglines.com/sub/http://feeds.feedburner.com/researchonsearch" src="http://www.bloglines.com/images/sub_modern11.gif">Subscribe with Bloglines</feedburner:feedFlare><feedburner:feedFlare href="http://www.netvibes.com/subscribe.php?url=http%3A%2F%2Ffeeds.feedburner.com%2Fresearchonsearch" src="http://www.netvibes.com/img/add2netvibes.gif">Subscribe with Netvibes</feedburner:feedFlare><feedburner:feedFlare href="http://fusion.google.com/add?feedurl=http%3A%2F%2Ffeeds.feedburner.com%2Fresearchonsearch" src="http://buttons.googlesyndication.com/fusion/add.gif">Subscribe with Google</feedburner:feedFlare><feedburner:feedFlare href="http://www.pageflakes.com/subscribe.aspx?url=http%3A%2F%2Ffeeds.feedburner.com%2Fresearchonsearch" src="http://www.pageflakes.com/ImageFile.ashx?instanceId=Static_4&amp;fileName=ATP_blu_91x17.gif">Subscribe with Pageflakes</feedburner:feedFlare><entry 
gd:etag="W/&quot;C0QFRX4zfip7ImA9WhdWE0Q.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-2260564611051406578</id><published>2011-09-07T11:21:00.002+01:00</published><updated>2011-09-07T11:28:34.086+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-07T11:28:34.086+01:00</app:edited><title>The Hungarian algorithm in clustering evaluation</title><content type="html">&lt;a href="http://en.wikipedia.org/wiki/Hungarian_algorithm"&gt;The Hungarian algorithm (aka Kuhn–Munkres algorithm or Munkres assignment algorithm)&lt;/a&gt; can solve the assignment problem in polynomial time O(n^3). It can be used to find the optimal mapping from discovered clusters to the ground-truth categories which serves as the basis for some performance measures of &lt;a href="http://en.wikipedia.org/wiki/Cluster_analysis"&gt;clustering&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-2260564611051406578?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/mrCR6Pr5eTo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/2260564611051406578/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=2260564611051406578" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/2260564611051406578?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/2260564611051406578?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/mrCR6Pr5eTo/hungarian-algorithm-in-clustering.html" title="The Hungarian algorithm in clustering evaluation" /><author><name>Dell 
Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2011/09/hungarian-algorithm-in-clustering.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkAMRnkyeCp7ImA9WhdXFUk.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-4289573635962190010</id><published>2011-08-28T16:14:00.003+01:00</published><updated>2011-08-28T16:19:47.790+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-28T16:19:47.790+01:00</app:edited><title>Fastest membership test in Python</title><content type="html">What is the most efficient method to check whether an item is in a given group or not? In Python, it seems that &lt;a href="http://labs.kortina.net/2010/10/13/list-dict-set-and-frozen-set-performance-in-python/"&gt;set (or frozenset) would be slightly faster than dict and much much faster than list&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-4289573635962190010?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/6dNVuoCDDFs" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/4289573635962190010/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=4289573635962190010" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/4289573635962190010?v=2" /><link rel="self" type="application/atom+xml" 
href="http://www.blogger.com/feeds/9749960/posts/default/4289573635962190010?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/6dNVuoCDDFs/fastest-membership-test-in-python.html" title="Fastest membership test in Python" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2011/08/fastest-membership-test-in-python.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEcNR309fCp7ImA9WhdXE0U.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-6972714536865589431</id><published>2011-08-26T20:02:00.002+01:00</published><updated>2011-08-26T20:14:56.364+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-26T20:14:56.364+01:00</app:edited><title>Submodular functions</title><content type="html">Intuitively, a &lt;a href="http://en.wikipedia.org/wiki/Submodular_function"&gt;submodular function&lt;/a&gt; over the subsets demonstrates "&lt;span style="font-style:italic;"&gt;diminishing returns&lt;/span&gt;", which is related to the concept of &lt;a href="http://en.wikipedia.org/wiki/Marginal_utility"&gt;marginal utility&lt;/a&gt; in economics. Its usefulness for machine learning is well explained and illustrated by the &lt;a href="http://submodularity.org/"&gt;Beyond Convexity&lt;/a&gt; tutorial. 
There is &lt;a href="http://www.cs.caltech.edu/~krausea/sfo/"&gt;a Matlab toolbox for submodular function optimization&lt;/a&gt; available that is developed by &lt;a href="http://las.ethz.ch/krausea.html"&gt;Andreas Krause&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-6972714536865589431?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/-cpl2d57WPk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/6972714536865589431/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=6972714536865589431" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/6972714536865589431?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/6972714536865589431?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/-cpl2d57WPk/submodular-functions.html" title="Submodular functions" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2011/08/submodular-functions.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0QCR3k5eip7ImA9WhdXE0U.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-4417135016545196600</id><published>2011-08-26T15:24:00.004+01:00</published><updated>2011-08-26T20:02:46.722+01:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-08-26T20:02:46.722+01:00</app:edited><title>L1 regularisation Is efficient for selecting relevant features</title><content type="html">&lt;a href="http://ai.stanford.edu/~ang/"&gt;Andrew Ng&lt;/a&gt; has proven in his &lt;a href="http://ai.stanford.edu/~ang/papers/icml04-l1l2.pdf"&gt;ICML-2004 paper&lt;/a&gt; that sample complexity grows linearly in the number of irrelevant features when using L2 regularisation (in logistic regression, support vector machine, and back-propagation neural network), but only logarithmically when using L1 regularisation (in logistic regression).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-4417135016545196600?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/U7YAntrRpnc" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/4417135016545196600/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=4417135016545196600" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/4417135016545196600?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/4417135016545196600?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/U7YAntrRpnc/l1-regularisation-is-efficient-for.html" title="L1 regularisation Is efficient for selecting relevant features" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" 
src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2011/08/l1-regularisation-is-efficient-for.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DU8DSH49eSp7ImA9WhZaFkQ.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-2716034172154768368</id><published>2011-07-03T13:44:00.002+01:00</published><updated>2011-07-03T13:57:59.061+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-03T13:57:59.061+01:00</app:edited><title>New linear-time algorithm for suffix array construction</title><content type="html">Juha Kärkkäinen, Peter Sanders, and Stefan Burkhardt: &lt;a href="http://portal.acm.org/citation.cfm?id=1217858"&gt;Linear Work Suffix Array Construction&lt;/a&gt;, Journal of the ACM (JACM), Volume 53 Issue 6, November 2006.&lt;br /&gt;As the authors have said, this algorithm narrows the gap between &lt;a href="http://en.wikipedia.org/wiki/Suffix_tree"&gt;suffix tree&lt;/a&gt; and &lt;a href="http://en.wikipedia.org/wiki/Suffix_array"&gt;suffix array&lt;/a&gt;, which are widely used and largely interchangeable index structures on strings and sequences. Usually theoreticians prefer the former due to linear-time construction algorithms and more explicit structure while practitioners prefer the latter due to its simplicity and space efficiency. 
Now there is one more reason for practitioners to stick with suffix array.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-2716034172154768368?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/jeJ0t_XJh2A" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/2716034172154768368/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=2716034172154768368" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/2716034172154768368?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/2716034172154768368?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/jeJ0t_XJh2A/new-linear-time-algorithm-for-suffix.html" title="New linear-time algorithm for suffix array construction" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2011/07/new-linear-time-algorithm-for-suffix.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DU8FRnc4cSp7ImA9WhZaFU0.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-8637953512108275256</id><published>2011-07-01T09:01:00.006+01:00</published><updated>2011-07-01T09:10:17.939+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-01T09:10:17.939+01:00</app:edited><title>Research Impact for 
REF</title><content type="html">The British government's emphasis on the practical &lt;span style="font-weight:bold;"&gt;impact&lt;/span&gt; of research in &lt;a href="http://en.wikipedia.org/wiki/Research_Excellence_Framework"&gt;REF&lt;/a&gt; reminds me of Feynman's following words.&lt;br /&gt;&lt;blockquote&gt;&lt;span style="font-style:italic;"&gt;Physics [research] is like sex: sure, it may give some practical results, but that's not why we do it.&lt;/span&gt;&lt;/blockquote&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-8637953512108275256?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/yAl_Ydg4ONg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/8637953512108275256/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=8637953512108275256" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/8637953512108275256?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/8637953512108275256?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/yAl_Ydg4ONg/research-impact-for-ref.html" title="Research Impact for REF" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" 
/></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2011/07/research-impact-for-ref.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkQAQXg_eip7ImA9WhZbE0g.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-1656128833575964623</id><published>2011-06-17T23:32:00.002+01:00</published><updated>2011-06-17T23:39:00.642+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-17T23:39:00.642+01:00</app:edited><title>DiveRS'11</title><content type="html">&lt;a href="http://www.eps.uam.es/~castells"&gt;Pablo Castells&lt;/a&gt;, &lt;a href="http://www.cs.ucl.ac.uk/people/J.Wang.html"&gt;Jun Wang&lt;/a&gt;, &lt;a href="http://ir.ii.uam.es/~rlara"&gt;Ruben Lara&lt;/a&gt;, and &lt;a href="http://www.dcs.bbk.ac.uk/~dell"&gt;Dell Zhang&lt;/a&gt; are organising an ACM &lt;a href="http://www.recsys.acm.org/2011/"&gt;RecSys-2011&lt;/a&gt; workshop on &lt;a href="http://ir.ii.uam.es/divers2011/"&gt;Novelty and Diversity in Recommender Systems (DiveRS)&lt;/a&gt;. A special issue of &lt;a href="http://tist.acm.org/"&gt;ACM TIST&lt;/a&gt; in the scope of the workshop will be announced after the conference. 
Authors of accepted papers will be invited to submit an extended version.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-1656128833575964623?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/qp5ThfO8kqo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/1656128833575964623/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=1656128833575964623" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/1656128833575964623?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/1656128833575964623?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/qp5ThfO8kqo/divers11.html" title="DiveRS'11" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2011/06/divers11.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08BSHg7eSp7ImA9WhZbE0k.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-5508217351414545210</id><published>2011-06-17T23:19:00.002+01:00</published><updated>2011-06-17T23:30:59.601+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-17T23:30:59.601+01:00</app:edited><title>A couple of metrics</title><content type="html">It is often desirable to measure the dissimilarity or distance between items 
using a proper &lt;a href="http://en.wikipedia.org/wiki/Distance_function"&gt;metric&lt;/a&gt;.&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;a href="http://en.wikipedia.org/wiki/Jaccard_index"&gt;Jaccard coefficient&lt;/a&gt; can be converted to a metric by subtracting the Jaccard coefficient from 1. &lt;/li&gt;&lt;li&gt;&lt;a href="http://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence"&gt;Kullback–Leibler divergence&lt;/a&gt; can be converted to a metric by taking the square root of its symmetric version &lt;a href="http://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence"&gt;Jensen–Shannon divergence&lt;/a&gt;.&lt;/li&gt;&lt;/ul&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-5508217351414545210?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/clOyN8BHo2s" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/5508217351414545210/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=5508217351414545210" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/5508217351414545210?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/5508217351414545210?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/clOyN8BHo2s/couple-of-metrics.html" title="A couple of metrics" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" 
src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2011/06/couple-of-metrics.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkIMSXk7cSp7ImA9Wx5VEUk.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-2141858157810712356</id><published>2010-10-04T00:18:00.003+01:00</published><updated>2010-10-04T00:29:48.709+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-10-04T00:29:48.709+01:00</app:edited><title>A Poor Man's Parallel Processing</title><content type="html">A very crude, but often good enough, method to achieve parallel processing (e.g., on multi-core computers) is to partition the large input data file into small chunks, run the program to process each of them in parallel, and then merge the output results file back. Fortunately, this process can be done easily with the wise iterative usage of two Unix utilities: &lt;a href="http://en.wikipedia.org/wiki/Split_(Unix)"&gt;split&lt;/a&gt; and &lt;a href="http://en.wikipedia.org/wiki/Cat_(Unix)"&gt;cat&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-2141858157810712356?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/F8YHSSIxNmU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/2141858157810712356/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=2141858157810712356" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/2141858157810712356?v=2" /><link rel="self" type="application/atom+xml" 
href="http://www.blogger.com/feeds/9749960/posts/default/2141858157810712356?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/F8YHSSIxNmU/poor-mans-parallel-processing.html" title="A Poor Man's Parallel Processing" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/10/poor-mans-parallel-processing.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ck4BSHw-eSp7ImA9Wx5XEUg.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-3839707794064466629</id><published>2010-09-10T21:28:00.003+01:00</published><updated>2010-09-10T21:35:59.251+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-09-10T21:35:59.251+01:00</app:edited><title>nDCG</title><content type="html">The choice of the gain and discount function for the popular IR performance measure &lt;a href="http://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG"&gt;normalised Discounted Cumulative Gain (nDCG)&lt;/a&gt; has been discussed and empirically justified in &lt;a href="http://portal.acm.org/citation.cfm?id=1645953.1646032"&gt;a CIKM-2009 paper&lt;/a&gt; through &lt;a href="http://en.wikipedia.org/wiki/Analysis_of_variance"&gt;analysis of variance (ANOVA)&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-3839707794064466629?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/bOju5_Eciik" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" 
href="http://researchonsearch.blogspot.com/feeds/3839707794064466629/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=3839707794064466629" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/3839707794064466629?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/3839707794064466629?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/bOju5_Eciik/ndcg.html" title="nDCG" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/09/ndcg.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D04NQ3g9eyp7ImA9Wx5SFU4.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-7938274321722992393</id><published>2010-08-11T15:09:00.002+01:00</published><updated>2010-08-11T15:13:12.663+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-11T15:13:12.663+01:00</app:edited><title>LNRE</title><content type="html">Here is a good tutorial with Matlab examples about &lt;a href="http://www.ling.upenn.edu/courses/cogs502/LNRE.html"&gt;Statistical Estimation for Large Numbers of Rare Events (LNRE)&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-7938274321722992393?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/3a8P7UafY_Q" height="1" width="1"/&gt;</content><link rel="replies" 
type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/7938274321722992393/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=7938274321722992393" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/7938274321722992393?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/7938274321722992393?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/3a8P7UafY_Q/lnre.html" title="LNRE" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/08/lnre.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0MFQnY9eCp7ImA9WxFVGEQ.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-8362216267823719956</id><published>2010-06-18T21:56:00.002+01:00</published><updated>2010-06-18T22:10:13.860+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-06-18T22:10:13.860+01:00</app:edited><title>VLFeat  - a computer vision toolbox</title><content type="html">The &lt;a href="http://www.vlfeat.org/"&gt;VLFeat&lt;/a&gt; open source computer vision library that implements popular&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;span style="font-weight: bold;"&gt;feature extraction&lt;/span&gt; algorithms (such as &lt;a href="http://www.vlfeat.org/overview/sift.html"&gt;SIFT&lt;/a&gt;, &lt;a href="http://www.vlfeat.org/overview/mser.html"&gt;MSER&lt;/a&gt;, and &lt;a href="http://www.vlfeat.org/overview/quickshift.html"&gt;quick shift&lt;/a&gt;),&lt;br 
/&gt;&lt;/li&gt;&lt;li&gt;&lt;span style="font-weight: bold;"&gt;clustering&lt;/span&gt; algorithms (such as &lt;a href="http://www.vlfeat.org/overview/ikm.html"&gt;integer k-means&lt;/a&gt;, &lt;a href="http://www.vlfeat.org/overview/hikm.html"&gt;hierarchical k-means&lt;/a&gt;, and &lt;a href="http://www.vlfeat.org/overview/aib.html"&gt;agglomerative information bottleneck&lt;/a&gt;), and&lt;br /&gt;&lt;/li&gt;&lt;li&gt;&lt;span style="font-weight: bold;"&gt;matching&lt;/span&gt; algorithms (such as &lt;a href="http://www.vlfeat.org/overview/kdtree.html"&gt;randomized kd-trees&lt;/a&gt;).&lt;br /&gt;&lt;/li&gt;&lt;/ul&gt;It is written in C for efficiency and compatibility, with interfaces in MATLAB for ease of use, and detailed documentation throughout.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-8362216267823719956?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/lapr9EOgACE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/8362216267823719956/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=8362216267823719956" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/8362216267823719956?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/8362216267823719956?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/lapr9EOgACE/vlfeat-computer-vision-toolbox.html" title="VLFeat  - a computer vision toolbox" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image 
rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/06/vlfeat-computer-vision-toolbox.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0UHR3g9fip7ImA9WxFWE0U.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-6595665010398939285</id><published>2010-06-01T09:24:00.002+01:00</published><updated>2010-06-01T09:33:56.666+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-06-01T09:33:56.666+01:00</app:edited><title>Bloom filters and Locality Sensitive Hashing</title><content type="html">&lt;a href="http://en.wikipedia.org/wiki/Locality_sensitive_hashing"&gt;Locality Sensitive Hashing (LSH)&lt;/a&gt; of &lt;span style="font-style:italic;"&gt;l&lt;/span&gt;-bits is achieved by carrying out &lt;span style="font-style:italic;"&gt; l&lt;/span&gt; independent random cuts of the Euclidean space: if two data points are in the same side of all these cuts, they are very likely to be nearest neighbours. 
In this sense, I think &lt;a href="http://en.wikipedia.org/wiki/Bloom_filter"&gt;Bloom filters&lt;/a&gt; (which also rely on a number of independent hashing functions) can be conceptually considered as the extreme case of LSH: each of its cuts tries to separate one data point from the rest.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-6595665010398939285?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/d2GyV3U9iIk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/6595665010398939285/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=6595665010398939285" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/6595665010398939285?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/6595665010398939285?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/d2GyV3U9iIk/bloom-filters-and-locality-sensitive.html" title="Bloom filters and Locality Sensitive Hashing" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/06/bloom-filters-and-locality-sensitive.html</feedburner:origLink></entry><entry 
gd:etag="W/&quot;CkEHRnsyfSp7ImA9WxFWE0U.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-6382078520742247601</id><published>2010-05-31T10:22:00.004+01:00</published><updated>2010-06-01T09:23:57.595+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-06-01T09:23:57.595+01:00</app:edited><title>An application of Bloom filters</title><content type="html">It is said that Google's &lt;a href="http://en.wikipedia.org/wiki/BigTable"&gt;BigTable&lt;/a&gt; uses &lt;a href="http://en.wikipedia.org/wiki/Bloom_filter"&gt;Bloom filters&lt;/a&gt; to reduce the disk lookups for non-existent rows or columns.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-6382078520742247601?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/bosb598m4Yo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/6382078520742247601/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=6382078520742247601" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/6382078520742247601?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/6382078520742247601?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/bosb598m4Yo/bloom-filters-and-bigtable.html" title="An application of Bloom filters" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" 
/></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/05/bloom-filters-and-bigtable.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0ENRXY-cCp7ImA9WxFRGU8.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-7168542709393045712</id><published>2010-05-04T00:00:00.005+01:00</published><updated>2010-05-04T00:34:54.858+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-05-04T00:34:54.858+01:00</app:edited><title>A suffix tree implementation with Unicode support</title><content type="html">It seems that there is currently no &lt;a href="http://en.wikipedia.org/wiki/Suffix_tree"&gt;suffix tree&lt;/a&gt; implementation with Unicode support publicly available online. So I adapted &lt;a href="http://www.daimi.au.dk/~mailund/suffix_tree.html"&gt;Thomas Mailund's suffix tree implementation in C with a Python binding&lt;/a&gt; and put it &lt;a href="http://www.dcs.bbk.ac.uk/~dell/code/suffix_tree_unicode.zip"&gt;here&lt;/a&gt;. The changes that I made to the code were mainly to make it support Unicode text and be compatible with new version Python. 
It also includes an example program all_comsubstr.py that illustrates the extraction of common substrings from two Chinese strings (encoded in UTF-8).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-7168542709393045712?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/U8GPWRbwhyk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/7168542709393045712/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=7168542709393045712" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/7168542709393045712?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/7168542709393045712?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/U8GPWRbwhyk/suffix-tree-implementation-with-unicode.html" title="A suffix tree implementation with Unicode support" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/05/suffix-tree-implementation-with-unicode.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkYFR3g-eSp7ImA9WxFRGU8.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-388717278064874281</id><published>2010-05-03T22:53:00.002+01:00</published><updated>2010-05-03T23:01:56.651+01:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2010-05-03T23:01:56.651+01:00</app:edited><title>Longest Common Substring</title><content type="html">Given two strings, &lt;span style="font-style:italic;"&gt;S&lt;/span&gt; of length &lt;span style="font-style:italic;"&gt;m&lt;/span&gt; and &lt;span style="font-style:italic;"&gt;T&lt;/span&gt;  of length &lt;span style="font-style:italic;"&gt;n&lt;/span&gt;, their &lt;a href="http://en.wikipedia.org/wiki/Longest_common_substring_problem"&gt;longest common substrings&lt;/a&gt; can be found in O(&lt;span style="font-style:italic;"&gt;m&lt;/span&gt;+&lt;span style="font-style:italic;"&gt;n&lt;/span&gt;) time using a &lt;a href="http://en.wikipedia.org/wiki/Generalised_suffix_tree"&gt;generalised suffix tree&lt;/a&gt;, or in O(&lt;span style="font-style:italic;"&gt;m&lt;/span&gt;&lt;span style="font-style:italic;"&gt;n&lt;/span&gt;) time through &lt;a href="http://en.wikipedia.org/wiki/Dynamic_programming"&gt;dynamic programming&lt;/a&gt; (e.g., the Python code &lt;a href="http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_substring#Python"&gt;here&lt;/a&gt;).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-388717278064874281?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/DVU0mr3c2jQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/388717278064874281/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=388717278064874281" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/388717278064874281?v=2" /><link rel="self" type="application/atom+xml" 
href="http://www.blogger.com/feeds/9749960/posts/default/388717278064874281?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/DVU0mr3c2jQ/longest-common-substring.html" title="Longest Common Substring" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/05/longest-common-substring.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0IEQH0zeSp7ImA9WxBaGEw.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-4783243130298899710</id><published>2010-03-28T22:53:00.005+01:00</published><updated>2010-03-28T23:31:41.381+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-03-28T23:31:41.381+01:00</app:edited><title>Bayesian inference for the Gaussian</title><content type="html">Given the prior probability &lt;br /&gt;$p(\mu) = \mathcal{N}(x_0,\sigma_0^2)$ &lt;br /&gt;and the likelihood &lt;br /&gt;$p(x_1|\mu) = \mathcal{N}(\mu,\sigma_1^2)$, &lt;br /&gt;the expectation of the posterior probability &lt;br /&gt;$p(\mu|x_1)$ &lt;br /&gt;has a very simple and elegant form:&lt;br /&gt;$(\alpha x_0 + \beta x_1) / (\alpha + \beta)$&lt;br /&gt;where &lt;br /&gt;$\alpha = 1/(\sigma_0^2)$ and $\beta = 1/(\sigma_1^2)$ &lt;br /&gt;are the precisions.&lt;br /&gt;&lt;br /&gt;Please refer to Bishop's PRML book section 2.3.6.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-4783243130298899710?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/dxQtbKlBGJc" height="1" width="1"/&gt;</content><link rel="replies" 
type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/4783243130298899710/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=4783243130298899710" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/4783243130298899710?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/4783243130298899710?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/dxQtbKlBGJc/bayesian-inference-for-gaussian.html" title="Bayesian inference for the Gaussian" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/03/bayesian-inference-for-gaussian.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0MFQ30zcCp7ImA9WxBWEk0.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-8161884115071419497</id><published>2010-02-03T14:56:00.002Z</published><updated>2010-02-03T15:03:32.388Z</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-02-03T15:03:32.388Z</app:edited><title>Comparing Data Analysis Packages</title><content type="html">A succinct comparison of data analysis packages including R, Matlab, SciPy, Excel, SAS, SPSS and Stata, can be found &lt;a href="http://anyall.org/blog/2009/02/comparison-of-data-analysis-packages-r-matlab-scipy-excel-sas-spss-stata/"&gt;here&lt;/a&gt;. 
I recently tried Stata, but found its language syntax ugly and awkward.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-8161884115071419497?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/PYgDcFyGsL0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/8161884115071419497/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=8161884115071419497" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/8161884115071419497?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/8161884115071419497?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/PYgDcFyGsL0/comparing-data-analysis-packages.html" title="Comparing Data Analysis Packages" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2010/02/comparing-data-analysis-packages.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0INQnwzeyp7ImA9WxNUGUk.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-7007292614750703194</id><published>2009-11-11T11:58:00.004Z</published><updated>2009-11-11T12:13:13.283Z</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2009-11-11T12:13:13.283Z</app:edited><title>The myth about the Internet</title><content type="html">&lt;a 
href="http://www.research.att.com/viewInnovator.cfm?id=109"&gt;Walter Willinger&lt;/a&gt; et al. recently published a &lt;a href="http://faculty.nps.edu/dlalders/docs/Internet-AMS-Notices-May2009.pdf"&gt;paper&lt;/a&gt; in which the &lt;a href="http://en.wikipedia.org/wiki/Scale-free_network"&gt;scale-free network&lt;/a&gt; model of the &lt;a href="http://en.wikipedia.org/wiki/Preferential_attachment"&gt;preferential attachment&lt;/a&gt; type for the Internet is said to be a myth, as it is based on fundamentally flawed traceroute data. Furthermore, they criticize the currently popular &lt;span style="font-style:italic;"&gt;data-fitting&lt;/span&gt; approach to &lt;a href="http://en.wikipedia.org/wiki/Network_science"&gt;network science&lt;/a&gt; and argue that it should be replaced by the &lt;span style="font-style:italic;"&gt;reverse-engineering&lt;/span&gt; approach.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-7007292614750703194?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/lcnG4xDH3ck" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/7007292614750703194/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=7007292614750703194" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/7007292614750703194?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/7007292614750703194?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/lcnG4xDH3ck/myth-about-internet.html" title="The myth about the Internet" /><author><name>Dell 
Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2009/11/myth-about-internet.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkMGQHY-eyp7ImA9WxNSEEk.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-6089437043082535801</id><published>2009-08-12T23:24:00.008+01:00</published><updated>2009-08-23T17:20:21.853+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2009-08-23T17:20:21.853+01:00</app:edited><title>Large networks are not modular</title><content type="html">A pretty striking finding in &lt;a href="http://www2008.org/papers/fp569.html"&gt;the WWW'08 paper from Leskovec et al.&lt;/a&gt; is that in nearly every network dataset they examined, there are tight but almost trivial communities at very small scales (up to around 100 nodes), while at larger scales, the best possible communities gradually "blend in" with the rest of the network and thus become less "community-like".&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-6089437043082535801?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/xJheXtJ-Jjs" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/6089437043082535801/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=6089437043082535801" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/6089437043082535801?v=2" 
/><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/6089437043082535801?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/xJheXtJ-Jjs/large-networks-are-not-modular.html" title="Large networks are not modular" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2009/08/large-networks-are-not-modular.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUQEQX47eip7ImA9WxJbGUQ.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-7271082370085589351</id><published>2009-07-30T23:52:00.005+01:00</published><updated>2009-07-31T00:41:40.002+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2009-07-31T00:41:40.002+01:00</app:edited><title>Spectral Graph Partitioning</title><content type="html">There are a number of methods in the family of Spectral Graph Partitioning, including the traditional &lt;span style="font-style:italic;"&gt;min-cut&lt;/span&gt; and various balanced cut criteria (such as &lt;span style="font-style:italic;"&gt;ratio-cut&lt;/span&gt;, &lt;span style="font-style:italic;"&gt;average-cut&lt;/span&gt;, &lt;span style="font-style:italic;"&gt;normalized-cut&lt;/span&gt; and &lt;span style="font-style:italic;"&gt;minmax-cut&lt;/span&gt;). Each method uses a different objective function and consequently a different definition of partition (cluster) indicator vector. 
The following two tutorials on Spectral Clustering both contain a good summary of these methods.&lt;br /&gt;[1] &lt;a href="http://ranger.uta.edu/~chqding/Spectral/"&gt;Spectral Clustering, ICML 2004 Tutorial by Chris Ding&lt;/a&gt;&lt;br /&gt;[2] &lt;a href="http://www.kyb.tuebingen.mpg.de/bs/people/ule/publications/publication_downloads/Luxburg07_tutorial.pdf"&gt;A Tutorial on Spectral Clustering by Ulrike von Luxburg&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-7271082370085589351?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/sf0i1pHALNc" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/7271082370085589351/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=7271082370085589351" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/7271082370085589351?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/7271082370085589351?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/sf0i1pHALNc/spectral-graph-partitioning.html" title="Spectral Graph Partitioning" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2009/07/spectral-graph-partitioning.html</feedburner:origLink></entry><entry 
gd:etag="W/&quot;C0IEQn4zeCp7ImA9WxJVEk0.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-112542774645796488</id><published>2009-06-28T16:04:00.000+01:00</published><updated>2009-06-28T16:05:03.080+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2009-06-28T16:05:03.080+01:00</app:edited><title>SIFT and SURF</title><content type="html">&lt;a href="http://en.wikipedia.org/wiki/Scale-invariant_feature_transform"&gt;Scale-Invariant Feature Transform (or SIFT)&lt;/a&gt; is one of the most popular algorithms in computer vision to detect and describe local features in images. The algorithm was published by &lt;a href="http://www.cs.ubc.ca/~lowe/"&gt;David Lowe&lt;/a&gt; in 1999, and it is now a patent of the &lt;a href="http://www.ubc.ca/"&gt;University of British Columbia&lt;/a&gt;. &lt;br /&gt;&lt;br /&gt;&lt;a href="http://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/AV0405/MURRAY/SIFT.html"&gt;The SIFT approach, for image feature generation&lt;/a&gt;, takes an image and transforms it into a "large collection of local feature vectors". Each of these feature vectors is invariant to any scaling, rotation or translation of the image. This approach shares many features with neuron responses in primate vision. To aid the extraction of these features the SIFT algorithm applies a 4 stage filtering approach: (1) Scale-Space Extrema Detection (2) Keypoint Localisation (3) Orientation Assignment (4) Keypoint Descriptor. 
&lt;br /&gt;&lt;br /&gt;&lt;a href="http://en.wikipedia.org/wiki/SURF"&gt;Speeded Up Robust Features (SURF)&lt;/a&gt; is said to have similar performance to SIFT, while at the same time being faster.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-112542774645796488?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/OWxRZviELfw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/112542774645796488/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=112542774645796488" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/112542774645796488?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/112542774645796488?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/OWxRZviELfw/sift-and-surf.html" title="SIFT and SURF" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2009/06/sift-and-surf.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkMFR3Y6fCp7ImA9WxJQEko.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-1909855849943322805</id><published>2009-05-25T17:50:00.002+01:00</published><updated>2009-05-25T17:53:36.814+01:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2009-05-25T17:53:36.814+01:00</app:edited><title>Centrality measures in network analysis</title><content type="html">There are four measures of &lt;a href="http://en.wikipedia.org/wiki/Centrality"&gt;centrality&lt;/a&gt; that are widely used in network analysis: &lt;span style="font-weight:bold;"&gt;degree centrality&lt;/span&gt;, &lt;span style="font-weight:bold;"&gt;betweenness centrality&lt;/span&gt;, &lt;span style="font-weight:bold;"&gt;closeness centrality&lt;/span&gt;, and &lt;span style="font-weight:bold;"&gt;eigenvector centrality&lt;/span&gt;. Google's &lt;a href="http://en.wikipedia.org/wiki/PageRank"&gt;PageRank&lt;/a&gt; is a variant of the eigenvector centrality measure.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-1909855849943322805?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/GRLuEWHsihU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/1909855849943322805/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=1909855849943322805" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/1909855849943322805?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/1909855849943322805?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/GRLuEWHsihU/centrality-measures-in-network-analysis.html" title="Centrality measures in network analysis" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" 
width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2009/05/centrality-measures-in-network-analysis.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ak8MRX0zeyp7ImA9WxJQFEg.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-8926255383670754047</id><published>2009-05-25T16:54:00.005+01:00</published><updated>2009-05-27T22:14:44.383+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2009-05-27T22:14:44.383+01:00</app:edited><title>Dirichlet prior for smoothing</title><content type="html">Using &lt;a href="http://en.wikipedia.org/wiki/Dirichlet_distribution"&gt;Dirichlet distribution&lt;/a&gt; as the prior for smoothing in statistical &lt;a href="http://en.wikipedia.org/wiki/Language_modeling"&gt;language modeling&lt;/a&gt; leads to &lt;a href="http://en.wikipedia.org/wiki/Additive_smoothing"&gt;additive smoothing&lt;/a&gt; (a.k.a. &lt;span style="font-weight:bold;"&gt;Lidstone smoothing&lt;/span&gt;) that includes &lt;span style="font-weight:bold;"&gt;Laplace smoothing&lt;/span&gt; (i.e., add one) and &lt;span style="font-weight:bold;"&gt;Jeffreys-Perks smoothing&lt;/span&gt; (i.e., add half) (a.k.a. Expected Likelihood Estimation) as special cases. 
This family of smoothing methods can be regarded as a &lt;span style="font-style:italic;"&gt;document dependent&lt;/span&gt; extension of &lt;span style="font-weight:bold;"&gt;linear interpolated smoothing&lt;/span&gt;.&lt;br /&gt;&lt;br /&gt;It has been shown that Laplace smoothing, though most popular (in textbooks), is often inferior to Lidstone smoothing (using a value less than one) in modeling natural language data, e.g., for text classification tasks (see &lt;a href="http://www.springerlink.com/content/ht1x6qptvbnph4pa/"&gt;Athena: Mining-based interactive management of text databases&lt;/a&gt;).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-8926255383670754047?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/KOG2UA1isAI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/8926255383670754047/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=8926255383670754047" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/8926255383670754047?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/8926255383670754047?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/KOG2UA1isAI/dirichlet-prior-for-smoothing.html" title="Dirichlet prior for smoothing" /><author><name>Dell Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" 
/></author><thr:total>1</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2009/05/dirichlet-prior-for-smoothing.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0cFQH49fSp7ImA9WxJQEEo.&quot;"><id>tag:blogger.com,1999:blog-9749960.post-544752920127571555</id><published>2009-05-23T12:42:00.000+01:00</published><updated>2009-05-23T12:43:31.065+01:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2009-05-23T12:43:31.065+01:00</app:edited><title>PyMat</title><content type="html">&lt;a href="http://pymat.sourceforge.net/"&gt;PyMat&lt;/a&gt; exposes the MATLAB engine interface allowing Python programs to start, close, and communicate with a MATLAB engine session. In addition, the package allows transferring matrices to and from an MATLAB workspace. These matrices can be specified as NumPy arrays, allowing a blend between the mathematical capabilities of NumPy and those of MATLAB.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/9749960-544752920127571555?l=researchonsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/researchonsearch/~4/1WD-o6SWsec" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://researchonsearch.blogspot.com/feeds/544752920127571555/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=9749960&amp;postID=544752920127571555" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/544752920127571555?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/9749960/posts/default/544752920127571555?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/researchonsearch/~3/1WD-o6SWsec/pymat.html" title="PyMat" /><author><name>Dell 
Zhang</name><uri>http://www.blogger.com/profile/14810903698038676929</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="21" height="32" src="http://3.bp.blogspot.com/-rNdTYdZZemg/TvNVYV7IbpI/AAAAAAAAATk/-6SFTPL264k/s220/dellzhang_bbk.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://researchonsearch.blogspot.com/2009/05/pymat.html</feedburner:origLink></entry></feed>

