<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:blogger="http://schemas.google.com/blogger/2008" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;D0AGQXs5eSp7ImA9WhBaEUk.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184</id><updated>2013-05-21T07:15:20.521-07:00</updated><category term="Intel-MKL" /><category term="clustering" /><category term="PETSc" /><category term="Fedora" /><category term="LSI" /><category term="SQL" /><category term="MMDS" /><category term="Power-Iteration" /><category term="Connectome" /><category term="itpp" /><category term="NSF" /><category term="GaBP" /><category term="ml-events" /><category term="Jacobi" /><category term="GAMP" /><category term="asterix" /><category term="NIPS" /><category term="collaborative-filtering" /><category term="Poker" /><category term="Amazon-EC2" /><category term="svd++" /><category term="MyMediaLite" /><category term="MadLINQ" /><category term="SVM" /><category term="Hearst Challenge" /><category term="Parser" /><category term="eclipse" /><category term="K-Shell" /><category term="ismion" /><category term="Vowpal Wabbit" /><category term="biglearn.org" /><category term="eHarmony" /><category term="pegasos" /><category term="big-datasets" /><category term="DARPA" /><category term="MacPorts" /><category term="K-means" /><category term="Oracle Labs" /><category term="time-SVD++" /><category term="OpenCloud" /><category term="it++" /><category term="LexisNexis" /><category term="CentOS" /><category term="BlackLight" /><category term="linode" /><category term="FreeBSD" /><category term="HPC" 
/><category term="Lawa" /><category term="Social Networks" /><category term="Giraph" /><category term="LDA" /><category term="MAC OSX" /><category term="graphchi" /><category term="shotgun" /><category term="graph-visualization" /><category term="KDD Cup" /><category term="parallel-algorithm" /><category term="Matrix Market" /><category term="Libboost" /><category term="SVDFeature" /><category term="Intel" /><category term="LASSO" /><category term="HPCC" /><category term="Python" /><category term="Netflix" /><category term="SVD" /><category term="Lanczos" /><category term="Pandora" /><category term="Alternating-Least-Squares" /><category term="Machine Learning" /><category term="Julia" /><category term="Pegasus" /><category term="Label Propagation" /><category term="Eigen" /><category term="kaggle" /><category term="32 Bit Linux" /><category term="Gentoo" /><category term="RedHat" /><category term="Cassovary" /><category term="linear-system" /><category term="Ciel" /><category term="LensKit" /><category term="Hadoop" /><category term="GraphLab" /><category term="Mahout" /><category term="Data-Scope" /><category term="BLAS" /><category term="PMF" /><category term="Parallel-Perl" /><category term="logistic regression" /><category term="Spark" /><category term="graph-database" /><category term="MapReduce-Plugin" /><category term="PSC" /><category term="read-modify-write" /><category term="matrix factorization" /><category term="GraphLab-Workshop" /><category term="Java" /><category term="Spotlight" /><category term="K-Core" /><category term="Octave" /><category term="Discovix" /><category term="open-source-tools" /><category term="million-songs" /><category term="Medusa" /><category term="Cool-Internships" /><category term="twitter" /><category term="SVMLight" /><category term="TeraGrid" /><category term="SLEPc" /><category term="Galois" /><category term="Lapack" /><category term="Hyperspectral Imaging" /><category term="Ubuntu" /><category term="Linear-Stable" 
/><category term="R" /><category term="PCA" /><title>Large Scale Machine Learning and Other Animals</title><subtitle type="html" /><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://bickson.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>250</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/blogspot/sYXZE" /><feedburner:info uri="blogspot/syxze" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry gd:etag="W/&quot;CU4AQ3o6cSp7ImA9WhBbGUU.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-2548921142075800304</id><published>2013-05-19T10:17:00.001-07:00</published><updated>2013-05-19T10:19:02.419-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-19T10:19:02.419-07:00</app:edited><title>An Overview of Graph Processing Frameworks</title><content type="html">&lt;a href="http://strata.oreilly.com/2013/05/improving-options-for-unlocking-your-graph-data.html"&gt;A nicely written overview&lt;/a&gt; of Graph processing frameworks by Ben Lorica from O'Reilly media. 
Will help you master the essential buzzwords on big data analytics and graph processing.. And explains quite clearly why you should attend our &lt;a href="http://graphlab.org/graphlab-workshop-2013/"&gt;2nd GraphLab workshop&lt;/a&gt;!&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/lGP85sQA9wc" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/2548921142075800304/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/05/overview-of-graph-processing-framework.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2548921142075800304?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2548921142075800304?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/lGP85sQA9wc/overview-of-graph-processing-framework.html" title="An Overview of Graph Processing Frameworks" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/05/overview-of-graph-processing-framework.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUEBSHk5cCp7ImA9WhBbGUU.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-231226753792566991</id><published>2013-05-19T10:10:00.001-07:00</published><updated>2013-05-19T10:14:19.728-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-19T10:14:19.728-07:00</app:edited><title>Kaggle Titanic Contest</title><content 
type="html">I got this from &lt;a href="http://www.linkedin.com/in/sagied"&gt;Sagie Davidovich&lt;/a&gt; a link to &lt;a href="http://www.kaggle.com/c/titanic-gettingStarted"&gt;Kaggle's Titanic Contest&lt;/a&gt;. The task is to predict who survived the Titanic based on some features like age, cabin class, name, and presence of relatives on the Ship.&lt;br /&gt;
What I like about this task is that you can actually explain to your grandma what you are predicting (unlike many other ML tasks, which are hard to explain to the non-expert). The dataset is quite tiny by our standards, but it still enables the usage of different ML methods. The best prediction on the&amp;nbsp;&lt;a href="http://www.kaggle.com/c/titanic-gettingStarted/leaderboard"&gt;leader-board&lt;/a&gt;&amp;nbsp;has about 99% accuracy.&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/f49wQBN0RKY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/231226753792566991/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/05/kaggle-titanic-contest.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/231226753792566991?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/231226753792566991?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/f49wQBN0RKY/kaggle-titanic-contest.html" title="Kaggle Titanic Contest" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/05/kaggle-titanic-contest.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0EFSXw-cSp7ImA9WhBbFU4.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-5157942853758030227</id><published>2013-05-14T04:40:00.000-07:00</published><updated>2013-05-14T04:40:18.259-07:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2013-05-14T04:40:18.259-07:00</app:edited><title>Funding for the next generation of GraphLab</title><content type="html">&lt;br /&gt;
&lt;div dir="ltr" style="background-color: white; border: 0px; clear: none; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
The GraphLab journey began with the desire:&lt;/div&gt;
&lt;ul style="background-color: white; border: 0px; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 19.09090805053711px; list-style-image: initial; list-style-position: inside; margin: 0px 0px 10px 21px; padding: 0px; vertical-align: baseline;"&gt;
&lt;li style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;to&amp;nbsp;&lt;strong style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;rethink&lt;/strong&gt;&amp;nbsp;the way we approach Machine Learning and Graph analytics,&lt;/li&gt;
&lt;li style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;to&amp;nbsp;&lt;strong style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;demonstrate&lt;/strong&gt;&amp;nbsp;that with the&amp;nbsp;&lt;strong style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;right abstractions&lt;/strong&gt;&amp;nbsp;and system design we can achieve unprecedented levels of performance, and&lt;/li&gt;
&lt;li style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;to&amp;nbsp;&lt;strong style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;build a community&lt;/strong&gt;&amp;nbsp;around large-scale graph computation.&lt;/li&gt;
&lt;/ul&gt;
&lt;div dir="ltr" style="background-color: white; border: 0px; clear: none; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
We have been blown away by the excitement and growth of the GraphLab community and have been unable to keep up with the incredible interest from our amazing users.&lt;/div&gt;
&lt;div style="background-color: white; border: 0px; clear: none; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
Therefore, we are proud to announce GraphLab Inc, a company devoted to accelerating the development of the open-source GraphLab project.&lt;/div&gt;
&lt;h3 dir="ltr" style="background-color: white; border: 0px; color: #181818; font-family: Helvetica, Helvetica, serif; font-size: 24px; font-weight: normal; line-height: 1.25; margin: 0px 0px 9px; padding: 0px; vertical-align: baseline;"&gt;
Why a company?&lt;/h3&gt;
&lt;div dir="ltr" style="background-color: white; border: 0px; clear: none; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
Put simply, we need a full-time dedicated effort to take GraphLab from where it is today to where we would like to see it go in the future. &amp;nbsp;With a dedicated team, you will see exciting new features, more integration with Cloud infrastructure, easier installation and deployment, along with a revamped support effort, with additional commercial-grade options and tools.&lt;/div&gt;
&lt;h3 dir="ltr" style="background-color: white; border: 0px; color: #181818; font-family: Helvetica, Helvetica, serif; font-size: 24px; font-weight: normal; line-height: 1.25; margin: 0px 0px 9px; padding: 0px; vertical-align: baseline;"&gt;
What happens to the open-source project?&lt;/h3&gt;
&lt;div dir="ltr" style="background-color: white; border: 0px; clear: none; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
The open source project will remain the flagship technology where we push the limits of graph computation and develop new ideas. &amp;nbsp;With a dedicated team focused on the code, documentation, deployment process and most importantly support, you can expect a regular release schedule, faster turnaround on bug fixes and improvements, and a higher quality bar for the code going forward. &amp;nbsp;We will continue to rely on our community to push us to tackle bigger, harder problems, and to contribute back to the open-source effort.&lt;/div&gt;
&lt;h3 dir="ltr" style="background-color: white; border: 0px; color: #181818; font-family: Helvetica, Helvetica, serif; font-size: 24px; font-weight: normal; line-height: 1.25; margin: 0px 0px 9px; padding: 0px; vertical-align: baseline;"&gt;
Are you going to charge for GraphLab now?&lt;/h3&gt;
&lt;div dir="ltr" style="background-color: white; border: 0px; clear: none; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
We believe the only way the best ideas in machine learning are developed is with the help of a vibrant community (it takes a village). That is why GraphLab will remain an open-source project; now with a company behind it, we can double our efforts to keep the project healthy.&lt;/div&gt;
&lt;h3 dir="ltr" style="background-color: white; border: 0px; color: #181818; font-family: Helvetica, Helvetica, serif; font-size: 24px; font-weight: normal; line-height: 1.25; margin: 0px 0px 9px; padding: 0px; vertical-align: baseline;"&gt;
What is the roadmap for the open-source project?&lt;/h3&gt;
&lt;div dir="ltr" style="background-color: white; border: 0px; clear: none; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
GraphLab 2.2 is just around the corner, see here for more details as to what is in it. Beyond that, we are exploring a new computation engine and further enhancements to the communication layer, as well as simpler integration with existing Cloud technologies, easier installation procedures, and an exciting new graph storage system. &amp;nbsp;And of course, we look forward to working with you to develop the roadmap and build the next generation of the GraphLab system.&lt;/div&gt;
&lt;div dir="ltr" style="background-color: white; border: 0px; clear: none; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
The next 12 months will be an exciting time for us, and we hope that everyone will come along for the ride&amp;nbsp;&lt;img alt=":-)" class="wp-smiley" scale="0" src="http://graphlab.org/wp-includes/images/smilies/icon_smile.gif" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&amp;nbsp;.&lt;/div&gt;
&lt;div style="background-color: white; border: 0px; clear: none; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
Learn more at&amp;nbsp;&lt;a href="http://graphlab.com/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;graphlab.com&lt;/a&gt;&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/nQzltvCBDII" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/5157942853758030227/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/05/funding-for-next-generation-of-graphlab.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/5157942853758030227?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/5157942853758030227?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/nQzltvCBDII/funding-for-next-generation-of-graphlab.html" title="Funding for the next generation of GraphLab" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/05/funding-for-next-generation-of-graphlab.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DE4GSXw8fSp7ImA9WhBbGUk.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-5940358903590493069</id><published>2013-05-12T06:48:00.001-07:00</published><updated>2013-05-19T00:02:08.275-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-19T00:02:08.275-07:00</app:edited><title>Bond Percolation in GraphLab</title><content type="html">&lt;span style="background-color: white;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;span style="line-height: 19.1875px;"&gt;I was asked by &lt;a href="http://www.cs.huji.ac.il/~kirk/"&gt;Prof. 
Scott Kirkpatrick&lt;/a&gt; to help implement bond&amp;nbsp;&lt;/span&gt;&lt;span style="line-height: 19.176136016845703px;"&gt;percolation&lt;/span&gt;&lt;span style="line-height: 19.1875px;"&gt;&amp;nbsp;in GraphLab. It is an oldie but goldie problem which is closely related to the connected components problem.&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="background-color: white;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;span style="line-height: 19.1875px;"&gt;Here is an&amp;nbsp;&lt;/span&gt;&lt;span style="line-height: 19.176136016845703px;"&gt;explanation&lt;/span&gt;&lt;span style="line-height: 19.1875px;"&gt;&amp;nbsp;about bond&amp;nbsp;&lt;/span&gt;&lt;span style="line-height: 19.176136016845703px;"&gt;percolation&lt;/span&gt;&lt;span style="line-height: 19.1875px;"&gt;&amp;nbsp;from &lt;a href="http://en.wikipedia.org/wiki/Percolation_theory"&gt;Wikipedia&lt;/a&gt;:&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;i&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;A representative question (and the&amp;nbsp;&lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Etymology" style="background-color: white; background-image: none; color: #0b0080; font-family: sans-serif; font-size: 13px; line-height: 19.1875px; text-decoration: none;" title="Etymology"&gt;source&lt;/a&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&amp;nbsp;of the name) is as follows. Assume that some liquid is poured on top of some&amp;nbsp;&lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Porosity" style="background-color: white; background-image: none; color: #0b0080; font-family: sans-serif; font-size: 13px; line-height: 19.1875px; text-decoration: none;" title="Porosity"&gt;porous&lt;/a&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&amp;nbsp;material. Will the liquid be able to make its way from hole to hole and reach the bottom? 
This physical question is&amp;nbsp;&lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Mathematical_model" style="background-color: white; background-image: none; color: #0b0080; font-family: sans-serif; font-size: 13px; line-height: 19.1875px; text-decoration: none;" title="Mathematical model"&gt;modelled&lt;/a&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&amp;nbsp;mathematically as a&amp;nbsp;&lt;/span&gt;&lt;a class="mw-redirect" href="http://en.wikipedia.org/wiki/Grid_graph" style="background-color: white; background-image: none; color: #0b0080; font-family: sans-serif; font-size: 13px; line-height: 19.1875px; text-decoration: none;" title="Grid graph"&gt;three-dimensional network&lt;/a&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&amp;nbsp;of&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;n&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&amp;nbsp;×&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;n&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&amp;nbsp;×&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;n&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&amp;nbsp;&lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Graph_(mathematics)" style="background-color: white; background-image: none; color: #0b0080; font-family: sans-serif; font-size: 13px; line-height: 19.1875px; text-decoration: none;" title="Graph 
(mathematics)"&gt;vertices&lt;/a&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;, usually called "sites", in which the&amp;nbsp;&lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Graph_(mathematics)" style="background-color: white; background-image: none; color: #0b0080; font-family: sans-serif; font-size: 13px; line-height: 19.1875px; text-decoration: none;" title="Graph (mathematics)"&gt;edge&lt;/a&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;or "bonds" between each two neighbors may be open (allowing the liquid through) with probability&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;p&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;, or closed with probability 1&amp;nbsp;–&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;p&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;, and they are assumed to be independent. Therefore, for a given&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;p&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;, what is the probability that an open path exists from the top to the bottom? The behavior for large&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;n&lt;/span&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&amp;nbsp;is of primary interest. 
This problem, called now&amp;nbsp;&lt;/span&gt;&lt;b style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;bond percolation&lt;/b&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;, was introduced in the mathematics literature by&amp;nbsp;&lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Percolation_theory#CITEREFBroadbentHammersley1957" style="background-color: white; background-image: none; color: #0b0080; font-family: sans-serif; font-size: 13px; line-height: 19.1875px; text-decoration: none;"&gt;Broadbent &amp;amp; Hammersley (1957&lt;/a&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;), and has been studied intensively by mathematicians and physicists since.&lt;/span&gt;&lt;/i&gt;&lt;br /&gt;
&lt;i&gt;&lt;span style="background-color: white; font-family: sans-serif; font-size: 13px; line-height: 19.1875px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/i&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://upload.wikimedia.org/wikipedia/commons/7/7b/Bond_percolation_p_51.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="320" src="http://upload.wikimedia.org/wikipedia/commons/7/7b/Bond_percolation_p_51.png" width="316" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
2-d bond&amp;nbsp;percolation&amp;nbsp;problem. Will the water find its way&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
along the edges of this maze from top to bottom?&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
The algorithm for finding the connected edges is very simple:&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
In parallel&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&amp;nbsp; &amp;nbsp;- for each edge, record the minimum id of the connected edges&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&amp;nbsp; &amp;nbsp;- if there are no more changes in the network, break&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
This algorithm is frequently used in social networks to find groups of friends. And it is of course distributed and scales to very large problems. &amp;nbsp;The output is the min component found for each edge. From this output we can find the edge clusters.&amp;nbsp;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
I have utilized the newest experimental version of GraphLab (v. 2.2) for quickly implementing the above algorithm. The resulting code is surprisingly simple. The main loop of the program:&lt;/div&gt;
&lt;div class="separator" style="clear: both;"&gt;
&lt;/div&gt;
&lt;table id="src_table_0" style="background-color: white; border-collapse: collapse; color: black; font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; font-size: 12px; margin: 0px; padding: 0px; white-space: pre;"&gt;&lt;tbody style="margin: 0px; padding: 0px;"&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_218" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;
&lt;span style="color: purple;"&gt;//create a GraphLab engine&lt;/span&gt;
engine_type engine&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;dc&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; graph&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; clopts&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;);
&lt;/span&gt;&lt;span class="pln"&gt;&lt;span style="color: purple;"&gt;//register the map and combine operations that will be used in the update function&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_219" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;engine&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;register_map_reduce&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;BOND_PERCOLATION_MAP_REDUCE&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; bond_percolation_map&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; bond_percolation_combine&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;);
&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_221" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;
  //in a loop
&amp;nbsp; &lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;for&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;int&lt;/span&gt;&lt;span class="pln"&gt; i&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;=&lt;/span&gt;&lt;span class="lit" style="color: #006666;"&gt;0&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;;&lt;/span&gt;&lt;span class="pln"&gt; i&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;lt;&lt;/span&gt;&lt;span class="pln"&gt; max_iter&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;;&lt;/span&gt;&lt;span class="pln"&gt; i&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;++){
&lt;/span&gt;&lt;span class="pln"&gt;     &lt;span style="color: purple;"&gt;//perform update function on each node&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_222" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;engine&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;parfor_all_local_vertices&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;bond_percolation_function&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;);
&lt;/span&gt;&lt;span class="pln"&gt;     &lt;span style="color: purple;"&gt;//wait until all nodes are done&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_223" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;engine&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;wait&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;();
&lt;/span&gt;&lt;span class="pln"&gt;     &lt;span style="color: purple;"&gt;//count the number of unsatisfied links&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_224" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;size_t diff &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;=&lt;/span&gt;&lt;span class="pln"&gt; graph&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;map_reduce_edges&lt;/span&gt;&lt;span class="str" style="color: #008800;"&gt;&amp;lt;size_t&amp;gt;&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;count_components&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;);&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_225" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;span style="color: purple;"&gt;//if no more links to explore we are done&lt;/span&gt;&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_226" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;if&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;diff &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;==&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="lit" style="color: #006666;"&gt;0&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;)&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_227" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;break&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;;&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_228" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;}&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;span style="background-color: white; font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; font-size: 11.818181991577148px; white-space: pre-wrap;"&gt;bond_percolation_function &lt;/span&gt;which is executed in parallel across all nodes. In this function, we traverse all connected edges, and compare their minimum edge ids using the &amp;nbsp;&lt;span style="background-color: white; font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; font-size: 11.818181991577148px; white-space: pre-wrap;"&gt;bond_percolation_combine. &lt;/span&gt;As is easy to verify, each of those implemented functions has a single line of code.&amp;nbsp;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;/div&gt;
&lt;table id="src_table_0" style="background-color: white; border-collapse: collapse; color: black; font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; font-size: 12px; margin: 0px; padding: 0px; white-space: pre;"&gt;&lt;tbody style="margin: 0px; padding: 0px;"&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_115" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;/div&gt;
&lt;table id="src_table_0" style="background-color: white; border-collapse: collapse; color: black; font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; font-size: 12px; margin: 0px; padding: 0px; white-space: pre;"&gt;&lt;tbody style="margin: 0px; padding: 0px;"&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_104" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;

&lt;span style="color: purple;"&gt;//return the min component id found across this edge and its two connecting nodes&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_105" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="kwd" style="color: #000088;"&gt;unsigned&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;int&lt;/span&gt;&lt;span class="pln"&gt; bond_percolation_map&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;const&lt;/span&gt;&lt;span class="pln"&gt; graph_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;vertex_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;amp;&lt;/span&gt;&lt;span class="pln"&gt; center&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_106" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;graph_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;edge_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;amp;&lt;/span&gt;&lt;span class="pln"&gt; edge&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_107" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;const&lt;/span&gt;&lt;span class="pln"&gt; graph_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;vertex_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;amp;&lt;/span&gt;&lt;span class="pln"&gt; other&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;)&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;{&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_108" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp;return edge&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;data&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;().&lt;/span&gt;&lt;span class="pln"&gt;comp_id &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;=&lt;/span&gt;&lt;span class="pln"&gt; &amp;nbsp;std&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;min&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;std&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;min&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;center&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;data&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;().&lt;/span&gt;&lt;span class="pln"&gt;comp_id&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; edge&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;data&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;().&lt;/span&gt;&lt;span class="pln"&gt;id&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;),&lt;/span&gt;&lt;span class="pln"&gt; other&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;data&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;().&lt;/span&gt;&lt;span class="pln"&gt;comp_id&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;);&lt;/span&gt;&lt;span class="pln"&gt;&lt;br 
/&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_109" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_112" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pun" style="color: #666600;"&gt;}
&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_113" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_115" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="com" style="color: #880000;"&gt;//find min component of two connected edges&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_116" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="kwd" style="color: #000088;"&gt;void&lt;/span&gt;&lt;span class="pln"&gt; bond_percolation_combine&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;unsigned&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;int&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;amp;&lt;/span&gt;&lt;span class="pln"&gt; v1&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;const&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;unsigned&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;int&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;amp;&lt;/span&gt;&lt;span class="pln"&gt; v2&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;)&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;{&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_117" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; v1 &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;=&lt;/span&gt;&lt;span class="pln"&gt; std&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;min&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;v1&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; v2&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;);&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_118" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_120" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pun" style="color: #666600;"&gt;}&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_121" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_122" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="com" style="color: #880000;"&gt;//the main update function: goes over all nodes and selects their min edge id&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_123" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="kwd" style="color: #000088;"&gt;void&lt;/span&gt;&lt;span class="pln"&gt; bond_percolation_function&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;engine_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;context_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;amp;&lt;/span&gt;&lt;span class="pln"&gt; context&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_124" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; graph_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;vertex_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;amp;&lt;/span&gt;&lt;span class="pln"&gt; vertex&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;)&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;{&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_126" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;vertex&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;data&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;().&lt;/span&gt;&lt;span class="pln"&gt;comp_id &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;=&lt;/span&gt;&lt;span class="pln"&gt; &amp;nbsp;context&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;map_reduce&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;lt;&lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;unsigned&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;int&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;gt;(&lt;/span&gt;&lt;span class="pln"&gt;BOND_PERCOLATION_MAP_REDUCE&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;,&lt;/span&gt;&lt;span class="pln"&gt; graphlab&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;ALL_EDGES&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;);&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_127" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_129" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pun" style="color: #666600;"&gt;}&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;/div&gt;
&lt;div class="separator" style="clear: both;"&gt;
&lt;/div&gt;
&lt;table id="src_table_0" style="background-color: white; border-collapse: collapse; color: black; font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; font-size: 12px; margin: 0px; padding: 0px; white-space: pre;"&gt;&lt;tbody style="margin: 0px; padding: 0px;"&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_115" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="com" style="color: #880000;"&gt;//&lt;/span&gt;&lt;span class="com"&gt;count the number of components that are still not satisfied. When the number of components is zero, we are done&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div class="separator" style="clear: both;"&gt;
&lt;/div&gt;
&lt;table id="src_table_0" style="background-color: white; border-collapse: collapse; color: black; font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; font-size: 12px; margin: 0px; padding: 0px; white-space: pre;"&gt;&lt;tbody style="margin: 0px; padding: 0px;"&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_97" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;size_t count_components&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="kwd" style="color: #000088;"&gt;const&lt;/span&gt;&lt;span class="pln"&gt; graph_type&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;::&lt;/span&gt;&lt;span class="pln"&gt;edge_type &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;&amp;amp;&lt;/span&gt;&lt;span class="pln"&gt; edge&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;)&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;{&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_98" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pln"&gt;&amp;nbsp; &lt;span style="color: #000088;"&gt;return&lt;/span&gt;&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;(&lt;/span&gt;&lt;span class="pln"&gt;edge&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;source&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;().&lt;/span&gt;&lt;span class="pln"&gt;data&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;().&lt;/span&gt;&lt;span class="pln"&gt;comp_id &lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;!=&lt;/span&gt;&lt;span class="pln"&gt; edge&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;.&lt;/span&gt;&lt;span class="pln"&gt;target&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;().&lt;/span&gt;&lt;span class="pln"&gt;data&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;().&lt;/span&gt;&lt;span class="pln"&gt;comp_id&lt;/span&gt;&lt;span class="pun" style="color: #666600;"&gt;);&lt;/span&gt;&lt;span class="pln"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_99" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr id="sl_svn5e9fb752765eb83448cada5d30befa51bb3d0386_103" style="margin: 0px; padding: 0px;"&gt;&lt;td class="source" style="font-size: 12px; margin: 0px; padding: 0px 0px 0px 4px; vertical-align: top; white-space: pre-wrap;"&gt;&lt;span class="pun" style="color: #666600;"&gt;}&lt;/span&gt;&lt;span class="pln"&gt; &lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
The full code is &lt;a href="https://code.google.com/p/graphlabapi/source/browse/toolkits/graph_analytics/gl3bond_percolation.cpp?name=v2.2"&gt;here&lt;/a&gt;. It is now part of the graph_analytics toolkit in v2.2. Needless to say it is working.. :-)&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/syQKcNFCJ18" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/5940358903590493069/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/05/bond-percolation-in-graphlab.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/5940358903590493069?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/5940358903590493069?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/syQKcNFCJ18/bond-percolation-in-graphlab.html" title="Bond Percolation in GraphLab" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/05/bond-percolation-in-graphlab.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkAGRno5cSp7ImA9WhBbE0k.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-3282928756796670737</id><published>2013-05-11T23:38:00.001-07:00</published><updated>2013-05-11T23:38:47.429-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-11T23:38:47.429-07:00</app:edited><title>Hadoop Mortar</title><content type="html">Just got from my boss &lt;a href="http://www.cs.washington.edu/people/faculty/guestrin/"&gt;Prof. 
Carlos Guestrin&lt;/a&gt; this link:&amp;nbsp;&lt;a href="http://blog.mortardata.com/post/49934459499/recommender-systems-for-free"&gt;http://blog.mortardata.com/post/49934459499/recommender-systems-for-free&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
It looks like a nice publicity stunt - &lt;a href="http://www.mortardata.com/"&gt;Hadoop Mortar&lt;/a&gt; is proposing to build recommender systems for free for selected companies. I wonder if this will actually work?&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/pnaqu9D-IIo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/3282928756796670737/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/05/hadoop-mortar.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/3282928756796670737?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/3282928756796670737?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/pnaqu9D-IIo/hadoop-mortar.html" title="Hadoop Mortar" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/05/hadoop-mortar.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEEGQH87eip7ImA9WhBbEUw.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-8881631473017789567</id><published>2013-05-09T08:17:00.000-07:00</published><updated>2013-05-09T08:17:01.102-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-09T08:17:01.102-07:00</app:edited><title>Fun Readings about Startups</title><content type="html">I got the following from my collaborator &lt;a href="http://www.linkedin.com/pub/haijie-gu/26/aa6/a10"&gt;Jay 
Gu&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
Here's a fun reading about startup: a well written lecture notes on Peter Thiel's course at Stanford, which covers topics like culture, hiring, strategy, lessons and mindset etc. I've read first 5 chapters and found it interesting and rewarding so I decided to share with all of you.&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;div&gt;
Html version&lt;/div&gt;
&lt;div&gt;
&lt;a href="http://blakemasters.com/peter-thiels-cs183-startup" style="color: #1155cc;" target="_blank"&gt;http://blakemasters.com/peter-&lt;wbr&gt;&lt;/wbr&gt;thiels-cs183-startup&lt;/a&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Epub version if you like to read on your iPad&lt;/div&gt;
&lt;div&gt;
&lt;a href="http://stanfy.com/blog/epub-version-of-peter-thiel-lectures-at-stanford-university-cs183-startup/" style="color: #1155cc;" target="_blank"&gt;http://stanfy.com/blog/epub-&lt;wbr&gt;&lt;/wbr&gt;version-of-peter-thiel-&lt;wbr&gt;&lt;/wbr&gt;lectures-at-stanford-&lt;wbr&gt;&lt;/wbr&gt;university-cs183-startup/&lt;/a&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
Below are some fun quotes, just to give you a quick taste.&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;div&gt;
&lt;span style="font-family: Helvetica Neue;"&gt;&lt;b style="line-height: 20px;"&gt;About hiring good engin&lt;/b&gt;&lt;span style="line-height: 20px;"&gt;&lt;b&gt;eers fr&lt;/b&gt;&lt;/span&gt;&lt;b style="line-height: 20px;"&gt;om "Google":&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Helvetica Neue;"&gt;&lt;span style="line-height: 20px;"&gt;&lt;b&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;"&lt;/b&gt;&lt;/span&gt;&lt;span style="line-height: 20px;"&gt;So the way to compete against the giants is not with money. Google will outbid you. They have oil derrick that spits out $30bn in search revenue every year. To win, you need to tell a story about cogs. At Google, you’re a cog. Whereas with me, you’re an instrumental piece of this great thing that we’ll build together. Articulate the vision. Don’t even try to pay well. Meet people’s cash flow needs. Pay them so they can cover their rent and go out every once in awhile. It’s not about cash. It’s about breaking through the wall of cynicism. It’s about making 1% of this new thing way more exciting than a couple hundred grand and a cubicle at Google."&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Helvetica Neue;"&gt;&lt;span style="line-height: 20px;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;"&lt;/span&gt;&lt;span style="line-height: 20px;"&gt;We tend to massively underestimate the compounding returns of intelligence. As humans, we need to solve big problems. If you graduate Stanford at 22 and Google recruits you, you’ll work a 9-to-5. It’s probably more like an 11-to-3 in terms of hard work. They’ll pay well. It’s relaxing. But what they are actually doing is paying you to accept a much lower intellectual growth rate. When you recognize that intelligence is compounding, the cost of that missing long-term compounding is enormous.&amp;nbsp;&lt;/span&gt;&lt;em style="border: 0px; line-height: 20px; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline;"&gt;They’re not giving you the best opportunity of your life&lt;/em&gt;&lt;span style="line-height: 20px;"&gt;. Then a scary thing can happen: You might realize one day that you’ve lost your competitive edge. You won’t be the best anymore. You won’t be able to fall in love with new stuff. Things are cushy where you are. You get complacent and stall. So, run your prospective engineering hires through that narrative. Then show them the alternative: working at your startup."&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;b&gt;&lt;span style="font-family: Helvetica Neue;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;b&gt;&lt;span style="font-family: Helvetica Neue;"&gt;The fundamental question&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;span style="color: #252525; font-family: Helvetica Neue;"&gt;&lt;span style="line-height: 20px;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;"&lt;/span&gt;&lt;span style="line-height: 20px;"&gt;The path from 0 to 1 might start with asking and answering three questions:&lt;/span&gt;&lt;span style="line-height: 20px;"&gt;&amp;nbsp;First, what is valuable?&amp;nbsp;&lt;/span&gt;&lt;span style="line-height: 20px;"&gt;Second, what can I do?&amp;nbsp;&lt;/span&gt;&lt;span style="line-height: 20px;"&gt;And third, what is nobody else doing?&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;div&gt;
&lt;span style="color: #252525; font-family: Helvetica Neue;"&gt;&lt;span style="line-height: 20px;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;span style="line-height: 20px;"&gt;The intellectual rephrasing of these questions is:&lt;/span&gt;&lt;span style="line-height: 20px;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span style="border: 0px; line-height: 20px; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline;"&gt;What important truth do very few people agree with you on?&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div style="border: 0px; line-height: 20px; margin-bottom: 20px; outline: 0px; padding: 0px; vertical-align: baseline;"&gt;
&lt;span style="color: #252525; font-family: Helvetica Neue;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;The business version is:&lt;span style="border: 0px; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline;"&gt;&amp;nbsp;What valuable company is nobody building&lt;/span&gt;?"&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;b&gt;&lt;span style="font-family: Helvetica Neue;"&gt;About escaping competition:&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Helvetica Neue;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;"&lt;span style="line-height: 20px;"&gt;Intense competition makes things hard because you just beat heads with other people. The intensity of competition becomes a proxy for value. But value is a different question entirely."&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Helvetica Neue;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;b&gt;&lt;span style="font-family: Helvetica Neue;"&gt;About how to own a market:&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="line-height: 20px;"&gt;&lt;span style="font-family: Helvetica Neue;"&gt;&amp;nbsp; &amp;nbsp;&amp;nbsp; "For a company to own its market, it must have some combination of brand, scale cost advantages, network effects, or proprietary technology."&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;span style="font-family: Helvetica Neue;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;b&gt;&lt;span style="font-family: Helvetica Neue;"&gt;About hiring nerds vs athletes:&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;span style="font-family: Helvetica Neue;"&gt;&lt;span style="white-space: pre-wrap;"&gt; &lt;/span&gt;"&lt;span style="line-height: 20px;"&gt;In thinking about building good company culture, it may be helpful to dichotomize two extreme personality types: nerds and athletes. Engineers and STEM people tend to be highly intelligent, good at problem solving, and naturally non zero-sum. Athletes tend to be highly motivated fighters; you only win if the other guy loses."&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;span style="line-height: 20px;"&gt;&lt;span style="font-family: Helvetica Neue;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;"The optimal spot on the matrix is monopoly capitalism with some tailored combination of zero-sum and non zero-sum oriented people. You want to pick an environment where you don’t have to fight. But you should bring along some good fighters to protect your non zero-sum people and mission, just in case."&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/CotBxSGs0BE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/8881631473017789567/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/05/fun-readings-about-startups.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/8881631473017789567?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/8881631473017789567?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/CotBxSGs0BE/fun-readings-about-startups.html" title="Fun Readings about Startups" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/05/fun-readings-about-startups.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEEDQnsycCp7ImA9WhBbEU0.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-3934865322035528493</id><published>2013-05-09T06:37:00.005-07:00</published><updated>2013-05-09T06:37:53.598-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-09T06:37:53.598-07:00</app:edited><title>New collaborative filtering functionality in GraphLab and GraphChi</title><content type="html">&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;A few days ago I wrote in this blog about &lt;a 
href="http://bickson.blogspot.co.il/2013/05/speeding-up-parallel-als.html"&gt;parallel coordinate descent for speeding up ALS&lt;/a&gt;.&lt;/span&gt;&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;I have just implemented parallel ALS using coordinate descent a.k.a.&lt;/span&gt;&lt;br /&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
CCD++ algorithm. The algorithm is described in the following two papers:&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;
&lt;pre style="background-color: #fbfcfd; border: 1px solid rgb(196, 207, 229); font-family: monospace, fixed; font-size: 14px; line-height: 17px; margin: 4px 8px 4px 2px; overflow: auto; padding: 4px 6px; white-space: pre-wrap; word-wrap: break-word;"&gt;H.-F. Yu, C.-J. Hsieh, S. Si, I. S. Dhillon, Scalable Coordinate Descent Approaches to Parallel Matrix Factorization for Recommender Systems. IEEE International Conference on Data Mining(ICDM), December 2012.&amp;nbsp;&lt;/pre&gt;
&lt;pre style="background-color: #fbfcfd; border: 1px solid rgb(196, 207, 229); font-family: monospace, fixed; font-size: 14px; line-height: 17px; margin: 4px 8px 4px 2px; overflow: auto; padding: 4px 6px; white-space: pre-wrap; word-wrap: break-word;"&gt;Steffen Rendle, Zeno Gantner, Christoph Freudenthaler, and Lars Schmidt-Thieme. Fast context-aware recommendations with factorization machines. In Proceedings of the 34th international ACM SIGIR conference on Research and development in Information Retrieval (SIGIR '11). ACM, New York, NY, USA, 635-644.&lt;/pre&gt;
&lt;div&gt;
&lt;div&gt;
In a nutshell, it speeds up ALS by avoiding the need for costly least squares computation: each dimension (coordinate) is handled&amp;nbsp;separately in parallel.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Documentation of the method for GraphChi is here:&amp;nbsp;&lt;/div&gt;
&lt;div&gt;
&lt;a href="http://bickson.blogspot.co.il/2012/12/collaborative-filtering-with-graphchi.html" style="color: #1155cc;" target="_blank"&gt;http://bickson.blogspot.co.il/&lt;wbr&gt;&lt;/wbr&gt;2012/12/collaborative-&lt;wbr&gt;&lt;/wbr&gt;filtering-with-graphchi.html&lt;/a&gt;&lt;/div&gt;
&lt;div&gt;
Documentation of the method for GraphLab is here:&lt;/div&gt;
&lt;div&gt;
&lt;a href="http://docs.graphlab.org/collaborative_filtering.html" style="color: #1155cc;" target="_blank"&gt;http://docs.graphlab.org/&lt;wbr&gt;&lt;/wbr&gt;collaborative_filtering.html&lt;/a&gt;&lt;/div&gt;
&lt;div&gt;
Note: For GraphLab the algorithm is implemented in version 2.2, which will be released this summer. It is still possible to check out this version and try it out using the mercurial command "hg up v2.2".&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Let me know if you try it out!&lt;/div&gt;
&lt;div&gt;
&amp;nbsp;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/Zh6zTdUzumk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/3934865322035528493/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/05/new-collaborative-filtering.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/3934865322035528493?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/3934865322035528493?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/Zh6zTdUzumk/new-collaborative-filtering.html" title="New collaborative filtering functionality in GraphLab and GraphChi" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/05/new-collaborative-filtering.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A04MRn0zcCp7ImA9WhBUGE4.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-8705629454704241920</id><published>2013-05-06T00:06:00.002-07:00</published><updated>2013-05-06T04:33:07.388-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-06T04:33:07.388-07:00</app:edited><title>Speeding up parallel ALS</title><content type="html">My collaborator &lt;a href="http://www.cs.cmu.edu/~jegonzal/"&gt;Joey Gonzalez&lt;/a&gt; sent me the following paper:&lt;br /&gt;
&lt;span style="background-color: white;"&gt;H.-F. Yu,&amp;nbsp;C.-J. Hsieh, S. Si, I. S. Dhillon,&amp;nbsp;&lt;a href="http://www.cs.utexas.edu/~cjhsieh/icdm-pmf.pdf"&gt;Scalable Coordinate Descent Approaches to Parallel Matrix Factorization for Recommender Systems.&amp;nbsp;&lt;/a&gt;&lt;i&gt;IEEE International Conference on Data Mining(ICDM), December 2012.&lt;/i&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="background-color: white;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="background-color: white;"&gt;Which got the best paper award in ICDM 2012. It is a well-written paper proposing a technique for speeding up ALS (alternating least squares) when executed in parallel, by using coordinate descent. It also has a good review of the two highly repeating building blocks in matrix factorization algorithms: alternating least squares and stochastic gradient descent and a discussion on how to parallelize them.&lt;/span&gt;&lt;br /&gt;
&lt;span style="background-color: white;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="background-color: white;"&gt;When I read the paper, it reminded me of previous work of Steffen Rendle, which I shared with my blog readers &lt;a href="http://bickson.blogspot.co.il/2012/08/steffen-rendle-libfm.html"&gt;here&lt;/a&gt;:&lt;/span&gt;&lt;br /&gt;
Steffen Rendle, Zeno Gantner, Christoph Freudenthaler, and Lars Schmidt-Thieme. 2011. &lt;a href="http://www.informatik.uni-konstanz.de/rendle/pub0/sigir-2011/"&gt;Fast context-aware recommendations with factorization machines&lt;/a&gt;. In Proceedings of the 34th international ACM SIGIR conference on Research and development in Information Retrieval (SIGIR '11). ACM, New York, NY, USA, 635-644. 
&lt;br /&gt;
&lt;br /&gt;
Basically, it is the same construction. (Unfortunately it seems that the ICDM guys were not aware of the previous SIGIR paper).&lt;br /&gt;
&lt;br /&gt;
Here is the algorithm update rule from Steffen's paper:&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-E4_qqsuT_6g/UYdVWyYcNjI/AAAAAAAA728/EAuYln9htVY/s1600/Screen+Shot+2013-05-06+at+10.00.50+AM.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="121" src="http://2.bp.blogspot.com/-E4_qqsuT_6g/UYdVWyYcNjI/AAAAAAAA728/EAuYln9htVY/s400/Screen+Shot+2013-05-06+at+10.00.50+AM.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
And here is the update rule from ICDM paper:&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-lnRnZs92ebc/UYdV1dhlo_I/AAAAAAAA73E/OWLDDyqOg2k/s1600/Screen+Shot+2013-05-06+at+10.03.02+AM.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="87" src="http://3.bp.blogspot.com/-lnRnZs92ebc/UYdV1dhlo_I/AAAAAAAA73E/OWLDDyqOg2k/s400/Screen+Shot+2013-05-06+at+10.03.02+AM.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
Anyway, aside from slightly different notation, it is basically the same algorithm. And the conclusions of both papers are identical: this construction improves the performance of parallel ALS, and has favorable performance relative to SGD.&lt;br /&gt;
&lt;br /&gt;
I asked Prof. Inderjit Dhillon from The University of Texas at Austin about the relation between the papers and I got the following reply:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;i&gt;Solving the one-variable problem is simple, and was not intended to count as a "contribution" (we had similar 1-variable solutions in an &lt;a href="http://users.cis.fiu.edu/~lzhen001/activities/KDD2011Program/docs/p1064.pdf"&gt;earlier paper on coordinate descent for NMF, which appeared in KDD 2011&lt;/a&gt;). The main contribution of our ICDM paper is to present a parallel coordinate descent algorithm (on multi-core and distributed memory machines) for matrix factorization for missing value estimation. The results show it outperforms ALS and SGD, which is what most people seem to use in industry (and which is what prompted me to investigate co-ordinate descent for the problem).
&lt;/i&gt;&lt;/blockquote&gt;
To anyone who is interested in reading more, my recommendation is to read first the ICDM paper for getting a general overview, and then read the SIGIR paper for getting more specific.&lt;br /&gt;
&lt;br /&gt;
To those of you who are interested in an additional parallel coordinate descent method for the &amp;nbsp;L1 loss function, you are welcome to take a look at our ICML paper described &lt;a href="http://bickson.blogspot.co.il/2011/06/large-scale-logistic-regression-on.html"&gt;here&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/xnj0cVq2CTE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/8705629454704241920/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/05/speeding-up-parallel-als.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/8705629454704241920?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/8705629454704241920?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/xnj0cVq2CTE/speeding-up-parallel-als.html" title="Speeding up parallel ALS" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-E4_qqsuT_6g/UYdVWyYcNjI/AAAAAAAA728/EAuYln9htVY/s72-c/Screen+Shot+2013-05-06+at+10.00.50+AM.png" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/05/speeding-up-parallel-als.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEEMSXs6cCp7ImA9WhBUFU8.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-1471698734628025336</id><published>2013-05-02T12:24:00.000-07:00</published><updated>2013-05-02T12:24:48.518-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-02T12:24:48.518-07:00</app:edited><title>GraphLab Challenge @ SC13</title><content type="html">Just learned from my boss &lt;a 
href="http://www.cs.washington.edu/people/faculty/guestrin/"&gt;Prof. Carlos Guestrin&lt;/a&gt; about the &lt;a href="http://sc13.supercomputing.org/content/student-cluster-competition"&gt;student cluster competition&lt;/a&gt; which is part of the SC13 conference. The interesting part is that GraphLab programming is one of the&amp;nbsp;challenges:&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;
&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;i&gt;&lt;strong&gt;• &amp;nbsp;&amp;nbsp;&amp;nbsp;GraphLab(rador)&lt;/strong&gt;&lt;a href="http://graphlab.org/" style="color: #314c92; text-decoration: none;"&gt;&lt;strong&gt;http://graphlab.org&lt;/strong&gt;&lt;/a&gt;The GraphLab project started in 2009 to develop a new parallel computation abstraction tailored to machine learning. GraphLab scales to graphs with billions of vertices and edges easily, performing orders of magnitude faster than competing systems. GraphLab combines advances in machine learning algorithms, asynchronous distributed graph computation, prioritized scheduling, and graph placement with optimized low-level system design and efficient data-structures to achieve unmatched performance and scalability in challenging machine learning tasks.&lt;br /&gt;The GraphLab project consists of a core C++ GraphLab API and a collection of high-performance machine learning and data mining toolkits built on top of the GraphLab API. The API is built on top of standard cluster and cloud technologies: interprocess communication is accomplished over TCP-IP and MPI is used to launch and manage GraphLab programs. Each GraphLab process is multithreaded to fully utilize the multicore resources available on modern cluster nodes. GraphLab supports reading and writing to both Posix and HDFS filesystems.&lt;/i&gt;&lt;/blockquote&gt;
We will keep an eye out for the outcome of this contest...&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/I6DbQ8h-Hos" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/1471698734628025336/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/05/graphlab-challenge-sc13.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/1471698734628025336?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/1471698734628025336?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/I6DbQ8h-Hos/graphlab-challenge-sc13.html" title="GraphLab Challenge @ SC13" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/05/graphlab-challenge-sc13.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0MDR30-eip7ImA9WhBUE0Q.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-2075989314854411822</id><published>2013-04-30T03:02:00.001-07:00</published><updated>2013-05-01T02:11:16.352-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-01T02:11:16.352-07:00</app:edited><title>Recsys 2013: Yelp! Business Prediction Contest</title><content type="html">&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;I got an interesting email from &lt;a href="http://gr.linkedin.com/in/ampazis/"&gt;Prof. 
Nicholas Ampazis&lt;/a&gt;&amp;nbsp;from University of Aegean, Greece. Nicholas is trying out GraphChi gensgd for &lt;a href="https://www.kaggle.com/c/yelp-recsys-2013"&gt;Kaggle's Yelp! business prediction contest&lt;/a&gt;&amp;nbsp;which is part of Recsys 2013.&lt;/span&gt;&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;First he sent me some interesting observations about the dataset:&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;&lt;i&gt;- There are 2108 training users in the ratings (review) matrix that do&amp;nbsp;&lt;/i&gt;&lt;/span&gt;&lt;i style="color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;not appear in the training users file. The reverse is not true (i.e. all&amp;nbsp;&lt;/i&gt;&lt;i style="color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;users in the training user file have ratings).&lt;/i&gt;&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;&lt;i&gt;- All business_ids in review appear in the business file&lt;/i&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;&lt;i&gt;- There are 5315 users for which we wish to make predictions that do&amp;nbsp;&lt;/i&gt;&lt;/span&gt;&lt;i style="color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;not appear in the ratings matrix.&lt;/i&gt;&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;&lt;i&gt;- There are 1205 business_ids &amp;nbsp;for which we wish to make predictions&amp;nbsp;&lt;/i&gt;&lt;/span&gt;&lt;i style="color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;that do not appear in the ratings matrix (those always come in pairs&amp;nbsp;&lt;/i&gt;&lt;i style="color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;with the unknown users above).&lt;/i&gt;&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;&lt;i&gt;- The union of (distinct) users in the ratings matrix, training user&amp;nbsp;&lt;/i&gt;&lt;/span&gt;&lt;i style="color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;file and test user file is 51082&lt;/i&gt;&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;&lt;i&gt;- The union of (distinct) business_ids in the ratings matrix, training&amp;nbsp;&lt;/i&gt;&lt;/span&gt;&lt;i style="color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;business file and test business file is 12742&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;Nicholas has kindly agreed to share with us some of the scripts he is using, to convert the Yelp! data to GraphChi:&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;(written by his colleague Vaggelis&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;Tripolitakis - thanks!!).&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Disclaimer: we did not fine tune performance of gensgd yet so prediction quality is still poor. We plan to refine execution in the next couple of days and report results here.&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
0) Register for the competition &lt;a href="https://www.kaggle.com/c/yelp-recsys-2013/data"&gt;here&lt;/a&gt; and download the datasets into your root GraphChi folder.&lt;br /&gt;
&lt;br /&gt;
1) Download the conversion scripts from GitHub:&lt;br /&gt;
&lt;a href="https://github.com/vtripolitakis/yelpscripts" style="background-color: white; color: #1155cc; font-family: arial, sans-serif; font-size: 12.800000190734863px;" target="_blank"&gt;https://github.com/&lt;wbr&gt;&lt;/wbr&gt;vtripolitakis/yelpscripts&lt;/a&gt;&lt;br /&gt;
2) Give running permission to the script:&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;# chmod a+rx script&lt;/span&gt;&lt;br /&gt;
&lt;br style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;" /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;3) Verify that json ruby library is present using:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;# sudo gem install json&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
Note: if you do not have root permission on your machine, install the package using&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;# gem install json&lt;/span&gt;&lt;br /&gt;
and add the locally created gem folder into your path, for example:&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;#&amp;nbsp;export PATH=$PATH:/home/bickson/.gem/ruby/1.8/bin&lt;/span&gt;&lt;br /&gt;
&lt;br style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;" /&gt;
4) Use the following instructions for converting the data to GraphChi format&lt;br /&gt;
(hint: use copy &amp;amp; paste!)&lt;br /&gt;
&lt;br /&gt;
&lt;div class="im" style="background-color: white; color: #500050; font-size: 12.800000190734863px;"&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;###################### TRAINING SET ##########################&lt;br /&gt;&lt;br /&gt;#---REVIEW---&lt;br /&gt;./script yelp_training_set/yelp_&lt;wbr&gt;&lt;/wbr&gt;training_set_review.json user_id&amp;nbsp;business_id date votes stars &amp;gt; yelp_training_set_review.csv&lt;br /&gt;&lt;br /&gt;#----USER---&lt;br /&gt;./script yelp_training_set/yelp_&lt;wbr&gt;&lt;/wbr&gt;training_set_user.json user_id&amp;nbsp;review_count average_stars name votes &amp;gt; yelp_training_set_user.csv&lt;br /&gt;&lt;br /&gt;#----BUSINESS----&lt;br /&gt;./script yelp_training_set/yelp_&lt;wbr&gt;&lt;/wbr&gt;training_set_business.json business_id&amp;nbsp;open city state review_count longitude latitude categories name neighborhoods full_address stars &amp;gt;&amp;nbsp;yelp_training_set_business.csv&lt;br /&gt;&lt;br /&gt;##############################&lt;wbr&gt;&lt;/wbr&gt;##############################&lt;wbr&gt;&lt;/wbr&gt;##&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;###################### TEST SET ##########################&lt;br /&gt;&lt;br /&gt;#---REVIEW---&lt;br /&gt;./script yelp_test_set/yelp_test_set_&lt;wbr&gt;&lt;/wbr&gt;review.json user_id business_id &amp;gt;&amp;nbsp;yelp_test_set_review.csv&lt;br /&gt;&lt;br /&gt;#----USER---&lt;/span&gt;&lt;/div&gt;
&lt;div class="im" style="background-color: white; color: #500050; font-size: 12.800000190734863px;"&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;./script yelp_test_set/yelp_test_set_&lt;wbr&gt;&lt;/wbr&gt;user.json user_id review_count &amp;gt;&amp;nbsp;yelp_test_set_user.csv&lt;br /&gt;&lt;br /&gt;#----BUSINESS----&lt;/span&gt;&lt;/div&gt;
&lt;div class="im" style="background-color: white; color: #500050; font-size: 12.800000190734863px;"&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;./script yelp_test_set/yelp_test_set_&lt;wbr&gt;&lt;/wbr&gt;business.json business_id open&amp;nbsp;city state review_count longitude latitude categories name&amp;nbsp;&lt;/span&gt;&lt;span style="font-family: 'Courier New', Courier, monospace; font-size: 12.800000190734863px;"&gt;neighborhoods full_address&lt;/span&gt;&lt;span style="font-family: 'Courier New', Courier, monospace; font-size: 12.800000190734863px;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span style="font-family: 'Courier New', Courier, monospace; font-size: 12.800000190734863px;"&gt;&amp;gt; yelp_test_set_business.csv&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&lt;br /&gt;##############################&lt;wbr&gt;&lt;/wbr&gt;##############################&lt;wbr&gt;&lt;/wbr&gt;##&lt;/span&gt;&lt;/div&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&lt;span style="background-color: white; color: #222222; font-size: 12.800000190734863px;"&gt;######### CONCATENATE USER/BUSINESS FILES FROM TRAIN AND&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; color: #222222; font-size: 12.800000190734863px;"&gt;TEST ##########################&lt;/span&gt;&lt;br style="background-color: white; color: #222222; font-size: 12.800000190734863px;" /&gt;&lt;br style="background-color: white; color: #222222; font-size: 12.800000190734863px;" /&gt;&lt;span style="background-color: white; color: #222222; font-size: 12.800000190734863px;"&gt;cat yelp_training_set_user.csv yelp_test_set_user.csv &amp;gt; user_file.csv&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&lt;span class="il" style="background-color: #ffffcc; color: #222222; font-size: 12.800000190734863px;"&gt;cat&lt;/span&gt;&lt;span style="background-color: white; color: #222222; font-size: 12.800000190734863px;"&gt;&amp;nbsp;yelp_training_set_business.csv yelp_test_set_business.csv &amp;gt;&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white; color: #222222; font-size: 12.800000190734863px;"&gt;business_file.csv&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
5) Run GraphChi GENSGD&lt;br /&gt;
a) First trial: run using reviews only (without user and business information)&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;bickson@thrust:~/graphchi$ ./toolkits/collaborative_filtering/gensgd --training=yelp_training_set_review.csv --test=yelp_test_set_review.csv --from_pos=0 --to_pos=1 --val_pos=2 --rehash=1 --gensgd_mult_dec=0.999999 --quiet=1 --file_columns=3 --minval=1 --maxval=5 &amp;nbsp;--clean_cache=1&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;WARNING: &amp;nbsp;common.hpp(print_copyright:180): GraphChi Collaborative filtering library is written by Danny Bickson (c). Send any &amp;nbsp;comments or bug reports to danny.bickson@gmail.com&amp;nbsp;&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[training] =&amp;gt; [yelp_training_set_review.csv]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[test] =&amp;gt; [yelp_test_set_review.csv]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[from_pos] =&amp;gt; [0]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[to_pos] =&amp;gt; [1]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[val_pos] =&amp;gt; [2]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[rehash] =&amp;gt; [1]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[gensgd_mult_dec] =&amp;gt; [0.999999]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[quiet] =&amp;gt; [1]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[file_columns] =&amp;gt; [3]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[minval] =&amp;gt; [1]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[maxval] =&amp;gt; [5]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[clean_cache] =&amp;gt; [1]&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;...&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;2.60862) Iteration: &amp;nbsp; 0 Training RMSE: &amp;nbsp; &amp;nbsp;1.22329&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;3.39838) Iteration: &amp;nbsp; 1 Training RMSE: &amp;nbsp; &amp;nbsp;1.18201&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp; 4.2365) Iteration: &amp;nbsp; 2 Training RMSE: &amp;nbsp; &amp;nbsp;1.16143&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;5.04867) Iteration: &amp;nbsp; 3 Training RMSE: &amp;nbsp; &amp;nbsp;1.14613&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;5.89126) Iteration: &amp;nbsp; 4 Training RMSE: &amp;nbsp; &amp;nbsp;1.13354&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;6.70683) Iteration: &amp;nbsp; 5 Training RMSE: &amp;nbsp; &amp;nbsp; 1.1225&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;Found 2466 new test users with no information about them in training dataset!&lt;/span&gt;&lt;br /&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
b) second run: throw in user information&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;bickson@thrust:~/graphchi$ ./toolkits/collaborative_filtering/gensgd --training=yelp_training_set_review.csv --test=yelp_test_set_review.csv --from_pos=0 --to_pos=1 --val_pos=2 --rehash=1 --gensgd_mult_dec=0.999999 --quiet=1 --file_columns=3 --minval=1 --maxval=5 &lt;b&gt;--user_file=user_file.csv&lt;/b&gt; &amp;nbsp;--clean_cache=1&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;WARNING: &amp;nbsp;common.hpp(print_copyright:180): GraphChi Collaborative filtering library is written by Danny Bickson (c). Send any &amp;nbsp;comments or bug reports to danny.bickson@gmail.com&amp;nbsp;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[training] =&amp;gt; [yelp_training_set_review.csv]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[test] =&amp;gt; [yelp_test_set_review.csv]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[from_pos] =&amp;gt; [0]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[to_pos] =&amp;gt; [1]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[val_pos] =&amp;gt; [2]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[rehash] =&amp;gt; [1]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[gensgd_mult_dec] =&amp;gt; [0.999999]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[quiet] =&amp;gt; [1]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[file_columns] =&amp;gt; [3]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[minval] =&amp;gt; [1]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[maxval] =&amp;gt; [5]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[user_file] =&amp;gt; [user_file.csv]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;[clean_cache] =&amp;gt; [1]&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: 'Courier New', Courier, monospace; font-size: x-small;"&gt;...&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: 'Courier New', Courier, monospace; font-size: x-small;"&gt;&amp;nbsp;&amp;nbsp; 3.14781) Iteration: &amp;nbsp; 0 Training RMSE: &amp;nbsp; &amp;nbsp;1.21868&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;4.17876) Iteration: &amp;nbsp; 1 Training RMSE: &amp;nbsp; &amp;nbsp;1.10707&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;5.20784) Iteration: &amp;nbsp; 2 Training RMSE: &amp;nbsp; &amp;nbsp;1.05591&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;6.28441) Iteration: &amp;nbsp; 3 Training RMSE: &amp;nbsp; &amp;nbsp;1.01406&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;7.31922) Iteration: &amp;nbsp; 4 Training RMSE: &amp;nbsp; 0.975489&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; &amp;nbsp;8.33992) Iteration: &amp;nbsp; 5 Training RMSE: &amp;nbsp; 0.939978&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;Found 2466 new test users with no information about them in training dataset!&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
c) third run: also throw in business information:&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="im" style="background-color: white;"&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;bickson@thrust:~/graphchi$ ./toolkits/collaborative_filtering/gensgd --training=yelp_training_set_review.csv --test=yelp_test_set_review.csv --from_pos=0 --to_pos=1 --val_pos=2 --rehash=1 --gensgd_mult_dec=0.999999 --quiet=1 --file_columns=3 --minval=1 --maxval=5 --user_file=user_file.csv &lt;b&gt;--item_file=business_file.csv&lt;/b&gt; --clean_cache=1&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;WARNING: &amp;nbsp;common.hpp(print_copyright:180): GraphChi Collaborative filtering library is written by Danny Bickson (c). Send any &amp;nbsp;comments or bug reports to danny.bickson@gmail.com&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[training] =&amp;gt; [yelp_training_set_review.csv]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[test] =&amp;gt; [yelp_test_set_review.csv]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[from_pos] =&amp;gt; [0]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[to_pos] =&amp;gt; [1]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[val_pos] =&amp;gt; [2]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[rehash] =&amp;gt; [1]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[gensgd_mult_dec] =&amp;gt; [0.999999]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[quiet] =&amp;gt; [1]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[file_columns] =&amp;gt; [3]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[minval] =&amp;gt; [1]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[maxval] =&amp;gt; [5]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[user_file] =&amp;gt; [user_file.csv]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[item_file] =&amp;gt; [business_file.csv]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;[clean_cache] =&amp;gt; [1]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;...&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;&amp;nbsp; &amp;nbsp;3.62809) Iteration: &amp;nbsp; 0 Training RMSE: &amp;nbsp; &amp;nbsp;1.29575&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;&amp;nbsp; &amp;nbsp;5.10944) Iteration: &amp;nbsp; 1 Training RMSE: &amp;nbsp; &amp;nbsp;1.06187&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;&amp;nbsp; &amp;nbsp;6.50959) Iteration: &amp;nbsp; 2 Training RMSE: &amp;nbsp; 0.995394&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;&amp;nbsp; &amp;nbsp;7.92686) Iteration: &amp;nbsp; 3 Training RMSE: &amp;nbsp; 0.947596&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;&amp;nbsp; &amp;nbsp;9.35034) Iteration: &amp;nbsp; 4 Training RMSE: &amp;nbsp; 0.906372&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;&amp;nbsp; &amp;nbsp;10.7604) Iteration: &amp;nbsp; 5 Training RMSE: &amp;nbsp; &amp;nbsp;&lt;b&gt;0.86826&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #500050; font-family: Courier New, Courier, monospace;"&gt;&lt;span style="font-size: 12.727272033691406px;"&gt;Found 2466 new test users with no information about them in training dataset!&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;div style="color: #500050;"&gt;
&lt;div style="font-size: 12.800000190734863px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;b&gt;Conclusion: including user and business properties significantly improves prediction performance.&lt;/b&gt;&lt;br /&gt;
&lt;div style="font-size: 12.800000190734863px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="font-size: 12.800000190734863px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
The output of gensgd is the file&amp;nbsp;yelp_test_set_review.csv.predict&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;%%MatrixMarket (null)&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;22956 1&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&amp;nbsp; &amp;nbsp;1.4793704&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;N/A&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&amp;nbsp; &amp;nbsp;3.3002301&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&amp;nbsp; &amp;nbsp;2.8208445&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&amp;nbsp; &amp;nbsp;4.0713396&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;N/A&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&amp;nbsp; &amp;nbsp;3.1468302&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&amp;nbsp; &amp;nbsp;3.6243955&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
Next: soon I will post some update about performance of GraphChi &amp;amp; how to create the submission format out of GraphChi.&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/t0DYLU1pnIg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/2075989314854411822/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/recsys-2013-yelp-business-prediction.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2075989314854411822?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2075989314854411822?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/t0DYLU1pnIg/recsys-2013-yelp-business-prediction.html" title="Recsys 2013: Yelp! Business Prediction Contest" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/recsys-2013-yelp-business-prediction.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE8NSXc5eyp7ImA9WhBUE00.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-4464223586215709102</id><published>2013-04-27T11:56:00.001-07:00</published><updated>2013-04-29T23:21:38.923-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-29T23:21:38.923-07:00</app:edited><title>Incremental SVD</title><content type="html">Here is an email I got from Prof. Magnasco from Rockefeller University, NY:&lt;br /&gt;
&lt;br /&gt;
Hi Danny, I've seen some of your posts regarding lanczos and thought I'd ask you.&lt;br /&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
I have a problem where I need to compute the first few hundred eigenvectors/values in the PCA of a large, dense dataset, about ten thousand times five million. (It's an avi of a fluorescence microscopy experiment). For the larger datasets I might not even be able to hold the entire set in memory. The normal approach would be to compute the 10000^2 A*A' matrix and then diagonalize it, the problem being that this matrix entails 12 million dot products of 5M element vectors. So I was hoping to find a method that iteratively computes the eigenvectors without such explicit evaluations. Is Lanczos numerically stable enough for such sizes?&amp;nbsp;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Thanks,&amp;nbsp;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Marcelo&lt;/div&gt;
&lt;div&gt;
&lt;span style="border-collapse: separate; font-family: Helvetica; font-size: 12px;"&gt;&lt;/span&gt;&lt;br /&gt;
&lt;div style="background-color: white; color: #222222; font-size: 12px; margin: 0px;"&gt;
&lt;span style="border-collapse: separate; font-family: Helvetica; font-size: 12px;"&gt;&lt;span style="font-family: Courier;"&gt;______________________________&lt;wbr&gt;&lt;/wbr&gt;____________________________&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;span style="border-collapse: separate; font-family: Helvetica; font-size: 12px;"&gt;
&lt;/span&gt;
&lt;br /&gt;
&lt;div style="background-color: white; color: #222222; font-size: 12px; margin: 0px;"&gt;
&lt;span style="border-collapse: separate; font-family: Helvetica; font-size: 12px;"&gt;&lt;span style="font-family: Courier;"&gt;Marcelo Magnasco&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;&amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;Box 212, 1230 York Avenue, NY NY10065&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;span style="border-collapse: separate; font-family: Helvetica; font-size: 12px;"&gt;
&lt;/span&gt;
&lt;div style="background-color: white; color: #222222; font-size: 12px; margin: 0px;"&gt;
&lt;span style="border-collapse: separate; font-family: Helvetica; font-size: 12px;"&gt;&lt;span style="font-family: Courier;"&gt;Professor and Head,&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;v&amp;nbsp;&lt;a href="tel:%2B1%20212%203278542" style="color: #1155cc;" target="_blank" value="+12123278542"&gt;+1 212 3278542&lt;/a&gt;&amp;nbsp;f&amp;nbsp;&lt;a href="tel:%2B1%20212%203277422" style="color: #1155cc;" target="_blank" value="+12123277422"&gt;+1 212 3277422&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;span style="border-collapse: separate; font-family: Helvetica; font-size: 12px;"&gt;
&lt;div style="background-color: white; color: #222222; font-size: 12px; margin: 0px;"&gt;
&lt;span style="font-family: Courier;"&gt;Mathematical Physics Lab&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;&lt;a href="http://sur.rockefeller.edu/Plone" style="color: #1155cc;" target="_blank"&gt;http://sur.rockefeller.&lt;wbr&gt;&lt;/wbr&gt;edu/Plone&lt;/a&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-size: 12px; margin: 0px;"&gt;
&lt;span style="font-family: Courier;"&gt;The Rockefeller University&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span style="font-family: Courier;"&gt;&lt;a href="mailto:magnasco@rockefeller.edu" style="color: #1155cc;" target="_blank"&gt;magnasco@rockefeller.edu&lt;/a&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/span&gt;&lt;/div&gt;
By a great coincidence, I just heard about a solution to the same problem from&amp;nbsp;&lt;a href="http://www.cs.cmu.edu/~beb/"&gt;Byron Boots&lt;/a&gt;, a postdoc at UW:&lt;br /&gt;
&lt;br /&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 12.800000190734863px;"&gt;The incremental SVD papers that I am thinking of have been written by Matthew Brand. He has several papers on this topic, but the one that I have been using the most is this one:&amp;nbsp;&lt;/span&gt;&lt;a href="http://www.merl.com/papers/docs/TR2006-059.pdf" style="background-color: white; color: #1155cc; font-family: arial, sans-serif; font-size: 12.800000190734863px;" target="_blank"&gt;http://www.merl.com/&lt;wbr&gt;&lt;/wbr&gt;papers/docs/TR2006-059.pdf&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
Unfortunately, I am not aware of a software package which implements the Brand method. You will probably have to implement it yourself.&lt;br /&gt;
&lt;br /&gt;
An additional resource I found is another paper by Brand:&lt;br /&gt;
&lt;table border="0" style="color: #555555; font-family: 'Lucida Grande', Verdana, Arial, sans-serif; font-size: 13px;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td align="left" style="border: 0px; margin: 0px; padding: 0px 0px 1px;" valign="top"&gt;&lt;br class="Apple-interchange-newline" /&gt;Brand, M.E.,&amp;nbsp;&lt;strong&gt;“&lt;a href="http://www.bradblock.com/Incremental_singular_value_decomposition_of_uncertain_data_with_missing_values.pdf"&gt;Incremental Singular Value Decomposition of Uncertain Data with Missing Values&lt;/a&gt;”&lt;/strong&gt;,&amp;nbsp;&lt;i&gt;European Conference on Computer Vision (ECCV)&lt;/i&gt;, Vol 2350, pps 707-720, May 2002 (&lt;a href="http://link.springer.de/link/service/series/0558/bibs/2350/23500707.htm" style="color: #555555;" target="_external"&gt;Lecture Notes in Computer Science&lt;img alt="" border="0" height="12" src="http://www.merl.com/images/icons/icon_www.gif" style="border-width: 0px; margin: 0px 2px; padding: 0px;" /&gt;&lt;/a&gt;)&lt;br /&gt;&lt;br /&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
Which has a better explanation of the setup.&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/hdT8zXkHmys" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/4464223586215709102/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/incremental-svd.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4464223586215709102?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4464223586215709102?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/hdT8zXkHmys/incremental-svd.html" title="Incremental SVD" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/incremental-svd.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ck8AR3s4eip7ImA9WhBVGE4.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-2839855175690033520</id><published>2013-04-24T12:14:00.001-07:00</published><updated>2013-04-24T12:14:06.532-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-24T12:14:06.532-07:00</app:edited><title>ACM KDD CUP 2013</title><content type="html">&lt;a href="http://www.kdd.org/kdd2013/"&gt;ACM KDD (Knowledge Discovery and Data mining) 2013&lt;/a&gt; conference will be held Aug 11-14 in Chicago. The annual KDD CUP competition is organized this year by Microsoft Research.&lt;br /&gt;
There are two tracks:&lt;br /&gt;
1) Identification of authorship of academic papers - &lt;a href="http://www.kaggle.com/c/kdd-cup-2013-author-paper-identification-challenge"&gt;track 1&lt;/a&gt;.&lt;br /&gt;
2) Author disambiguation - &lt;a href="http://www.kaggle.com/c/kdd-cup-2013-author-disambiguation"&gt;track 2&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
Following the last couple of years, we hope to see some activity of users who are &lt;a href="http://graphlab.org/"&gt;utilizing GraphLab&lt;/a&gt; for computing part of the solution. I will update more once we get interesting results to report.&lt;br /&gt;
&lt;br /&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/xDNW0AvGce4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/2839855175690033520/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/acm-kdd-cup-2013.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2839855175690033520?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2839855175690033520?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/xDNW0AvGce4/acm-kdd-cup-2013.html" title="ACM KDD CUP 2013" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/acm-kdd-cup-2013.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DU4GR3c7eCp7ImA9WhBVF0w.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-1264322178499304934</id><published>2013-04-23T04:47:00.000-07:00</published><updated>2013-04-23T04:52:06.900-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-23T04:52:06.900-07:00</app:edited><title>Presto: distributed R framework from HP Labs</title><content type="html">I got from my collaborator &lt;a href="http://www.cs.cmu.edu/~akyrola/"&gt;Aapo Kyrola&lt;/a&gt; the following pointer to &lt;a href="http://www.hpl.hp.com/research/documentation.htm"&gt;Presto&lt;/a&gt;.&lt;br /&gt;
Presto is an interesting system which allows large scale computation in R by distributing the computational workload in a cluster. Presto implements distributed arrays and thus allows efficient implementation of linear algebra primitives like matrix-vector product.&lt;br /&gt;
&lt;br /&gt;
The following two papers were recently published about Presto:&lt;br /&gt;
&lt;br /&gt;
&lt;ul style="background-color: white; font-family: Arial, Verdana, Helvetica, sans-serif; font-size: 12px;"&gt;
&lt;li&gt;&lt;a class="bold" href="http://eurosys2013.tudos.org/" style="color: #003366; font-weight: bold;"&gt;Presto: Distributed Machine Learning and Graph Processing with Sparse Matrices.&lt;/a&gt;&amp;nbsp;Shivaram Venkataraman, Erik Bodzsar, Indrajit Roy, Alvin AuYoung, Rob Schreiber. Eurosys 2013, Prague, Czech Republic.&lt;/li&gt;
&lt;li&gt;&lt;a class="bold" href="https://www.usenix.org/system/files/conference/hotcloud12/hotcloud12-final11.pdf" style="color: #003366; font-weight: bold;"&gt;Using R for Iterative and Incremental Processing.&lt;/a&gt;&amp;nbsp;Shivaram Venkataraman, Indrajit Roy, Alvin AuYoung, Rob Schreiber. HotCloud 2012, Boston, USA.&lt;/li&gt;
&lt;/ul&gt;
In those papers, a large number of applications were implemented in Presto like K-means, ALS, pagerank, vertex centrality, shortest path and others. A large performance gain of x15 - x40 is demonstrated over Hadoop and Spark.&lt;br /&gt;
&lt;br /&gt;
Unfortunately, it is not clear if Presto will be released as an open source project.&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/xonmFArzBuo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/1264322178499304934/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/presto-distributed-r-framework-from.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/1264322178499304934?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/1264322178499304934?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/xonmFArzBuo/presto-distributed-r-framework-from.html" title="Presto: distributed R framework from HP Labs" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/presto-distributed-r-framework-from.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0cDSX08cSp7ImA9WhBVEkQ.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-4382024988740829659</id><published>2013-04-18T07:23:00.002-07:00</published><updated>2013-04-18T07:24:38.379-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-18T07:24:38.379-07:00</app:edited><title>Distributed Dual Decomposition (DDD) in GraphLab</title><content type="html">Our collaborator &lt;a href="http://filebox.ece.vt.edu/~dbatra/"&gt;Dhruv Batra&lt;/a&gt;, from Virginia Tech has kindly 
contributed DDD code for GraphLab. Here is some explanation of the method and how to deploy it.&lt;br /&gt;
The full documentation is found &lt;a href="http://docs.graphlab.org/graphical_models.html"&gt;here&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;h1 style="-webkit-transition: text-shadow 0.5s linear; background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 20px; margin-right: 15px; transition: text-shadow 0.5s linear;"&gt;
Distributed Dual Decomposition&lt;/h1&gt;
&lt;div style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
Dual Decomposition (DD), also called Lagrangian Relaxation, is a powerful technique with a rich history in Operations Research. DD solves a relaxation of difficult optimization problems by decomposing them into simpler subproblems, solving these simpler subproblems independently and then combining these solutions into an approximate global solution.&lt;/div&gt;
&lt;div style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
More details about DD for solving Maximum A Posteriori (MAP) inference problems in Markov Random Fields (MRFs) can be found in the following:&lt;/div&gt;
&lt;pre class="fragment" style="background-color: #fbfcfd; border: 1px solid rgb(196, 207, 229); font-family: monospace, fixed; font-size: 14px; line-height: 17px; margin: 4px 8px 4px 2px; overflow: auto; padding: 4px 6px; word-wrap: break-word;"&gt;D. Sontag, A. Globerson, T. Jaakkola. 
Introduction to Dual Decomposition for Inference. 
Optimization for Machine Learning, editors S. Sra, S. Nowozin, and S. J. Wright: MIT Press, 2011.
&lt;/pre&gt;
&lt;h2 style="-webkit-transition: text-shadow 0.5s linear; background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 16px; margin-right: 15px; transition: text-shadow 0.5s linear;"&gt;
&lt;a class="anchor" href="http://www.blogger.com/blogger.g?blogID=3211409948956809184" id="running_ddd" style="color: #3d578c; font-weight: normal;"&gt;&lt;/a&gt;Running DDD&lt;/h2&gt;
&lt;div style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
The input MRF graph is assumed to be in the standard&amp;nbsp;&lt;a href="http://www.cs.huji.ac.il/project/PASCAL/fileFormat.php" style="color: #4665a2; text-decoration: none;"&gt;UAI file format&lt;/a&gt;. For example a 3x3 grid MRF can be found here:&amp;nbsp;&lt;a href="http://www.cs.huji.ac.il/project/PASCAL/examples/grid3x3.uai" style="color: #4665a2; text-decoration: none;"&gt;grid3x3.uai&lt;/a&gt;.&lt;/div&gt;
&lt;div style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
The program can be run like this:&lt;/div&gt;
&lt;pre class="fragment" style="background-color: #fbfcfd; border: 1px solid rgb(196, 207, 229); font-family: monospace, fixed; font-size: 14px; line-height: 17px; margin: 4px 8px 4px 2px; overflow: auto; padding: 4px 6px; word-wrap: break-word;"&gt;&amp;gt; ./dd --graph grid3x3.uai 
&lt;/pre&gt;
&lt;div style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
Other arguments are:&lt;/div&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--help&lt;/b&gt;&amp;nbsp;Display the help message describing the list of options.&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--output&lt;/b&gt;&amp;nbsp;The output directory in which to save the final predictions.&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--dualimprovthres&lt;/b&gt;&amp;nbsp;(Optional, default 0.00001) The amount of change in dual objective (in log-space) that will be tolerated at convergence.&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--pdgapthres&lt;/b&gt;&amp;nbsp;(Optional, default 0.1) The tolerance level for zero primal-dual gap.&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--maxiter&lt;/b&gt;&amp;nbsp;(Optional, default 10000) The maximum number of dual update iterations.&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--engine&lt;/b&gt;&amp;nbsp;(Optional, Default: asynchronous) The engine type to use when executing the vertex-programs&lt;ul&gt;
&lt;li&gt;&lt;b&gt;synchronous&lt;/b&gt;: All LoopyBP updates are run at the same time (Synchronous BP). This engine exposes greater parallelism but is less computationally efficient.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;asynchronous&lt;/b&gt;: LoopyBP updates are run asynchronously with priorities (Residual BP). This engine has greater overhead and exposes less parallelism but can substantially improve the rate of convergence.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--ncpus&lt;/b&gt;&amp;nbsp;(Optional, Default 2) The number of local computation threads to use on each machine. This should typically match the number of physical cores.&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--scheduler&lt;/b&gt;&amp;nbsp;(Optional, Default sweep) The scheduler to use when running with the asynchronous engine. The default is typically sufficient.&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--engine_opts&lt;/b&gt;&amp;nbsp;(Optional, Default empty) Any additional engine options. See&amp;nbsp;&lt;b&gt;--engine_help&lt;/b&gt;&amp;nbsp;for a list of options.&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--graph_opts&lt;/b&gt;&amp;nbsp;(Optional, Default empty) Any additional graph options. See&amp;nbsp;&lt;b&gt;--graph_help&lt;/b&gt;&amp;nbsp;for a list of options.&lt;/li&gt;
&lt;/ul&gt;
&lt;ul style="background-color: white; font-family: 'Lucida Grande', Verdana, Geneva, Arial, sans-serif; font-size: 13px; line-height: 16px;"&gt;
&lt;li&gt;&lt;b&gt;--scheduler_opts&lt;/b&gt;&amp;nbsp;(Optional, Default empty) Any additional scheduler options. See&amp;nbsp;&lt;b&gt;--scheduler_help&lt;/b&gt;&amp;nbsp;for a list of options.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
&lt;span style="font-family: Lucida Grande, Verdana, Geneva, Arial, sans-serif;"&gt;&lt;span style="font-size: 12.727272033691406px; line-height: 15.994318008422852px;"&gt;Anyone who tries to run it - please let us know!&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="font-family: Lucida Grande, Verdana, Geneva, Arial, sans-serif;"&gt;&lt;span style="font-size: 12.727272033691406px; line-height: 15.994318008422852px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/cOto_1q3nkA" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/4382024988740829659/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/distributed-dual-decomposition-ddd-in.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4382024988740829659?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4382024988740829659?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/cOto_1q3nkA/distributed-dual-decomposition-ddd-in.html" title="Distributed Dual Decomposition (DDD) in GraphLab" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/distributed-dual-decomposition-ddd-in.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE4MRH86cSp7ImA9WhBVE04.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-4662201680356886128</id><published>2013-04-17T12:35:00.003-07:00</published><updated>2013-04-18T17:56:25.119-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-18T17:56:25.119-07:00</app:edited><title>CLiMF Algorithm in GraphChi</title><content type="html">I got some good news to report: last week we got a great contribution from &lt;a href="http://uk.linkedin.com/pub/mark-levy/9/762/16"&gt;Mark Levy&lt;/a&gt; (last.fm) for GraphChi collaborative filtering toolkit. 
Mark has implemented the CLiMF algorithm, described in the paper: &lt;a href="http://www.ci.tuwien.ac.at/~alexis/Publications_files/climf-recsys12.pdf"&gt;CLiMF: learning to maximize reciprocal rank with collaborative less-is-more filtering. Yue Shi, Martha Larson, Alexandros Karatzoglou, Nuria Oliver, Linas Baltrunas, Alan Hanjalic,
Sixth ACM Conference on Recommender Systems, RecSys '12.&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp;CLiMF is a ranking method which optimizes &lt;a href="http://en.wikipedia.org/wiki/Mean_reciprocal_rank"&gt;MRR (mean reciprocal rank)&lt;/a&gt; which is an information retrieval measure for top-K recommenders. CLiMF is a variant of latent factor CF which optimises a significantly different objective function to most methods: instead of trying to predict ratings CLiMF aims to maximise MRR of relevant items. The MRR is the reciprocal rank of the first relevant item found when unseen items are sorted by score i.e. the MRR is 1.0 if the item with the highest score is a relevant prediction, 0.5 if the first item is not relevant but the second is, and so on. By optimising MRR rather than RMSE or similar measures CLiMF naturally promotes diversity as well as accuracy in the recommendations generated. CLiMF uses stochastic gradient ascent to maximise a smoothed lower bound for the actual MRR. It assumes binary relevance, as in friendship or follow relationships, but the graphchi implementation lets you specify a relevance threshold for ratings so you can run the algorithm on standard CF datasets and have the ratings automatically interpreted as binary preferences.&lt;br /&gt;
&lt;br /&gt;
CLiMF-related command-line options:&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&amp;nbsp;--binary_relevance_thresh=xx&lt;/span&gt;   Consider the item liked/relevant if rating is at least this value [default: 0]&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&amp;nbsp;--halt_on_mrr_decrease&lt;/span&gt;         Halt if the training set objective (smoothed MRR) decreases [default: false]&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&amp;nbsp;--num_ratings&lt;/span&gt;                  Consider this many top predicted items when computing actual MRR on validation set [default:10000]&lt;br /&gt;
&lt;br /&gt;
Here is an example of running CLiMF on Netflix data:&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;./toolkits/collaborative_filtering/climf --training=smallnetflix_mm --validation=smallnetflix_mme --binary_relevance_thresh=4 --sgd_gamma=1e-6 --max_iter=6 --quiet=1 --sgd_step_dec=0.9999 --sgd_lambda=1e-6&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Training objective:-9.00068e+07&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Validation MRR: &amp;nbsp;0.169322&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Training objective:-9.00065e+07&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Validation MRR: &amp;nbsp;0.171909&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Training objective:-9.00062e+07&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Validation MRR: &amp;nbsp;0.172372&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Training objective:-9.0006e+07&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Validation MRR: &amp;nbsp;0.172503&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Training objective:-9.00057e+07&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Validation MRR: &amp;nbsp;0.172544&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Training objective:-9.00054e+07&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;&amp;nbsp; Validation MRR: &amp;nbsp;0.172549&lt;/span&gt;&lt;br /&gt;
&lt;div&gt;
&lt;br /&gt;
I am very excited about this development - and I hope many more users will follow with additional contributions to our growing code base! Thanks Mark!!!&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/N9xLSNjLFWk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/4662201680356886128/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/climf-algorithm-in-graphchi.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4662201680356886128?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4662201680356886128?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/N9xLSNjLFWk/climf-algorithm-in-graphchi.html" title="CLiMF Algorithm in GraphChi" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/climf-algorithm-in-graphchi.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DE8BQn8-eCp7ImA9WhBVEk8.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-4143294345898797914</id><published>2013-04-17T12:27:00.002-07:00</published><updated>2013-04-17T12:27:33.150-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-17T12:27:33.150-07:00</app:edited><title>DARPA PPAML</title><content type="html">I got this following &lt;a href="http://www.darpa.mil/NewsEvents/Releases/2013/03/19a.aspx"&gt;DARPA call&lt;/a&gt; from Mike Draugelis, our man in Lockheed Martin:&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div style="border: 0px; font-family: Arial, Helvetica, sans-serif; font-size: 13px; line-height: 17.328125px; margin-bottom: 20px; outline: 0px; padding: 0px; vertical-align: baseline;"&gt;
&lt;span style="background-color: white;"&gt;Machine learning – the ability of computers to understand data, manage results, and infer insights from uncertain information – is the force behind many recent revolutions in computing. Email spam filters, smartphone personal assistants and self-driving vehicles are all based on research advances in machine learning. Unfortunately, even as the demand for these capabilities is accelerating, every new application requires a Herculean effort. &amp;nbsp;Even a team of specially-trained machine learning experts makes only painfully slow progress due to the lack of tools to build these systems.&lt;/span&gt;&lt;/div&gt;
&lt;div style="border: 0px; font-family: Arial, Helvetica, sans-serif; font-size: 13px; line-height: 17.328125px; margin-bottom: 20px; outline: 0px; padding: 0px; vertical-align: baseline;"&gt;
&lt;span style="background-color: white;"&gt;The Probabilistic Programming for Advanced Machine Learning (PPAML) program was launched to address this challenge. Probabilistic programming is a new programming paradigm for managing uncertain information. By incorporating it into machine learning, PPAML seeks to greatly increase the number of people who can successfully build machine learning applications and make machine learning experts radically more effective. Moreover, the program seeks to create more economical, robust and powerful applications that need less data to produce more accurate results – features inconceivable with today’s technology.&lt;/span&gt;&lt;/div&gt;
&lt;br /&gt;
And here is the call abstract:&lt;br /&gt;
&lt;span style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;The goal of the PPAML program is to advance machine learning by using probabilistic programming to 1)&amp;nbsp;&lt;/span&gt;&lt;em style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;dramatically increase the number of people who can successfully build machine learning applications&lt;/em&gt;&lt;span style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;, 2)&amp;nbsp;&lt;/span&gt;&lt;em style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;make machine learning experts radically more effective&lt;/em&gt;&lt;span style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;, and 3)&amp;nbsp;&lt;/span&gt;&lt;em style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;enable new applications that are impossible to conceive of using today’s technology&lt;/em&gt;&lt;span style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;. In support of this overarching goal, PPAML has a number of sub-goals. 
Specifically, the sub-goals are 1)&amp;nbsp;&lt;/span&gt;&lt;em style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;to make machine learning model code shorter&lt;/em&gt;&lt;span style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;, 2)&amp;nbsp;&lt;/span&gt;&lt;em style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;to reduce development time&lt;/em&gt;&lt;span style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;, 3)&amp;nbsp;&lt;/span&gt;&lt;em style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;to facilitate the construction of richer models&lt;/em&gt;&lt;span style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;, 4)&amp;nbsp;&lt;/span&gt;&lt;em style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;to require lower levels of expertise in building machine learning applications&lt;/em&gt;&lt;span style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;, and 5)&amp;nbsp;&lt;/span&gt;&lt;em style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;to support the construction of integrated models&lt;/em&gt;&lt;span style="background-color: white; color: #333333; font-family: Arial, Helvetica, sans-serif; font-size: 12px; line-height: 16px;"&gt;.&lt;/span&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/Za03_38yy7k" height="1" width="1"/&gt;</content><link rel="replies" 
type="application/atom+xml" href="http://bickson.blogspot.com/feeds/4143294345898797914/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/darpa-ppaml.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4143294345898797914?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4143294345898797914?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/Za03_38yy7k/darpa-ppaml.html" title="DARPA PPAML" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/darpa-ppaml.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0ICSX44cSp7ImA9WhBbGUU.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-4150184786545282627</id><published>2013-04-10T10:25:00.000-07:00</published><updated>2013-05-19T11:52:48.039-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-19T11:52:48.039-07:00</app:edited><title>The GraphLab Workshop - Why Should You Care?</title><content type="html">Everyone knows that one of the hottest topics today is big data analytics.&amp;nbsp;&lt;a href="http://graphlab.org/graphlab-workshop-2013/"&gt;The GraphLab workshop&lt;/a&gt; is a "trade show" for all the significant graph analytics and graph database solutions. In one day you could learn more about the following systems (a preliminary list!):&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;h1 style="background-color: white; border: 0px; color: #181818; font-family: Helvetica, Helvetica, serif; font-size: 32px; font-weight: normal; line-height: 1.25; margin: 0px 0px 12px; padding: 0px; vertical-align: baseline;"&gt;
&lt;/h1&gt;
&lt;h1 style="border: 0px; font-size: 32px; font-weight: normal; line-height: 1.25; margin: 0px 0px 12px; padding: 0px; vertical-align: baseline;"&gt;
&lt;/h1&gt;
&lt;h1 style="background-color: white; border: 0px; color: #181818; font-family: Helvetica, Helvetica, serif; font-size: 32px; font-weight: normal; line-height: 1.25; margin: 0px 0px 12px; padding: 0px; vertical-align: baseline;"&gt;
Featured Projects&lt;/h1&gt;
&lt;table style="background-color: white; border-bottom-left-radius: 3px; border-bottom-right-radius: 3px; border-collapse: collapse; border-spacing: 0px; border-top-left-radius: 3px; border-top-right-radius: 3px; border: 1px solid rgb(221, 221, 221); color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 19.09090805053711px; margin: 0px 0px 18px; padding: 0px; vertical-align: baseline;"&gt;&lt;tbody style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" height="75" scale="0" src="http://3.bp.blogspot.com/-rMxX5eg5jTw/UWWWG4UCOGI/AAAAAAAA7uc/j94PD3BWEGY/s200/Picture+19.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="200" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;Google’s&amp;nbsp;&lt;a href="http://googleresearch.blogspot.co.il/2009/06/large-scale-graph-computing-at-google.html" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Pregel&amp;nbsp;&lt;/a&gt;is their Bulk Synchronous graph framework.&amp;nbsp;&lt;a href="http://research.google.com/pubs/mirrokni.html" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Prof. Vahab Mirrokni&lt;/a&gt;&amp;nbsp;is going to give an oral talk about graph processing @ Google.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="background-color: #f9f9f9; border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" height="75" scale="0" src="http://3.bp.blogspot.com/-RSzie99aip8/UWWXhASzklI/AAAAAAAA7uk/B9gRtCzycHw/s320/Picture+20.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;img alt="" border="0" height="75" scale="0" src="http://1.bp.blogspot.com/-a2fkwo8OyhQ/UWWYTCHxrsI/AAAAAAAA7us/nK_odACaB1k/s200/Picture+22.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="75" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;Apache Giraph is the open source equivalent system to Google’s Pregel.&amp;nbsp;&lt;a href="http://www.linkedin.com/in/averyching/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Dr. Avery Ching&lt;/a&gt;, one of the Giraph contributors, will give a talk about large scale graph processing @ Facebook.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://1.bp.blogspot.com/-EfXbZb7mseU/UWWZMphXJCI/AAAAAAAA7u0/RKO8_9qe0d0/s320/Picture+23.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://twitter.com/pankaj" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Dr. Pankaj Gupta&lt;/a&gt;, the creator of Cassovary Graph Processing system @ Twitter will give a talk about Who To Follow (WTF) service in Twitter.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="background-color: #f9f9f9; border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://1.bp.blogspot.com/-vajqDlHY3b4/UXY8KsWBjgI/AAAAAAAA7x0/Yp4veaD4O74/s320/Screen+Shot+2013-04-23+at+10.45.05+AM.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="175" /&gt;&lt;img alt="" border="0" scale="0" src="http://4.bp.blogspot.com/-7iG_HLn9944/UXY7SFmMHWI/AAAAAAAA7xo/K23eI62_kiQ/s320/Screen+Shot+2013-04-23+at+8.29.05+AM.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="125" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://research.microsoft.com/en-us/projects/naiad/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Naiad&lt;/a&gt;&amp;nbsp;is a parallel data flow framework from Microsoft with the focus of incremental computation.&amp;nbsp;&lt;a href="http://research.microsoft.com/en-us/people/derekmur/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; 
font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Dr. Derek Murray&lt;/a&gt;&amp;nbsp;from Microsoft Research will present Naiad.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://graphlab.org/wp-content/uploads/2013/05/Screen-Shot-2013-05-03-at-9.25.45-PM.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="200" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;Intel GraphBuilder is software for creating graphs out of raw data, utilizing Hadoop for parallel graph creation.&amp;nbsp;&lt;a href="http://www.linkedin.com/pub/ted-willke/7/860/83a/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Dr. Theodore Willke&lt;/a&gt;&amp;nbsp;from Intel Labs will present Intel Labs' work in this domain.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="background-color: #f9f9f9; border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://3.bp.blogspot.com/-KeGAimx943c/UWWaDtGkl4I/AAAAAAAA7u8/jgO4PrYi9Ns/s320/Picture+24.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="200" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://graphlab.org/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;GraphLab&lt;/a&gt;&amp;nbsp;is CMU+UW open source graph processing system, which supports both bulk synchronous parallel as well as asynchronous computation.&amp;nbsp;&lt;a href="http://www.cs.washington.edu/people/faculty/guestrin/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Prof. Carlos Guestrin&lt;/a&gt;&amp;nbsp;will present the latest GraphLab project.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://4.bp.blogspot.com/-8O85-e_N3so/UWWbEjb0buI/AAAAAAAA7vE/0b0vGwTZGzY/s320/Picture+25.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;Allegro Graph is a high performance graph database with RDF support.&amp;nbsp;&lt;a href="http://www.franz.com/about/bios/jaasman.lhtml" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Jans Aasman, the CEO of Franz&lt;/a&gt;, will give a demo of their newest graph database.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="background-color: #f9f9f9; border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://4.bp.blogspot.com/-q2XnZPsuklI/UWWcFayT_DI/AAAAAAAA7vM/s8m8KSofCEA/s320/Picture+26.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="150" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://gauss.cs.ucsb.edu/~aydin/CombBLAS/html/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Combinatorial BLAS&lt;/a&gt;&amp;nbsp;is a distributed memory parallel graph library from LBNL/UCSB.&amp;nbsp;&lt;a href="http://gauss.cs.ucsb.edu/~aydin/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Dr. Aydin Buluc&lt;/a&gt;&amp;nbsp;will present comb-BLAS.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://2.bp.blogspot.com/-2xQboyA6cdM/UX94GAkkNNI/AAAAAAAA7yk/iEITTDLSLjg/s320/Screen+Shot+2013-04-30+at+10.48.45+AM.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://www.cs.washington.edu/node/4217/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Grappa&lt;/a&gt;&amp;nbsp;is a distributed graph processing framework using commodity processors, from The University of Washington.&amp;nbsp;&lt;a href="http://homes.cs.washington.edu/~oskin/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Prof. Mark Oskin&lt;/a&gt;&amp;nbsp;will present Grappa.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="background-color: #f9f9f9; border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img border="0" scale="0" src="http://graphlab.org/wp-content/uploads/2013/05/Screen-Shot-2013-05-08-at-10.44.43-AM.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="150" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;Presto is a distributed framework for speeding up R computations by HP Labs.&amp;nbsp;&lt;a href="http://shivaram.info/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Shivaram Venkataraman&lt;/a&gt;&amp;nbsp;from Berkeley and &lt;a href="https://www.acis.ufl.edu/~klee/"&gt;Kyungyong Lee&lt;/a&gt; will present Presto.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://4.bp.blogspot.com/-hnQZpqc115U/UWWcbqBdJaI/AAAAAAAA7vU/dqpw8NrT8vQ/s320/Picture+27.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="150" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://thinkaurelius.github.io/titan/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Titan&lt;/a&gt;&amp;nbsp;is a distributed graph database.&amp;nbsp;&lt;a href="http://www.matthiasb.com/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Dr. Matthias Broecheler&lt;/a&gt;&amp;nbsp;will present Titan.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="background-color: #f9f9f9; border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://4.bp.blogspot.com/-gTAxa2AFi84/UWWc3tRqFXI/AAAAAAAA7vc/fJZYTpCuBuM/s320/Picture+28.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://www.neo4j.org/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Neo4j&lt;/a&gt;&amp;nbsp;is an open source distributed graph database in Java. Alex Averbuch from neo4j will present neo4j.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://1.bp.blogspot.com/-EUYKeEpB760/UXY6kTeXkDI/AAAAAAAA7xg/iVkrlsHrJS0/s320/Screen+Shot+2013-04-23+at+10.37.09+AM.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;Infinite Graph from Objectivity is a distributed graph database.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="background-color: #f9f9f9; border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://4.bp.blogspot.com/-TXwL28Pf56E/UWWdnMrZOyI/AAAAAAAA7vk/gx71_Reu4dk/s320/Picture+30.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://www.sparsity-technologies.com/dex.php" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;DEX&lt;/a&gt;&amp;nbsp;is a high performance and scalable graph database system. Norbert Martinez will present DEX.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://3.bp.blogspot.com/-CLAGvZcF0xE/UWWfGQOtfDI/AAAAAAAA7vw/575wB2AnBG0/s320/Picture+31.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" width="200" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://yarcdata.com/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;YarcData&lt;/a&gt;, a Cray spinoff, is creating customized hardware solutions for ultra fast graph processing.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="background-color: #f9f9f9; border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://4.bp.blogspot.com/-ej4xwdCDLHo/UWWkMOeIQGI/AAAAAAAA7wU/xlFY8pRGP9w/s320/Picture+29.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://www.systap.com/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Systap LLC&lt;/a&gt;&amp;nbsp;is a startup working on speeding up graph algorithms using GPUs.&amp;nbsp;&lt;a href="http://www.linkedin.com/pub/bryan-thompson/30/390/54b" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Bryan Thompson&lt;/a&gt;&amp;nbsp;from Systap will present preliminary results of applying the gather apply scatter model on GPU.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://event.cwi.nl/grades2013/images/ldbc.gif" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://www.ldbc.eu/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Linked Data Benchmark Council (LDBC)&lt;/a&gt;, a new EU FP7 project that aims to establish industry cooperation on graph database benchmarks, benchmark practices and benchmark results. Dr. Alex Averbuch (Neo Technologies), Norbert Martinez (Polytechnic University of Catalonia)&lt;br /&gt;
and Dr. Andrey Gubichev (Technical University of Munich) will present LDBC.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;table style="border-bottom-left-radius: 3px; border-bottom-right-radius: 3px; border-collapse: collapse; border-spacing: 0px; border-top-left-radius: 3px; border-top-right-radius: 3px; border: 1px solid rgb(221, 221, 221); color: #555555; font-size: 13.63636302947998px; line-height: 19.09090805053711px; margin: 0px 0px 18px; padding: 0px; vertical-align: baseline;"&gt;&lt;tbody style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div style="border: 0px; clear: none; color: #555555; font-size: 13.63636302947998px; line-height: 1.25; margin-bottom: 10px; padding: 0px; vertical-align: baseline;"&gt;
Other notable talks at the GraphLab workshop:&lt;/div&gt;
&lt;table style="border-bottom-left-radius: 3px; border-bottom-right-radius: 3px; border-collapse: collapse; border-spacing: 0px; border-top-left-radius: 3px; border-top-right-radius: 3px; border: 1px solid rgb(221, 221, 221); color: #555555; font-size: 13.63636302947998px; line-height: 19.09090805053711px; margin: 0px 0px 18px; padding: 0px; vertical-align: baseline;"&gt;&lt;tbody style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://1.bp.blogspot.com/-UgOFXAbr_2Q/UWWhnx33SXI/AAAAAAAA7v8/B3KnXwSlxQ8/s320/Picture+32.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://trifacta.com/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Trifacta&lt;/a&gt;&amp;nbsp;is the hottest Bay Area startup out there, started by Prof. Joe Hellerstein from Berkeley and Prof. Jeffrey Heer from Stanford. Prof. Joe Hellerstein will talk about Productivity for Data Analysts: Visualization, Intelligence and Scale.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="background-color: #f9f9f9; border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://2.bp.blogspot.com/-8bVaE_boCwY/UWWicFFw7EI/AAAAAAAA7wE/Px-FMgiL4Aw/s320/Picture+33.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;Dr. Lei Tang from Walmart Labs will talk about adaptive user segmentation for collaborative filtering.&lt;/td&gt;&lt;/tr&gt;
&lt;tr style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline;"&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;img alt="" border="0" scale="0" src="http://1.bp.blogspot.com/-IiqiaC9Hb-Q/UWWjExr3F6I/AAAAAAAA7wM/4T3Pi6fNWV0/s320/Picture+34.png" style="border: 0px; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/td&gt;&lt;td style="border: none; color: #333333; font-family: inherit; font-size: 12px; font-style: inherit; font-variant: inherit; line-height: 18px; margin: 0px; padding: 9px 10px; vertical-align: top;"&gt;&lt;a href="http://www.alpinedatalabs.com/" style="border: 0px; color: #0085cf; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Alpine Data Labs&amp;nbsp;&lt;/a&gt;is a Greenplum spinoff focusing on big data analytics. Steven Hillion will describe a case study of big data analytics on top of Hadoop.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div class="sharedaddy sd-sharing-enabled" style="border-bottom-left-radius: 0px !important; border-bottom-right-radius: 0px !important; border-top-left-radius: 0px !important; border-top-right-radius: 0px !important; border: 0px; clear: both; color: #555555; font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 12px; line-height: inherit; margin: 0px; padding: 0px; vertical-align: baseline; zoom: 1;"&gt;
&lt;div class="robots-nocontent sd-block sd-social sd-social-icon sd-sharing" style="border-bottom-left-radius: 0px !important; border-bottom-right-radius: 0px !important; border-top-color: rgba(0, 0, 0, 0.129412); border-top-left-radius: 0px !important; border-top-right-radius: 0px !important; border-top-style: solid; border-width: 1px 0px 0px; font-family: inherit; font-size: 14px; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; padding: 10px 0px 5px; vertical-align: baseline; width: 607.1306762695313px; zoom: 1;"&gt;
&lt;div class="sd-content" style="border-bottom-left-radius: 0px !important; border-bottom-right-radius: 0px !important; border-top-left-radius: 0px !important; border-top-right-radius: 0px !important; border: 0px; float: right; font-family: inherit; font-size: 14px; font-style: inherit; font-variant: inherit; line-height: inherit; margin: -2px 0px 0px; padding: 0px; vertical-align: baseline; width: 498.59375px;"&gt;
&lt;ul style="background-image: none; border: none; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; list-style: none; margin: 0px; padding: 0px !important; vertical-align: baseline; zoom: 1;"&gt;
&lt;li class="share-reddit" style="background-image: none; border: none; display: block; float: left; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; list-style: none; margin: 0px 5px 5px 0px !important; padding: 0px !important; vertical-align: baseline;"&gt;&lt;a class="share-reddit sd-button share-icon no-text" href="http://graphlab.org/graphlab-workshop-2013/preliminary-agenda/?share=reddit&amp;amp;nb=1" rel="nofollow" style="background-image: -webkit-linear-gradient(top, rgb(247, 247, 247) 0%, rgb(239, 239, 239) 100%); border-bottom-left-radius: 3px; border-bottom-right-radius: 3px; border-top-left-radius: 3px; border-top-right-radius: 3px; border: 1px solid rgb(221, 221, 221) !important; box-shadow: rgb(255, 255, 255) 0px 1px 0px inset; color: rgb(0, 0, 0) !important; display: inline-block; font-size: 12px; font-variant: inherit; height: 21px; line-height: 1; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline; width: 21px;" title="Click to share on Reddit"&gt;&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;br /&gt;
&lt;b&gt;Stay tuned - additional talks and demos are going to be added soon&lt;/b&gt;!&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/0LvdhaYe3YM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/4150184786545282627/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/the-graphlab-workshop-why-should-you.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4150184786545282627?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/4150184786545282627?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/0LvdhaYe3YM/the-graphlab-workshop-why-should-you.html" title="The GraphLab Workshop - Why Should You Care?" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-rMxX5eg5jTw/UWWWG4UCOGI/AAAAAAAA7uc/j94PD3BWEGY/s72-c/Picture+19.png" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/the-graphlab-workshop-why-should-you.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0MCR304cSp7ImA9WhBWFEk.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-2655079675315834679</id><published>2013-04-07T13:30:00.000-07:00</published><updated>2013-04-08T11:24:26.339-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-08T11:24:26.339-07:00</app:edited><title>Facebook graph 
benchmark system</title><content type="html">I just heard from &lt;a href="http://www.cs.washington.edu/people/faculty/guestrin/"&gt;Carlos Guestrin&lt;/a&gt; about a &lt;a href="http://gigaom.com/2013/04/01/facebook-builds-a-database-benchmark-for-a-graph-powered-world/"&gt;new graph benchmark system from Facebook&lt;/a&gt;&amp;nbsp;called &lt;a href="https://www.facebook.com/notes/facebook-engineering/linkbench-a-database-benchmark-for-the-social-graph/10151391496443920"&gt;LinkBench&lt;/a&gt;. I will be happy to hear when anyone tries it out...&lt;br /&gt;
&lt;br /&gt;
Additionally, I got from &lt;a href="http://www.linkedin.com/pub/nilesh-jain/2/353/469/"&gt;Nilesh Jain&lt;/a&gt; from Intel Labs a link to &lt;a href="http://www.ldbc.eu/"&gt;LDBC&lt;/a&gt; an EU project for promoting graph benchmarks in industry. I was not aware of this project, but they are organizing the &lt;a href="http://bickson.blogspot.co.il/2012/12/sigmod-2013-grades-graph-workshop.html"&gt;GRADES workshop&lt;/a&gt; I previously wrote about. And guess who is giving the keynote talk at the GRADES workshop? You may have guessed right - &lt;a href="http://www.cs.washington.edu/people/faculty/guestrin/"&gt;Carlos Guestrin&lt;/a&gt; is going to talk about GraphLab. The GRADES workshop will take place June 23rd in NY.&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/GBR8QyyaVxE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/2655079675315834679/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/facebook-graph-benchmark-system.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2655079675315834679?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2655079675315834679?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/GBR8QyyaVxE/facebook-graph-benchmark-system.html" title="Facebook graph benchmark system" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" 
/></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/facebook-graph-benchmark-system.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUEERnkzcSp7ImA9WhBWF0k.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-3610028808670967821</id><published>2013-04-07T07:49:00.000-07:00</published><updated>2013-04-11T23:20:07.789-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-11T23:20:07.789-07:00</app:edited><title>Insider ML Jobs</title><content type="html">In this blog post I will publish some open ML positions relating to big data analytics I got from my contacts. Those positions are not public yet and are published first
in this blog.
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-vPYZbdk0_CQ/UWGG_3iK_AI/AAAAAAAA7uE/Z8aZCyfe0p0/s1600/Picture+18.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="198" src="http://2.bp.blogspot.com/-vPYZbdk0_CQ/UWGG_3iK_AI/AAAAAAAA7uE/Z8aZCyfe0p0/s200/Picture+18.png" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
PhD &amp;nbsp;in CS, Bio-informatics,&amp;nbsp;&lt;span style="background-color: white;"&gt;EE, physics, Statistics&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white;"&gt;post doctoral fellow in &amp;nbsp;biomedical informatics for 1 year.&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white;"&gt;Emphasis on applications of Big Data to medicine.&amp;nbsp;&lt;/span&gt;&lt;span style="background-color: white;"&gt;The fellow will be involved in projects which mine the electronic health records of the Veterans Affairs system.&lt;/span&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
For details contact&amp;nbsp;Alon Ben-Ari, MD, department of anesthesiology VA Puget Sound Seattle, Washington.&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
&lt;span class="il" style="background-color: #ffffcc; color: #222222;"&gt;&lt;a href="mailto:alon.benari@gmail.com" style="color: #1155cc;" target="_blank"&gt;alon.benari@gmail.com&lt;/a&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;
Stay tuned - more jobs to be posted soon..&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/KzxJKZQHBZQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/3610028808670967821/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/insider-ml-jobs.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/3610028808670967821?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/3610028808670967821?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/KzxJKZQHBZQ/insider-ml-jobs.html" title="Insider ML Jobs" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-vPYZbdk0_CQ/UWGG_3iK_AI/AAAAAAAA7uE/Z8aZCyfe0p0/s72-c/Picture+18.png" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/insider-ml-jobs.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUMGSH87fCp7ImA9WhBWEEQ.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-2547345864434260072</id><published>2013-04-04T09:31:00.000-07:00</published><updated>2013-04-04T09:37:09.104-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-04T09:37:09.104-07:00</app:edited><title>Spotlight: Blaze C++ math library</title><content type="html">&lt;span style="font-family: inherit;"&gt;My collaborator &lt;a href="http://www.cs.cmu.edu/~ylow/"&gt;Yucheng Low&lt;/a&gt;, asked 
me to take a look at &lt;a href="https://code.google.com/p/blaze-lib/"&gt;Blaze math library&lt;/a&gt;. I did a quick review and here are my findings.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;div style="background-color: white; color: #222222;"&gt;
&lt;span style="font-family: inherit;"&gt;Blaze is an interesting effort with relatively easy programming interface (see for example CG code:&amp;nbsp;&lt;a href="http://code.google.com/p/blaze-lib/wiki/Getting_Started" style="color: #1155cc;" target="_blank"&gt;http://code.google.com/&lt;wbr&gt;&lt;/wbr&gt;p/&lt;span class="il" style="background-color: #ffffcc; background-position: initial initial; background-repeat: initial initial; color: #222222;"&gt;blaze&lt;/span&gt;-lib/wiki/Getting_&lt;wbr&gt;&lt;/wbr&gt;Started&lt;/a&gt;&amp;nbsp;under "A complex example").&lt;/span&gt;&lt;/div&gt;
&lt;div style="background-color: white; color: #222222;"&gt;
&lt;span style="font-family: inherit;"&gt;The main guy behind the software is Klaus Iglberger, a PhD from Germany. Two papers were published about Blaze:&lt;/span&gt;&lt;/div&gt;
&lt;div style="background-color: white;"&gt;
&lt;ul style="color: #222222; max-width: 62em; padding-left: 25px;"&gt;
&lt;li style="margin-bottom: 0.3em; margin-left: 15px;"&gt;&lt;span style="font-family: inherit;"&gt;K. Iglberger, G. Hager, J. Treibig, and U. Rüde:&amp;nbsp;&lt;strong&gt;Expression Templates Revisited: A Performance Analysis of Current Methodologies&lt;/strong&gt;(&lt;a href="http://epubs.siam.org/sisc/resource/1/sjoce3/v34/i2/pC42_s1" rel="nofollow" style="color: #0000cc;" target="_blank"&gt;Download&lt;/a&gt;). SIAM Journal on Scientific Computing, 34(2): C42--C69, 2012&lt;/span&gt;&lt;/li&gt;
&lt;li style="margin-bottom: 0.3em; margin-left: 15px;"&gt;&lt;span style="font-family: inherit;"&gt;K. Iglberger, G. Hager, J. Treibig, and U. Rüde:&amp;nbsp;&lt;strong&gt;High Performance Smart Expression Template Math Libraries&lt;/strong&gt;&amp;nbsp;(&lt;a href="http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=06266939" rel="nofollow" style="color: #0000cc;" target="_blank"&gt;Download&lt;/a&gt;). Proceedings of the 2nd International Workshop on New Algorithms and Programming Models for the Manycore Era (APMM 2012) at HPCS 2012&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;div style="color: #222222;"&gt;
&lt;span style="font-family: inherit;"&gt;&lt;span style="color: black;"&gt;Regarding performance, at least for the tested primitives they have &lt;a href="http://code.google.com/p/blaze-lib/wiki/Benchmarks"&gt;very good performance&lt;/a&gt;. It seems they work very well for small matrices (less than 100 width) even relative to MKL. For larger matrices they have performance similar to MKL. &lt;/span&gt;The performance tests do not cover sparse matrices (only dense).&lt;/span&gt;&lt;/div&gt;
&lt;div style="color: #222222;"&gt;
&lt;span style="color: black; font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div style="color: #222222;"&gt;
&lt;span style="color: black; font-family: inherit;"&gt;What is missing, IMHO, is an algorithmic suite like linear solvers, SVD etc. that exists in Eigen. So currently &lt;a href="https://code.google.com/p/blaze-lib/"&gt;Blaze&lt;/a&gt; focuses mainly on matrix-vector operations.&lt;/span&gt;&lt;/div&gt;
&lt;div style="color: #222222;"&gt;
&lt;span style="color: black; font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div style="color: #222222;"&gt;
&lt;span style="color: black; font-family: inherit;"&gt;It seems &lt;a href="https://code.google.com/p/blaze-lib/"&gt;Blaze&lt;/a&gt; is using a single core implementation (they do not exploit&amp;nbsp;parallelism).&amp;nbsp;&lt;/span&gt;&lt;/div&gt;
&lt;div style="color: #222222;"&gt;
&lt;span style="color: black; font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
Overall, it seems like a very interesting project to keep track of. Once it supports some additional functionality I would consider using it.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
To dig a little deeper, I sent some questions to &lt;a href="https://code.google.com/p/blaze-lib/"&gt;Klaus&lt;/a&gt;&amp;nbsp;Iglberger, who was very kind to promptly reply:&lt;/div&gt;
&lt;div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-O33bnvC4GZQ/UV2rqeeug8I/AAAAAAAA7tg/bBiPW7FSe_4/s1600/Portrait_Xing.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="200" src="http://2.bp.blogspot.com/-O33bnvC4GZQ/UV2rqeeug8I/AAAAAAAA7tg/bBiPW7FSe_4/s200/Portrait_Xing.jpg" width="151" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;br /&gt;
&lt;div class="im" style="background-color: white; color: #500050;"&gt;
&lt;span style="font-family: inherit;"&gt;&amp;gt; We were looking for a good math library to replace Eigen and we liked Blaze API. But we still have some missing functionality I wanted to ask you about.&lt;/span&gt;&lt;/div&gt;
&lt;span style="background-color: white; color: #222222; font-family: inherit;"&gt;We released the Blaze library only recently, in August 2012. Therefore Blaze is obviously much younger than Eigen and cannot compete in terms of features. Currently it can only compete in terms of performance (it seems to be the most efficient C++ math library for many operations) and in terms of software architecture and design. The software design and architecture is one of our personal interests and will therefore always play a major role in the development of Blaze. However, due to that effort, I feel that Blaze can be used more naturally than the other C++ math libraries (including Eigen).&lt;/span&gt;&lt;br /&gt;
&lt;br style="background-color: white; color: #222222;" /&gt;
&lt;span style="background-color: white; color: #222222; font-family: inherit;"&gt;Since you are asking about features, let me give you an idea of our current roadmap. We are currently working on views (which you can for instance use to work on submatrices, extract parts of the result from vectors and matrices, etc.), special purpose matrices (banded matrices, upper and lower triangle matrices, etc.) and shared memory parallelization. These will be the next big features, starting with views in Blaze 1.2.&lt;/span&gt;&lt;br /&gt;
&lt;br style="background-color: white; color: #222222;" /&gt;
&lt;span style="background-color: white; color: #222222; font-family: inherit;"&gt;Whereas in direct comparison Blaze cannot compete in the total number of features, Blaze still offers a small number of unique features. The probably most important is the support of the Intel MIC architecture (Xeon Phi). Second is the support of the AVX instruction set, that is still not available in most other C++ math libraries. Third, Blaze is probably the only library that allows a completely hierarchic nesting of matrix and vector data types without performance penalties. For instance, you can define block structured matrices very conveniently:&lt;/span&gt;&lt;br /&gt;
&lt;br style="background-color: white; color: #222222;" /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&lt;span style="background-color: white; color: #222222;"&gt;typedef CompressedMatrix&amp;lt; DynamicMatrix&amp;lt;double,rowMajor&amp;gt;&lt;/span&gt;&lt;wbr style="background-color: white; color: #222222;"&gt;&lt;/wbr&gt;&lt;span style="background-color: white; color: #222222;"&gt;, rowMajor&amp;gt; &amp;nbsp;BlockStructuredMatrix;&lt;/span&gt;&lt;br style="background-color: white; color: #222222;" /&gt;&lt;br style="background-color: white; color: #222222;" /&gt;&lt;span style="background-color: white; color: #222222;"&gt;BlockStructuredMatrix A, B, C;&lt;/span&gt;&lt;br style="background-color: white; color: #222222;" /&gt;&lt;span style="background-color: white; color: #222222;"&gt;// … Initializing the matrices&lt;/span&gt;&lt;br style="background-color: white; color: #222222;" /&gt;&lt;span style="background-color: white; color: #222222;"&gt;C = A * B;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br style="background-color: white; color: #222222;" /&gt;
&lt;span style="background-color: white; color: #222222; font-family: inherit;"&gt;In this matrix multiplication you can still count on every single matrix multiplication to be executed at maximum performance (see also the answer to your third question). And last but not least, with the introduction of views, Blaze will offer an extremely versatile feature to restrict the computation to the "parts" you are interested in:&lt;/span&gt;&lt;br /&gt;
&lt;br style="background-color: white; color: #222222;" /&gt;
&lt;span style="font-family: Courier New, Courier, monospace;"&gt;&lt;span style="background-color: white; color: #222222;"&gt;DynamicMatrix&amp;lt;double,rowMajor&amp;gt; A, B;&lt;/span&gt;&lt;br style="background-color: white; color: #222222;" /&gt;&lt;span style="background-color: white; color: #222222;"&gt;DynamicVector&amp;lt;double,&lt;/span&gt;&lt;wbr style="background-color: white; color: #222222;"&gt;&lt;/wbr&gt;&lt;span style="background-color: white; color: #222222;"&gt;columnVector&amp;gt; x;&lt;/span&gt;&lt;br style="background-color: white; color: #222222;" /&gt;&lt;br style="background-color: white; color: #222222;" /&gt;&lt;span style="background-color: white; color: #222222;"&gt;// Restricts the computation of the matrix multiplication to the fourth column and still considers the most efficient way&lt;/span&gt;&lt;br style="background-color: white; color: #222222;" /&gt;&lt;span style="background-color: white; color: #222222;"&gt;// to compute the result although both matrices are stored in a row-wise fashion.&lt;/span&gt;&lt;br style="background-color: white; color: #222222;" /&gt;&lt;span style="background-color: white; color: #222222;"&gt;x = column( A * B, 4 );&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;div class="im" style="background-color: white; color: #500050;"&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;br /&gt;&amp;gt; 1) Is there a plan to support sparse matrix algorithms like solving a linear system, eigen decomposition etc.&lt;/span&gt;&lt;/div&gt;
&lt;span style="font-family: inherit;"&gt;&lt;span style="background-color: white; color: #222222;"&gt;We currently don't plan to extend our linear system solvers, but hope that they can be added easily based on the data structures that we provide.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;div class="im" style="background-color: white; color: #500050;"&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;br /&gt;&amp;gt; 2) What is the level of support for parallelism? Namely, is the library fully serial or do you have some support for parallelism when there are multiple cores.&lt;/span&gt;&lt;/div&gt;
&lt;span style="font-family: inherit;"&gt;&lt;span style="background-color: white; color: #222222;"&gt;Until now, Blaze is completely serial, except for the vectorization (which is of course also a level of parallelization). So unfortunately, currently a single operation can only use a single core. But, as already stated, we are currently working on shared memory parallelization, but it will take some time until we release this feature.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;div class="im" style="background-color: white; color: #500050;"&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;br /&gt;&amp;gt; 3) According to the performance plots, on large matrices blaze performance aligns with MKL. Is there some mechanism which sends the computation to MKL once the problem is big enough and otherwise uses blaze code?&lt;/span&gt;&lt;/div&gt;
&lt;span style="font-family: inherit;"&gt;&lt;span style="background-color: white; color: #222222;"&gt;Blaze tries to detect several characteristics about the involved matrices. One of these characteristics is the size, which is used to determine which algorithm is most beneficial for performance. Whereas the MKL offers by far the best performance for large matrices, for small matrices the performance is less favorable due to optimizations that only work well for large matrices and therefore cause a performance penalty for small matrices. Therefore Blaze provides a couple of special algorithms tailored for small matrices (for instance, these algorithms don't use blocking strategies and use the data a little differently). The threshold for this algorithm switching can be configured in one of the configuration files: blaze/config/Threshold.h. With these you can tune Blaze to a specific target platform.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;div class="im" style="background-color: white; color: #500050;"&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&amp;gt; 4) Is there support for serialization for writing and loading matrices from file?&lt;/span&gt;&lt;/div&gt;
&lt;span style="font-family: inherit;"&gt;&lt;span style="background-color: white; color: #222222;"&gt;We unfortunately neglected the support for writing to file and loading from file a little. Currently, only the DynamicVector class supports this feature. The member functions you can use are called 'read' and 'write', respectively. However, I have added this to our list of features for the next release, since this will not take too much time to implement.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;div class="im" style="background-color: white; color: #500050;"&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;br /&gt;&amp;gt; Thanks a lot for your time!&lt;/span&gt;&lt;/div&gt;
&lt;span style="background-color: white; color: #222222;"&gt;&lt;span style="font-family: inherit;"&gt;You're very welcome. Please don't hesitate to contact me again if you have further questions (or possible feature requests) or if you have suggestions of how to improve Blaze. Hopefully you consider Blaze for your work, even if some features are currently missing. Please keep me posted on your decision.&lt;/span&gt;&lt;/span&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/kwAsv27FlGA" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/2547345864434260072/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/04/spotlight-blaze-c-math-library.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2547345864434260072?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/2547345864434260072?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/kwAsv27FlGA/spotlight-blaze-c-math-library.html" title="Spotlight: Blaze C++ math library" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-O33bnvC4GZQ/UV2rqeeug8I/AAAAAAAA7tg/bBiPW7FSe_4/s72-c/Portrait_Xing.jpg" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/04/spotlight-blaze-c-math-library.html</feedburner:origLink></entry><entry 
gd:etag="W/&quot;CkUNRXo7fip7ImA9WhBUEks.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-1797506044205086540</id><published>2013-03-27T03:04:00.000-07:00</published><updated>2013-04-29T11:31:34.406-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-29T11:31:34.406-07:00</app:edited><title>The 2nd GraphLab workshop is coming up!</title><content type="html">&lt;span style="color: red;"&gt;&lt;b&gt;An update: Just got a limited number of discount codes for this blog readers. The first few to email me will get 30% discount, in addition to the early bird rate!!&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
Following the &lt;a href="http://bickson.blogspot.co.il/2012/07/graphlab-workshop-is-over.html"&gt;great success&lt;/a&gt;&amp;nbsp;of the &lt;a href="http://graphlab.org/workshop2012/agenda/"&gt;first GraphLab workshop&lt;/a&gt;, we have started to organize this year's event, in July in the Bay Area. To remind you, last year we wanted to organize a 15-20 people event, which eventually got a participation of 300+ researchers from 100+ companies.&lt;br /&gt;
&lt;br /&gt;
The main aim of this year's workshop is to bring together top researchers from academia, as well as top data scientists from industry, with a special focus on large scale machine learning on sparse graphs.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The event will take place Monday July 1st, 2013 in San&amp;nbsp;Francisco. &lt;a href="http://glw2.eventbrite.com/"&gt;Early bird registration is now open&lt;/a&gt;!&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;
&lt;b&gt;&lt;a href="http://graphlab.org/graphlab-workshop-2013/"&gt;Preliminary agenda&lt;/a&gt;&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;
A (preliminary) list of our program committee:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/dipu1025"&gt;Deepak Agarwal&lt;/a&gt;, LinkedIn&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/pub/john-mark-agosta/1/70a/a37/"&gt;John Mark Agosta&lt;/a&gt;, Toyota InfoTechnology Center USA&lt;/li&gt;
&lt;li&gt;Alex Averbuch, Neo4j&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/bieschke"&gt;Eric Bieschke&lt;/a&gt;, Pandora Internet Radio&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/jimblomo/"&gt;Jim Blomo&lt;/a&gt;, Yelp!&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/mbreternitz/"&gt;Mauricio Breternitz&lt;/a&gt;, AMD&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.matthiasb.com/"&gt;Matthias Broecheler&lt;/a&gt;, Aurelius&lt;/li&gt;
&lt;li&gt;&lt;a href="http://fr.linkedin.com/in/igorcarron/"&gt;Igor Carron&lt;/a&gt;, &lt;a href="http://nuit-blanche.blogspot.com/"&gt;Nuit Blanche&lt;/a&gt; &amp;amp; Space Engineering Research Center&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/averyching/"&gt;Avery Ching&lt;/a&gt;, Facebook&lt;/li&gt;
&lt;li&gt;&lt;a href="http://parasians.com/aboutus_Team.html"&gt;Jike Chong&lt;/a&gt;, CMU SV Campus&lt;/li&gt;
&lt;li&gt;Brad Cox, Technica Corporation&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/pub/yogesh-dalal/14/623/793"&gt;Yogesh Dalal&lt;/a&gt;, Ebay&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/ranjitpdesai"&gt;Ranjit Desai&lt;/a&gt;, Adobe&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/teddunning"&gt;Ted Dunning&lt;/a&gt;, MapR&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/michaeldraugelis/"&gt;Michael Draugelis&lt;/a&gt;, Lockheed Martin Corporation&lt;/li&gt;
&lt;li&gt;Frank Elliot, Opera Solutions&lt;/li&gt;
&lt;li&gt;Baldo Faieta, Adobe&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/pub/hulya-emir-farinas/0/148/766/"&gt;Hulya Emir-Farinas&lt;/a&gt;, Greenplum&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.cs.washington.edu/people/faculty/guestrin/"&gt;Carlos Guestrin&lt;/a&gt;, University of Washington&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/aharbick/"&gt;Andy Harbick&lt;/a&gt;, Rosetta Stone&lt;/li&gt;
&lt;li&gt;&lt;a href="http://ttic.uchicago.edu/~tamir/Tamir_Hazans_web_page.html"&gt;Tamir Hazan&lt;/a&gt;, Toyota Technical Institute Chicago&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/shillion/"&gt;Steven Hillion&lt;/a&gt;, Alpine Data Labs&lt;/li&gt;
&lt;li&gt;Nilesh Jain, Intel Labs&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/leejones"&gt;Lee Jones&lt;/a&gt;, Cisco&lt;/li&gt;
&lt;li&gt;Nick Kolegraff, Rackspace&lt;/li&gt;
&lt;li&gt;&lt;a href="http://research.yahoo.com/Edo_Liberty"&gt;Edo Liberty&lt;/a&gt;, Yahoo! Labs&lt;/li&gt;
&lt;li&gt;&lt;a href="http://radar.oreilly.com/ben"&gt;Ben Lorica&lt;/a&gt;, O'Reilly&lt;/li&gt;
&lt;li&gt;&lt;a href="http://cs.stanford.edu/people/mmahoney/"&gt;Michael Mahoney&lt;/a&gt;, Stanford&lt;/li&gt;
&lt;li&gt;&lt;a data-mce-href="http://es.linkedin.com/in/norbertmb" href="http://es.linkedin.com/in/norbertmb" style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; line-height: 18.99147605895996px;"&gt;Norbert Martinez&lt;/a&gt;&lt;span style="color: #333333; font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; line-height: 18.99147605895996px;"&gt;, Sparsity Technologies&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/charlesmartin14"&gt;Charles Martin&lt;/a&gt;, Gerson Lehrman Group&lt;/li&gt;
&lt;li&gt;&lt;a href="http://people.csail.mit.edu/mirrokni/Welcome.html"&gt;Vahab Mirrokni&lt;/a&gt;, Google Research&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/pub/ashfaq-munshi/27/382/93"&gt;Ash Munshi&lt;/a&gt;, Knobout Inc.&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/in/janneumann"&gt;Jan Neumann&lt;/a&gt;, Comcast&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/pub/andrew-nystrom/3/7aa/496"&gt;Andrew Nystrom&lt;/a&gt;, Thomson Reuters&amp;nbsp;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.dama.upc.edu/the-team"&gt;Josep Lluís Larriba Pey&lt;/a&gt;, Universitat Politècnica de Catalunya&lt;/li&gt;
&lt;li&gt;&lt;a href="https://sites.google.com/site/nikolaosvasiloglouii/"&gt;Nikolaos Vasiloglou II&lt;/a&gt;, Ismion&lt;/li&gt;
&lt;li&gt;&lt;a href="http://paloalto.thlab.net/people/udi-weinsberg"&gt;Udi Weinsberg&lt;/a&gt;, Technicolor Labs&lt;/li&gt;
&lt;li&gt;&lt;a href="http://staff.psc.edu/welling/"&gt;Joel Welling&lt;/a&gt;, Pittsburgh Supercomputing Center&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/pub/theodore-willke/7/860/83a"&gt;Ted Willke&lt;/a&gt;, Intel Labs&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/pub/josh-wills/0/82b/138"&gt;Josh Wills&lt;/a&gt;, Cloudera&lt;/li&gt;
&lt;li&gt;&lt;a href="http://jovo.me/"&gt;Joshua Vogelstein&lt;/a&gt;, Duke&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/pub/lei-tang/8/394/a54"&gt;Lei Tang&lt;/a&gt;, Walmart Labs&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.linkedin.com/pub/bryan-thompson/30/390/54b"&gt;Bryan Thompson&lt;/a&gt;, Systap&lt;/li&gt;
&lt;li&gt;Tao Ye, Pandora Internet Radio&lt;/li&gt;
&lt;li&gt;&lt;a href="http://tr.linkedin.com/in/nezihyigitbasi"&gt;Nezih Yigitbasi&lt;/a&gt;, Intel Labs&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
A preliminary list of our sponsors:&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;a href="http://www.logicblox.com/"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-wnzBxvkucYU/UUAAs_aFvJI/AAAAAAAA7Wg/9s0Ctm4WykA/s1600/logixblox.png" /&gt;&lt;/a&gt;&lt;a href="http://pipefish.com/" style="clear: right; display: inline !important; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-2TpwjReBT7Q/UUAAs7e25iI/AAAAAAAA7Wk/NRGP0yKsv9s/s1600/pipefish.png" /&gt;&lt;/a&gt;&amp;nbsp;&lt;a href="https://evbdn.eventbrite.com/s3-s3/eventlogos/38783/hiringsolvedlogo.png" imageanchor="1" style="clear: left; display: inline !important; margin-bottom: 1em; margin-right: 1em; text-align: center;"&gt;&lt;img border="0" src="https://evbdn.eventbrite.com/s3-s3/eventlogos/38783/hiringsolvedlogo.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;a href="https://evbdn.eventbrite.com/s3-s3/eventlogos/38783/lexisnexis.png" imageanchor="1" style="clear: left; display: inline !important; margin-bottom: 1em; margin-right: 1em; text-align: center;"&gt;&lt;img border="0" src="https://evbdn.eventbrite.com/s3-s3/eventlogos/38783/lexisnexis.png" /&gt;&lt;/a&gt;&lt;a data-mce-href="http://www.alpinedatalabs.com/" href="http://www.alpinedatalabs.com/" style="clear: left; display: inline !important; font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img alt="" class="alignnone" data-mce-src="https://evbdn.eventbrite.com/s3-s3/eventlogos/38783/alpinedatalabslogo.png" src="https://evbdn.eventbrite.com/s3-s3/eventlogos/38783/alpinedatalabslogo.png" style="border: 0px;" title="Alpine Data Labs" width="158" /&gt;&lt;/a&gt;&lt;br /&gt;
&lt;div style="color: #333333; font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;"&gt;
&lt;a data-mce-href="http://www.technicolor.com/" href="http://www.technicolor.com/" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img alt="" class="alignnone" data-mce-src="https://evbdn.eventbrite.com/s3-s3/eventlogos/38783/logotechnicolorrvb.png" src="https://evbdn.eventbrite.com/s3-s3/eventlogos/38783/logotechnicolorrvb.png" style="border: 0px;" title="Technicolor" width="158" /&gt;&lt;/a&gt;&lt;br /&gt;
&lt;a href="http://www.ldbc.eu/" style="background-color: white; border: 0px; color: #5ca9d6; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 19.09090805053711px; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;" target="_parent"&gt;&lt;img alt="" border="0px" height="20px" scale="0" src="http://event.cwi.nl/grades2013/images/ldbc.gif" style="border: none; font-family: inherit; font-size: inherit; font-style: inherit; font-variant: inherit; height: auto; line-height: inherit; margin: 0px; max-width: 100%; padding: 0px; vertical-align: baseline;" /&gt;&lt;/a&gt;&lt;span style="background-color: white; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 19.09090805053711px;"&gt;&amp;nbsp;The GraphLab workshop is co-sponsored by the&amp;nbsp;&lt;/span&gt;&lt;a href="http://www.ldbc.eu/" style="background-color: white; border: 0px; color: #0085cf; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 19.09090805053711px; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;Linked Data Benchmark Council (LDBC)&lt;/a&gt;&lt;span style="background-color: white; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 19.09090805053711px;"&gt;, a new EU FP7 project that aims to establish industry cooperation on graph database benchmarks, benchmark practices and benchmark results. 
A recommended event is the&amp;nbsp;&lt;/span&gt;&lt;a href="http://event.cwi.nl/grades2013/" style="background-color: white; border: 0px; color: #0085cf; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 19.09090805053711px; margin: 0px; outline: none; padding: 0px; text-decoration: none; vertical-align: baseline;"&gt;SIGMOD GRADES workshop&lt;/a&gt;&lt;span style="background-color: white; color: #555555; font-family: Helvetica, Helvetica, serif; font-size: 13.63636302947998px; line-height: 19.09090805053711px;"&gt;, June 23rd in NY.&lt;/span&gt;&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/ddmfdHusXak" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/1797506044205086540/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/02/the-2nd-graphlab-workshop-is-coming-up.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/1797506044205086540?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/1797506044205086540?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/ddmfdHusXak/the-2nd-graphlab-workshop-is-coming-up.html" title="The 2nd GraphLab workshop is coming up!" 
/><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-wnzBxvkucYU/UUAAs_aFvJI/AAAAAAAA7Wg/9s0Ctm4WykA/s72-c/logixblox.png" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/02/the-2nd-graphlab-workshop-is-coming-up.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEYGRXY9cSp7ImA9WhBQGE0.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-6050479144594283816</id><published>2013-03-20T12:02:00.000-07:00</published><updated>2013-03-20T12:02:04.869-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-20T12:02:04.869-07:00</app:edited><title>Twitter WTF (Who to Follow) Paper</title><content type="html">I got the following interesting paper from my collaborator &lt;a href="http://www.cs.cmu.edu/~akyrola/"&gt;Aapo Kyrola&lt;/a&gt;:&lt;br /&gt;
&lt;span style="background-color: white; color: #555555; font-family: Verdana, 'BitStream vera Sans', Tahoma, Helvetica, sans-serif; font-size: 12px; line-height: 17px;"&gt;Pankaj Gupta, Ashish Goel, Jimmy Lin, Aneesh Sharma, Dong Wang, and Reza Zadeh.&amp;nbsp;&lt;/span&gt;&lt;b style="background-color: white; color: #555555; font-family: Verdana, 'BitStream vera Sans', Tahoma, Helvetica, sans-serif; font-size: 12px; line-height: 17px;"&gt;&lt;a href="http://www2013.org/2013/02/21/practice-and-experience-track-accepted-papers/"&gt;WTF: The Who to Follow Service at Twitter&lt;/a&gt;.&lt;/b&gt;&lt;span style="background-color: white; color: #555555; font-family: Verdana, 'BitStream vera Sans', Tahoma, Helvetica, sans-serif; font-size: 12px; line-height: 17px;"&gt;&amp;nbsp;&lt;/span&gt;&lt;i style="background-color: white; color: #555555; font-family: Verdana, 'BitStream vera Sans', Tahoma, Helvetica, sans-serif; font-size: 12px; line-height: 17px;"&gt;Proceedings of the 22nd International World Wide Web Conference (WWW 2013)&lt;/i&gt;&lt;span style="background-color: white; color: #555555; font-family: Verdana, 'BitStream vera Sans', Tahoma, Helvetica, sans-serif; font-size: 12px; line-height: 17px;"&gt;, May 2013, Rio de Janeiro, Brazil.&lt;/span&gt;&lt;br /&gt;
&lt;span style="background-color: white; color: #555555; font-family: Verdana, 'BitStream vera Sans', Tahoma, Helvetica, sans-serif; font-size: 12px; line-height: 17px;"&gt;&lt;br /&gt;&lt;/span&gt;

It details Twitter's "Who to Follow" recommendation service. In a nutshell, Twitter uses two&amp;nbsp;algorithms: an egocentric random walk (personalized PageRank) and &lt;a href="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.130.3254"&gt;SALSA&lt;/a&gt;, which is a bipartite random walk (similar to the HITS algorithm). &lt;br /&gt;
&lt;br /&gt;
In terms of infrastructure they use Twitter Cassovary graph processing system, on top of a single multicore machine, which is rather surprising considering Twitter graph size. Anyway this shows that proper efficient implementation on a single multicore machine can scale to very large models.&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/JHzd-i26pPI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/6050479144594283816/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/03/twitter-wtf-who-to-follow-paper.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/6050479144594283816?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/6050479144594283816?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/JHzd-i26pPI/twitter-wtf-who-to-follow-paper.html" title="Twitter WTF (Who to Follow) Paper" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/03/twitter-wtf-who-to-follow-paper.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEQCSXs_fip7ImA9WhBQFUU.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-8945918520867512230</id><published>2013-03-17T21:52:00.002-07:00</published><updated>2013-03-17T21:52:48.546-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-17T21:52:48.546-07:00</app:edited><title>Spotlight: Large Scale 
Distributed Deep Networks</title><content type="html">I got from Liu from &lt;a href="http://www.tencent.com/en-us/index.shtml"&gt;Tencent&lt;/a&gt;, the following paper from Google:&lt;br /&gt;
Jeffrey Dean, Greg S. Corrado, Rajat Monga, Kai Chen, Matthieu Devin, Quoc V. Le, Mark Z. Mao, Marc’Aurelio Ranzato, Andrew Senior, Paul Tucker, Ke Yang, and Andrew Y. Ng&amp;nbsp;&lt;a href="http://research.google.com/archive/large_deep_networks_nips2012.html"&gt;Large Scale Distributed Deep Networks&lt;/a&gt;,&amp;nbsp;NIPS 2012: Neural Information Processing Systems,
Lake Tahoe, Nevada, United States, December, 2012. &lt;br /&gt;
&lt;br /&gt;
It uses a distributed implementation of SGD/L-BFGS for training &lt;a href="http://ufldl.stanford.edu/wiki/index.php/Deep_Networks:_Overview"&gt;deep networks&lt;/a&gt;. It is one of the largest ML deployments I have seen so far: up to 10K cores, 5K machines. In a nutshell they factorize the problem into regions, run SGD in each region&amp;nbsp;separately&amp;nbsp;and then use a&amp;nbsp;central&amp;nbsp;server to merge &amp;nbsp;the model from the different regions. They also support asynchronous computation of the different nodes.&lt;br /&gt;
&lt;br /&gt;
And they did not fail to mention GraphLab :-)&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
We considered a number of existing large-scale computational tools for application to our problem, MapReduce [24] and GraphLab [25] being notable examples. We concluded that MapReduce, designed for parallel data processing, was ill-suited for the iterative computations inherent in&amp;nbsp;deep network training; whereas GraphLab, designed for general (unstructured) graph computations,&amp;nbsp;would not exploit computing efficiencies available in the structured graphs typically found in deep&amp;nbsp;networks.&lt;/blockquote&gt;
&lt;br /&gt;
I am not sure I got their meaning - if anyone knows, let me know.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/StVC6rbIqMs" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/8945918520867512230/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/03/spotlight-large-scale-distributed-deep.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/8945918520867512230?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/8945918520867512230?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/StVC6rbIqMs/spotlight-large-scale-distributed-deep.html" title="Spotlight: Large Scale Distributed Deep Networks" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/03/spotlight-large-scale-distributed-deep.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUAMR347eSp7ImA9WhBRFEU.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-318032619841803662</id><published>2013-03-05T03:42:00.001-08:00</published><updated>2013-03-05T03:43:06.001-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-05T03:43:06.001-08:00</app:edited><title>A nice collaborative filtering tutorial "for dummies"</title><content type="html">I got from &lt;a href="http://www.linkedin.com/pub/muhammad-burhan/14/3a5/b0"&gt;M. 
Burhan&lt;/a&gt;, one of our GraphChi users from Germany, the following link to an online book called: &lt;a href="http://guidetodatamining.com/"&gt;A Programmer's Guide to Data Mining&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
There are two relevant chapters that may help&amp;nbsp;beginners&amp;nbsp;understand the basic concepts.&lt;br /&gt;
The first one of them is &lt;a href="http://guidetodatamining.com/guide/ch2/DataMining-ch2.pdf"&gt;Chapter 2: Collaborative Filtering&amp;nbsp;&lt;/a&gt;&amp;nbsp;and &lt;a href="http://guidetodatamining.com/guide/ch3/DataMining-ch3.pdf"&gt;Chapter 3: Implicit Ratings and Item Based Filtering&lt;/a&gt;.&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/kd9lRY93huM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/318032619841803662/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/03/a-nice-collaborative-filtering-tutorial.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/318032619841803662?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/318032619841803662?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/kd9lRY93huM/a-nice-collaborative-filtering-tutorial.html" title="A nice collaborative filtering tutorial &quot;for dummies&quot;" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/03/a-nice-collaborative-filtering-tutorial.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUYGRX08cSp7ImA9WhBRFEU.&quot;"><id>tag:blogger.com,1999:blog-3211409948956809184.post-6515796090623875347</id><published>2013-03-05T00:21:00.000-08:00</published><updated>2013-03-05T03:32:04.379-08:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2013-03-05T03:32:04.379-08:00</app:edited><title>Intel Labs report on GraphLab vs. Mahout</title><content type="html">I have some very interesting news to report. I got from &lt;a href="http://tr.linkedin.com/in/nezihyigitbasi"&gt;Nezih Yigitbasi&lt;/a&gt;, Intel Labs, the following graph:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-nAkbofKuYSM/UTWqbRecf7I/AAAAAAAA7VU/90OFOYmigyQ/s1600/gl_vs_mahout2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="282" src="http://3.bp.blogspot.com/-nAkbofKuYSM/UTWqbRecf7I/AAAAAAAA7VU/90OFOYmigyQ/s640/gl_vs_mahout2.png" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
It compares Mahout vs. Distributed GraphLab on the popular task of matrix factorization using the ALS (alternating least squares) algorithm on Netflix data. The&amp;nbsp;bottom line&amp;nbsp;is that GraphLab is about 20x faster than Mahout.&lt;br /&gt;
&lt;br /&gt;
And here is the exact experiment setup I got from Nezih:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;N is the number of ALS iterations, D is the number of latent factors. The experiments have been conducted on a 16 node cluster.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;We start GL as  &lt;span style="font-family: Courier New, Courier, monospace; font-size: x-small;"&gt;mpirun -hostfile  ~/hostfile -x CLASSPATH ./als --ncpus=16 --matrix hdfs://host001:19000/user/netflix --D=$LATENT_FACTOR_COUNT --max_iter=$ITER_COUNT --lambda=0.065  --minval=0 --maxval=5&lt;/span&gt;&amp;nbsp;&lt;/li&gt;
&lt;li&gt;To run mahout ALS, we use the &lt;span style="font-family: Courier New, Courier, monospace;"&gt;factorize-netflix.sh&lt;/span&gt; script under the examples directory. It should be run as &lt;span style="font-size: x-small;"&gt;&lt;span style="font-family: Courier New, Courier, monospace;"&gt;./factorize-netflix.sh /path/to/training_set/ /path/to/qualifying.txt /path/to/judging.txt&lt;/span&gt;&amp;nbsp;&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;In our test cluster we have 16 machines each with 64GB of memory, 2 CPUs (Intel(R) Xeon(R) CPU E5-2670 @ 2.60GHz [8 cores each]) and 4 x 1 TB HDDs. The machines communicate over a 10Gb Ethernet interconnect.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;The Netflix dataset has been split into 32 equally sized chunks and then put into HDFS.&lt;/li&gt;
&lt;/ul&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/sYXZE/~4/UNT3i44scB4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://bickson.blogspot.com/feeds/6515796090623875347/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://bickson.blogspot.com/2013/03/intel-labs-report-on-graphlab-vs-mahout.html#comment-form" title="14 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/6515796090623875347?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3211409948956809184/posts/default/6515796090623875347?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/sYXZE/~3/UNT3i44scB4/intel-labs-report-on-graphlab-vs-mahout.html" title="Intel Labs report on GraphLab vs. Mahout" /><author><name>Danny Bickson</name><uri>http://www.blogger.com/profile/01517237836051035400</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="26" height="32" src="http://2.bp.blogspot.com/_kZmYQwDb9sk/TT4CDLhhPyI/AAAAAAAAcpI/oeFkBmszvzE/s1600/bickson.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-nAkbofKuYSM/UTWqbRecf7I/AAAAAAAA7VU/90OFOYmigyQ/s72-c/gl_vs_mahout2.png" height="72" width="72" /><thr:total>14</thr:total><feedburner:origLink>http://bickson.blogspot.com/2013/03/intel-labs-report-on-graphlab-vs-mahout.html</feedburner:origLink></entry></feed>
