<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;CUMASHo-fSp7ImA9WhRUF04.&quot;"><id>tag:blogger.com,1999:blog-6555947</id><updated>2012-01-27T23:50:49.455-07:00</updated><category term="journals" /><category term="clustering" /><category term="deadline" /><category term="workshops" /><category term="icdm" /><category term="rajeev motwani" /><category term="barriers" /><category term="seminars" /><category term="jeff phillips" /><category term="books" /><category term="latex" /><category term="duality" /><category term="funding" /><category term="polymath" /><category term="fonts" /><category term="community" /><category term="polytopes" /><category term="game theory" /><category term="ecml-pkdd" /><category term="cs.DC" /><category term="quantum" /><category term="soda" /><category term="PPAD" /><category term="classification" /><category term="travel" /><category term="simons foundation" /><category term="combinatorial geometry" /><category term="xkcd" /><category term="madalgo" /><category term="society" /><category term="polymath research" /><category term="bibtex" /><category term="k-12" /><category term="video" /><category term="cs.CG" /><category term="social-networking" /><category term="fellowships" /><category term="acceptances" /><category term="p-vs-nc" /><category term="embarrassing" /><category term="p-vs-np" /><category term="cfp" /><category term="humor" /><category term="obituary" /><category term="expanders" /><category term="academy" /><category term="models" /><category term="acm" /><category term="IMA" /><category term="fwcg" /><category term="geometry" /><category term="beamer" /><category term="soda2011" /><category term="sdm2011" /><category term="software" /><category term="current-distance" /><category term="esa" /><category term="reviewing" /><category term="nsf" /><category term="topology" /><category term="nih" /><category term="dimacs" /><category term="conferences" /><category term="talks" /><category term="shape" /><category term="svn" /><category term="randomness" /><category term="theory.SE" /><category term="partha niyogi" /><category term="media" /><category term="potd" /><category term="ipe" /><category term="gct" /><category term="shonan" /><category term="jmm" /><category term="utah" /><category term="cricket" /><category term="memorial" /><category term="alenex" /><category term="cs.DS" /><category term="massive" /><category term="active-learning" /><category term="graphs" /><category term="eda" /><category term="complexity" /><category term="cs.LG" /><category term="arxiv" /><category term="socg-2010" /><category term="announcement" /><category term="cstheory" /><category term="metrics" /><category term="dimensionality-reduction" /><category term="DBR" /><category term="posters" /><category term="coding-theory" /><category term="kernels" /><category term="productivity" /><category term="conjecture" /><category term="stoc" /><category term="cs.CC" /><category term="teaching" /><category term="GIA" /><category term="cra" /><category term="empirical" /><category term="miscellaneous" /><category term="research" /><category term="personal" /><category term="distributions" /><category term="submissions" /><category term="programming" /><category term="wads" /><category term="focs2010" /><category term="streaming" /><category term="implementation" /><category term="multicore" /><category term="data-mining" /><category term="knuth" /><category term="alenex2011" /><category term="ICS" /><category term="graph minors" /><category term="deolalikar" /><category term="analco" /><category term="quant-ph" /><category term="publishing" /><category term="math.PR" /><category term="focs" /><category term="turing" /><category term="hirsch" /><category term="candes" /><category term="blogosphere" /><category term="jobs" /><category term="twitter" /><category term="surveys" /><category term="advising" /><category term="godel" /><category term="morris" /><category term="history" /><category term="awards" /><category term="parallelism" /><category term="career" /><category term="coffee" /><category term="writing" /><category term="socg" /><category term="large-data" /><category term="outreach" /><title>The Geomblog</title><subtitle type="html">Ruminations on computational geometry, algorithms, theoretical computer science and life</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://geomblog.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>1146</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/TheGeomblog" /><feedburner:info uri="thegeomblog" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><feedburner:browserFriendly>This is an XML content feed. It is intended to be viewed in a newsreader or syndicated to another site, subject to copyright and fair use.</feedburner:browserFriendly><entry gd:etag="W/&quot;DkcNRnw_eip7ImA9WhRUEEU.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-5761549743211952037</id><published>2012-01-20T11:28:00.000-07:00</published><updated>2012-01-20T11:28:17.242-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-20T11:28:17.242-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="workshops" /><category scheme="http://www.blogger.com/atom/ns#" term="math.PR" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="shonan" /><title>The Shonan Meeting (Part 3): Optimal Distributed Sampling</title><content type="html">Reservoir sampling is a beautiful gems of sampling: easy to explain, and almost magic in how it works. The setting is this:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
A stream of items passes by you, and your goal is to extract a sample of size $s$ that is uniform over all the elements you've seen so far. &lt;/blockquote&gt;
The technique works as follows: if you're currently examining the i-th element, select it with probability 1/i, and then pick an element uniformly from the current sample to be replaced by it. &lt;a href="http://geomblog.blogspot.com/2008/01/happy-birthday-don-knuth.html"&gt;I've talked about this result before&lt;/a&gt;, including three different ways of proving it. &lt;br /&gt;
&lt;br /&gt;
But what happens if you're in a &lt;a href="http://geomblog.blogspot.com/2012/01/shonan-meeting-part-1-in-beginning.html"&gt;continuous distributed setting&lt;/a&gt; ? Now each of $k$ players is reading a stream of items, and they all talk to a coordinator who wishes to maintain a random sample of the union of streams. Let's assume for now that $s \le k$&lt;br /&gt;
&lt;br /&gt;
Each player can run the above protocol and send an item to the coordinator, and the coordinator can pick a random subset from these. But this won't work ! At least, not unless each player has read in exactly the same amount of data as each other player. This is because we need to weight the sample element sent by a player with the number of elements that player has read.&lt;br /&gt;
&lt;br /&gt;
It's not hard to see that each player sends roughly log n messages to the coordinator for a stream of length n. So maybe each player also annotates the element with the number of elements it has seen so far. This sort of works, but the counts could be off significantly, since a stream that doesn't send a sample might have read many more elements since the previous time it sent an update.&lt;br /&gt;
&lt;br /&gt;
This can be fixed by having each player send an extra control message when its stream increases in size by a factor of 2, and that would not change the asymptotic complexity of the process, but we still don't get a truly uniform sample.&lt;br /&gt;
&lt;br /&gt;
The problem with this approach is that it's trying to get around knowing $n$, the size of the stream, which is expensive to communicate in a distributed setting. So can we revisit the original reservoir method&amp;nbsp; in a 'communication friendly way' ?&lt;br /&gt;
&lt;br /&gt;
Let's design a new strategy for reservoir sampling that works as follows.&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
Maintain a current "threshold" t. When a new item arrives, assign it a random value r between 0 and 1. If r &amp;lt; t, keep the new item and set t = r, else discard it. &lt;/blockquote&gt;
By using the principle of deferred decisions, you can convince yourself that this does exactly the same thing as the previous strategy (because at step i, the probability of the current element being retained is its probability of r being the minimum over the set seen so far, which is 1/i). the good thing is that this approach doesn't need to know how many elements have passed so far.&lt;br /&gt;
&lt;br /&gt;
This approach can be extended almost immediately to the distributed setting. Each player now runs this protocol instead of the previous one, and every time the coordinate gets an update, it sends out a new global threshold (the minimum over all thresholds sent in) to all nodes. If you want to maintain a sample of size $s$, the coordinator keeps $s$ of the $k$ elements sent in, and the overall complexity is $O(ks \log n)$.&lt;br /&gt;
&lt;br /&gt;
But you can do even better.&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
Now, each player maintains its own threshold. The coordinator doesn't broadcast the "correct" threshold &lt;i&gt;until a player sends an element whose random value is above the global threshold&lt;/i&gt;. This tells the coordinator that the player had the wrong threshold, and it then updates that player (and only that player)&lt;/blockquote&gt;
Analyzing this approach takes a little more work, but the resulting bound is much better:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
The (expected) amount of communication is $O(k \frac{\log (n/s)}{\log (k/s)})$&lt;/blockquote&gt;
What's even more impressive: this is optimal ! &lt;br /&gt;
&lt;br /&gt;
This last algorithm and the lower bound, were &lt;a href="http://home.engineering.iastate.edu/%7Esnt/pubs/disc2011-talk.pdf"&gt;presented&lt;/a&gt; by Srikanta Tirthapura at the Shonan meeting, based on his &lt;a href="http://home.engineering.iastate.edu/%7Esnt/pubs/disc2011.pdf"&gt;DISC 2011 work with David Woodruff&lt;/a&gt;. Key elements of this result (including a broadcast-the-threshold variant of the upper bound) also appeared in a &lt;a href="http://www.cs.ust.hk/%7Eyike/pods10-cdsample.pdf"&gt;PODS 2010 paper&lt;/a&gt; by Muthu, Graham Cormode, Kevin Yi and Qin Zhang. The optimal lower bound is new, and rather neat.&amp;nbsp;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-5761549743211952037?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/KNglX2tue75zCKx0iPzqYLl5_Ic/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KNglX2tue75zCKx0iPzqYLl5_Ic/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/KNglX2tue75zCKx0iPzqYLl5_Ic/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KNglX2tue75zCKx0iPzqYLl5_Ic/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=aYY9tW-3v7U:koLHSKqMXXQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=aYY9tW-3v7U:koLHSKqMXXQ:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/aYY9tW-3v7U" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/5761549743211952037/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=5761549743211952037" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/5761549743211952037?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/5761549743211952037?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/aYY9tW-3v7U/shonan-meeting-part-3-optimal.html" title="The Shonan Meeting (Part 3): Optimal Distributed Sampling" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2012/01/shonan-meeting-part-3-optimal.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUIBQX08eip7ImA9WhRUEEw.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-1023341175216786054</id><published>2012-01-19T14:11:00.001-07:00</published><updated>2012-01-19T16:59:10.372-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-19T16:59:10.372-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="soda" /><category scheme="http://www.blogger.com/atom/ns#" term="community" /><category scheme="http://www.blogger.com/atom/ns#" term="conferences" /><title>SODA Review II: The business meeting</title><content type="html">&lt;i&gt;Jeff Phillips posts a roundup of the SODA business meeting. Hawaii !!!!!! &lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
I thought I would also post a few notes on the SODA business meeting. I am sure I missed some details, but here are the main points. &lt;br /&gt;
&lt;br /&gt;
Everyone thought the organization of the conference was excellent (so far). 
The part in parenthesis is a joke by Kazuo Iwama towards his students - I guess that is Japanese humor, and encouragement.&lt;br /&gt;
&lt;br /&gt;
Despite being outside of North America for the first time, the attendance was quite high, I think around 350 people. And the splits were almost exactly 1/3 NA, 1/3 Europe, 1/3 Asia.&lt;br /&gt;
&lt;br /&gt;
Yuval Rabani talked about being PC for the conference. He said there were the most submissions ever, most accepted paper ever, and largest PC ever for SODA. Each PC member reviewers about 49 papers, and over 500 were sub-reviewed. We all thank Yuval for all of his hard work.&lt;br /&gt;
&lt;br /&gt;
We voted next on location for 2014 (2013 is in New Orleans). The final votes were down to Honolulu, HI and Washington DC, where Honolulu won about 60 to 49. David Johnson said they would try to book a hotel in Honolulu if he could get hotel prices below 200 USD/night. A quick look on kayak.com made it appear several large hotels could be booked next year around this time for about 160 USD/night or so. 

Otherwise it will be in DC where the theory group at UMaryland (via David Mount) have stated they would help with local arrangements. They did a great job with SoCG a few years ago, but I heard many suggestions that it be held more downtown than by UofM campus. And also there were requests for good weather. We'll see what happens...&lt;br /&gt;
&lt;br /&gt;
Finally, there was a discussion about how SODA is organized/governed. This discussion got quite lively. Bob Sedgewick led the discussion by providing a short series of slides outlining a rough plan for a "confederated SODA." &lt;a href="http://www.cs.utah.edu/%7Esuresh/blog/soda12-sedgewick.pdf"&gt;I have linked to his slides&lt;/a&gt;. This could mean several things, for instance:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt; Having ALENEX and ANALCO (and SODA) talks spread out over 4 days and intermixed even possibly in the same session (much like ESA).&lt;/li&gt;
&lt;li&gt; 
 The PCs would stay separate most likely (although merging them was discussed, but this had less support).&amp;nbsp; &lt;/li&gt;
&lt;li&gt;
 For SODA the PC could be made more hierarchical where there are, say, 6 main area chairs. Then each area chair supervises say 12 or so PC members. The general chair would coordinate and normalize all of the reviews, but otherwise it would be more hierarchical and partitioned. Then PC members in each area would have fewer papers to review, and could submit to other subareas even.&amp;nbsp; &lt;/li&gt;
&lt;li&gt;There was also a suggestion that PC chairs / steering committee members have some SODA attendance requirements. (Currently it consists of David Johnson, 2 people appointed by SIAM, and the past two PC chairs - as I think I understand. David Johnson said he would provide a link to the official SODA bylaws somewhere.).&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
Anyways, there was a lot of discussion that was boiled down to 3 votes (I will try to paraphrase, all vote totals approximate):&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;
 Should the steering committee consider spreading ALENEX, ANALCO, and SODA talks over 4 days? About 50 to 7 in favor.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Should the steering committee consider/explore some variant of the Confederated SODA model? About 50 to 2 in favor. &lt;/li&gt;
&lt;li&gt;
 Should the steering committee consider making the steering committee members elected? About 50 to 1 in favor.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
There were about 100+ votes for location, so usually about half the crowd abstained for all votes. 


There were various arguments on either side of the positions. And other suggestions. Some people had very strong and well-argued sides of these discussion points, so I don't want to try to paraphrase (and probably get something nuanced wrong), but I encourage people to post opinions and ideas in the comments.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-1023341175216786054?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/K9tw9bZYlV2FgCzEGsE-ZYkpF0M/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/K9tw9bZYlV2FgCzEGsE-ZYkpF0M/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/K9tw9bZYlV2FgCzEGsE-ZYkpF0M/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/K9tw9bZYlV2FgCzEGsE-ZYkpF0M/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=Xe_3vF1-FRw:51zaIGW8flA:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=Xe_3vF1-FRw:51zaIGW8flA:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/Xe_3vF1-FRw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/1023341175216786054/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=1023341175216786054" title="12 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1023341175216786054?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1023341175216786054?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/Xe_3vF1-FRw/soda-review-ii-business-meeting.html" title="SODA Review II: The business meeting" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>12</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2012/01/soda-review-ii-business-meeting.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU8MSH07cSp7ImA9WhRVGUg.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-8302483164016662556</id><published>2012-01-18T23:18:00.000-07:00</published><updated>2012-01-18T23:18:09.309-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-18T23:18:09.309-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="soda" /><category scheme="http://www.blogger.com/atom/ns#" term="conferences" /><title>SODA review I: Talks, talks and more talks.</title><content type="html">&lt;i&gt;I asked Jeff Phillips (a regular contributor) if he'd do some conference posting for those of us unable to make it to SODA. Here's the first of his two missives&lt;/i&gt;. &lt;br /&gt;
&lt;br /&gt;
Suresh requested I write a conference report for SODA.  I never know how to write these reports since I always feel like I must have left out some nice talk/paper and then I risk offending people.  The fact is, there are 3 parallel sessions, and I can't pay close attention to talks for 3 days straight, especially after spending the previous week at the Shonan meeting that Suresh has been blogging about.&lt;br /&gt;
&lt;br /&gt;
Perhaps, it is apt to contrast it with the Shonan meeting.  At Shonan there were many talks (often informal with much back and forth) on topics very well clustered on "Large-scale Distributed Computation".  There were several talks earlier in the workshop that just overlaid the main techniques that have become quite powerful within an area, and then there were new talks on recent breakthroughs.  But although we mixed up the ordering of subtopics a bit, there was never that far of a context switch, and you could see larger views coalescing in people's minds throughout the week.&lt;br /&gt;
&lt;br /&gt;
At SODA, the spectrum is much more diverse - probably the most diverse conference on the theoretical end of computer science.  The great thing is that I get to see colleagues across a much broader spectrum of areas.  But the talks are often a bit more specific, and despite having usually fairly coherent sessions, the context switches are typically quite a bit larger and it seems harder to stay focused enough to really get at the heart at what is in each talk.  Really getting the point requires both paying attention and being in the correct mind set to start with.  Also, there are not too many talks in my areas of interest (i.e. geometry, big data algorithmics).&lt;br /&gt;
&lt;br /&gt;
&lt;hr /&gt;
&lt;br /&gt;
So then what is there to report.  I've spent most of my time in the hallways, catching up on gossip (which either is personal, or I probably shouldn't blog about without tenure - or even with tenure), or discussing on-going or new research problems with friends (again not yet ready for a blog).  
And of the talks I saw, I generally captured vague notions or concepts.  Usually stored away for when I think about a related problem and I need to make a similar connection, or look up a technique in the paper.  And, although, I was given a CD of the proceedings, but my laptop has not CD drive.  For the reasons discussed above, I rarely completely get how something works from a short conference talk.  Here are some example snip-its of what I took away from a few talks:&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://siam.omnibooksonline.com/2012SODA/data/papers/415.pdf#page=1%20"&gt;Private Data Release Via Learning Thresholds&lt;/a&gt; | Moritz Hardt, Guy Rothblum, Rocco A. Servedio.&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
Take-away : There is a deep connection between PAC learning and differential privacy.  Some results from one can be applied to the other, but perhaps many others can be as well.&lt;/blockquote&gt;
&lt;a href="http://siam.omnibooksonline.com/2012SODA/data/papers/353.pdf#page=1%20"&gt;Submatrix Maximum Queries in Monge Matrices and Monge Partial Matrices, and Their Applications&lt;/a&gt; | Haim Kaplan, Shay Mozes, Yahav Nussbaum and Micha Sharir&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
Take-away: There is a cool &lt;a href="http://en.wikipedia.org/wiki/Monge_array"&gt;"Monge" property&lt;/a&gt; that matrices can have which makes many subset query operations more efficient.  This can be thought of as each row represents a pseudo-line.  Looks useful for matrix problems where the geometric intuition about what the columns mean is relevant.&amp;nbsp;&lt;/blockquote&gt;
&lt;a href="http://siam.omnibooksonline.com/2012SODA/data/papers/017.pdf#page=1%20"&gt;Analyzing Graph Structure Via Linear Measurements&lt;/a&gt; | Kook Jin Ahn, Sudipto Guha, Andrew McGregor&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
Take-away : They presented a very cool linear sketch for graphs.  This allows several graph problems to be solved under a streaming (or similar models) in the way usually more abstract, if not geometric, data is. (&lt;i&gt;ed: see &lt;a href="http://geomblog.blogspot.com/2012/01/shonan-meeting-part-4-talks-review-i.html"&gt;my note on Andrew's talk&lt;/a&gt; at Shonan&lt;/i&gt;)&lt;/blockquote&gt;
&lt;br /&gt;
&lt;a href="http://siam.omnibooksonline.com/2012SODA/data/papers/366.pdf#page=1"&gt;Lsh-Preserving Functions and Their Applications&lt;/a&gt; | Flavio Chierichetti, Ravi Kumar&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
Take-away: They present a nice characterization of what sorts of similarities (based on combinatorial sets), showing which ones can and cannot be used within a LSH framework.  There techniques seemed to be a bit more general that for these discrete similarities over sets, so if need to use this for another similarity may be good to check out in more detail.&amp;nbsp; &lt;/blockquote&gt;
&lt;br /&gt;
&lt;a href="http://siam.omnibooksonline.com/2012SODA/data/papers/562.pdf#page=1%20"&gt;Data Reduction for Weighted and Outlier-Resistant Clustering&lt;/a&gt; | Dan Feldman, Leonard Schulman&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
Take-away: They continue to develop the understanding on what can be done for core sets using sensitivity-based analysis.  This helps outline not just what functions can be approximated with subsets as proxy, but also how the distribution of points affects these results.  The previous talk by Xin Xiao (with Kasturi Varadarajan on A Near-Linear Algorithm for Projective Clustering Integer Points) also used these concepts.&amp;nbsp;&lt;/blockquote&gt;
&lt;br /&gt;
There were many other very nice results and talks that I also enjoyed, but the take-away was often even less interesting to blog about.  Or sometimes they just made more progress towards closing a specific subarea.  I am not sure how others use a conference, but if you are preparing your talk, you might consider trying to build a clear concise take-away message into your talk so that people like me with finite attention spans can remember something precise out of it.  And so people like me are most likely to look more carefully at the paper the next time we work on a related problem.    
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-8302483164016662556?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/KV7T7B9Dm9WYid9v3SzkU7XZDRc/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KV7T7B9Dm9WYid9v3SzkU7XZDRc/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/KV7T7B9Dm9WYid9v3SzkU7XZDRc/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KV7T7B9Dm9WYid9v3SzkU7XZDRc/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=jm-lKCm8W-8:7H3eR2conEo:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=jm-lKCm8W-8:7H3eR2conEo:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/jm-lKCm8W-8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/8302483164016662556/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=8302483164016662556" title="8 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8302483164016662556?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8302483164016662556?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/jm-lKCm8W-8/soda-review-i-talks-talks-and-more.html" title="SODA review I: Talks, talks and more talks." /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>8</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2012/01/soda-review-i-talks-talks-and-more.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEYFSX06eSp7ImA9WhRVGEo.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-5487424816381617021</id><published>2012-01-18T01:41:00.000-07:00</published><updated>2012-01-18T01:41:58.311-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-18T01:41:58.311-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="workshops" /><category scheme="http://www.blogger.com/atom/ns#" term="clustering" /><title>Upcoming deadline: Multiclust 2012</title><content type="html">One of the posts queued up in the &lt;a href="http://geomblog.blogspot.com/p/conceptual-view-of-clustering.html"&gt;clustering series&lt;/a&gt; is a note on 'metaclustering', which starts with the idea that instead of looking for one good clustering, we should be trying to explore the space of clusterings obtained through different methods and pick out good solutions informed by the landscape of answers. While the concept has been around a while, the area has attracted much more interest in recent years, with two workshops on the topic at recent data mining conferences.&lt;br /&gt;
&lt;br /&gt;
I'm involved with the organization of &lt;a href="http://www.dbs.ifi.lmu.de/research/MultiClust2012/"&gt;the latest incarnation&lt;/a&gt;, to be held in conjunction with SDM 2012 in April. If you have&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
unpublished original research papers (of upto 8 pages) that are not under review 
elsewhere, vision papers and descriptions of work-in-progress
 or case studies on benchmark data as short paper submissions of up to 4
 pages&lt;/blockquote&gt;
then you should &lt;a href="http://www.easychair.org/conferences/?conf=multiclust2012"&gt;submit it&lt;/a&gt; ! The deadline is Jan 25.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-5487424816381617021?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/3toHIo1jZH3PVhvFrWHk9Tjkblw/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/3toHIo1jZH3PVhvFrWHk9Tjkblw/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/3toHIo1jZH3PVhvFrWHk9Tjkblw/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/3toHIo1jZH3PVhvFrWHk9Tjkblw/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=vYr_JAPHnpU:-hz84FzrflY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=vYr_JAPHnpU:-hz84FzrflY:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/vYr_JAPHnpU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/5487424816381617021/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=5487424816381617021" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/5487424816381617021?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/5487424816381617021?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/vYr_JAPHnpU/upcoming-deadline-multiclust-2012.html" title="Upcoming deadline: Multiclust 2012" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2012/01/upcoming-deadline-multiclust-2012.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkAEQXg4fCp7ImA9WhRVGU0.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-3252004892643496546</id><published>2012-01-17T16:20:00.001-07:00</published><updated>2012-01-18T10:45:00.634-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-18T10:45:00.634-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="large-data" /><category scheme="http://www.blogger.com/atom/ns#" term="workshops" /><category scheme="http://www.blogger.com/atom/ns#" term="shonan" /><title>The Shonan Meeting (Part 2): Talks review I</title><content type="html">&lt;i&gt;I missed one whole day of the workshop because of classes, and also missed a half day because of an intense burst of slide-making. While I wouldn't apologize for missing talks at a conference, it feels worse to miss them at a small focused workshop. At any rate, the usual disclaimers apply: omissions are not due to my not liking a presentation, but because of having nothing even remotely intelligent to say about it&lt;/i&gt;. &lt;br /&gt;
&lt;br /&gt;
Jeff Phillips led off with his work on &lt;a href="http://www.nii.ac.jp/shonan/seminar011/2011/01/06/abstracts/#phillips"&gt;mergeable summaries&lt;/a&gt;. The idea is that you have a distributed collection of nodes, each with their own data. The goal is to compute some kind of summary from all the nodes, with the caveat that each node only transmits a fixed size summary to other nodes (or the parent in an implied hierarchy). What's tricky about this is keeping the error down. It's easy to see for example that $\epsilon$-samples compose - you could take two $\epsilon$-samples and take an $\epsilon$-sample of that, giving you a $2\epsilon$-sample over the union. But you want to keep the error fixed AND the size the sample fixed. He showed a number of summary structures that could be maintained in this mergeable fashion, and there are a number of interesting questions that remain open, including how to do clustering in a mergeable way.&lt;br /&gt;
&lt;br /&gt;
In the light of &lt;a href="http://geomblog.blogspot.com/2012/01/shonan-meeting-part-1-in-beginning.html"&gt;what I talked about earlier&lt;/a&gt;, you could think of the 'mergeable' model as a restricted kind of distributed computation, where the topology is fixed, and messages are fixed size. The topology is a key aspect, because nodes don't encounter data more than once. This is good, because otherwise the lack of idempotence of some of the operators could be a problem: indeed, it would be interesting to see how to deal with non-idempotent summaries in a truly distributed fashion.&lt;br /&gt;
&lt;br /&gt;
Andrew McGregor talked about graph sketching problems (sorry, no abstract yet). One neat aspect of his work is that in order to build sketches for graph connectivity, he uses a vertex-edge representation that essentially looks like the cycle-basis vector in the 1-skeleton of a simplicial complex, and exploits the homology structure to compute the connected components (aka $\beta_0$). He also uses the &lt;a href="http://en.wikipedia.org/wiki/Bipartite_double_cover"&gt;bipartite double cover trick&lt;/a&gt; to reduce bipartiteness testing to connected component computation. It's kind of neat to see topological methods show up in a useful way in these settings, and his approach probably extends to other homological primitives. &lt;br /&gt;
&lt;br /&gt;
Donatella Firmani and Luigi Laura talked about different aspects of graph sketching and MapReduce, studying core problems like the MST and bi/triconnectivity. Donatella's talk in particular had a detailed experimental study of various MR implementations for these problems, and had interesting (but preliminary) observations about tradeoff between the number of reducers and the amount of communication needed.&lt;br /&gt;
&lt;br /&gt;
This theme was explored further by &lt;a href="http://www.nii.ac.jp/shonan/seminar011/2011/01/06/abstracts/#ullman"&gt;Jeff Ullman in his talk on one-pass MR algorithms &lt;/a&gt;(&lt;i&gt;the actual talk title was slightly different, since the unwritten rule at the workshop was to change the name of the title from the official listing&lt;/i&gt;). Again, his argument was that one should be combining both the communication cost and the overall computation cost. A particularly neat aspect of his work was showing (for the problem of finding a particular shaped subgraph in a given large graph) when there was an efficient one-pass MR algorithm, given the existence of a serial algorithm for the same problem. He called such algorithms &lt;a href="http://ilpubs.stanford.edu:8090/1020/"&gt;convertible algorithms&lt;/a&gt;: one result type is that if there's an algorithm running in time $n^\alpha m^\beta$ for finding a particular subgraph of size $s$, and $s \le \alpha + 2\beta$, then there's an efficient MR algorithm for the problem (in the sense of total computation time being comparable to the serial algorithm).&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-3252004892643496546?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/-Qx_1Wiz2qLx3_J-pQ7FYCp53pA/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/-Qx_1Wiz2qLx3_J-pQ7FYCp53pA/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/-Qx_1Wiz2qLx3_J-pQ7FYCp53pA/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/-Qx_1Wiz2qLx3_J-pQ7FYCp53pA/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=idSxo5ckp68:fDTsrH5x4Uk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=idSxo5ckp68:fDTsrH5x4Uk:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/idSxo5ckp68" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/3252004892643496546/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=3252004892643496546" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3252004892643496546?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3252004892643496546?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/idSxo5ckp68/shonan-meeting-part-4-talks-review-i.html" title="The Shonan Meeting (Part 2): Talks review I" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2012/01/shonan-meeting-part-4-talks-review-i.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEQNRng5cSp7ImA9WhRVGEg.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-1253768320698732310</id><published>2012-01-17T16:06:00.001-07:00</published><updated>2012-01-17T20:13:17.629-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-17T20:13:17.629-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="large-data" /><category scheme="http://www.blogger.com/atom/ns#" term="workshops" /><category scheme="http://www.blogger.com/atom/ns#" term="shonan" /><category scheme="http://www.blogger.com/atom/ns#" term="models" /><title>The Shonan Meeting (Part 1): In the beginning, there was a disk...</title><content type="html">&lt;i&gt;What follows is a personal view of the evolution of large-data models. This is not necessarily chronological, or even reflective of reality, but it's a retroactive take on the field, inspired by listening to talks at the Shonan meeting. &lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Arguably, the first formal &lt;i&gt;&lt;b&gt;algorithmic &lt;/b&gt;&lt;/i&gt;engagement with large data&amp;nbsp; was the &lt;a href="http://dl.acm.org/citation.cfm?doid=48529.48535"&gt;Aggarwal-Vitter external memory model from 1988&lt;/a&gt;. The idea was simple enough: accessing an arbitrary element of disk was orders of magnitude more expensive than accessing an element of main memory, so let's ignore main memory access and charge a single unit for accessing a block of disk. &lt;br /&gt;
&lt;br /&gt;
The external memory model was (and is still) a very effective model of disk access. It wasn't just a good guide to thinking about algorithm design, it also encouraged design strategies that were borne out well in practice. One could prove that natural-sounding buffering strategies were in fact optimal, and that prioritizing sequential scans as far as possible (even to the extent of preparing data for sequential scans) was more efficient. Nothing earth-shattering, but a model that guides (and conforms to) proper practice is always a good one. &lt;br /&gt;
&lt;br /&gt;
Two independent directions spawned off from the external memory model. One direction was to extend the hierarchy. Why stop at one main memory level when we have multilevel caches&amp;nbsp; ? A simple extension to handle caches is tricky, because the access time differential between caches and main memory isn't sufficient to justify the idealized "0-1" model that the EM model used. But throwing in another twist - I don't actually know the correct block size for transfer of data between hierarchy levels - led us to &lt;a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.139.4254&amp;amp;rep=rep1&amp;amp;type=pdf"&gt;cache-obliviousness&lt;/a&gt;. &lt;br /&gt;
&lt;br /&gt;
I can't say for sure whether the cache-oblivious model speaks to the practice of programming with caches as effectively as the EM model. Being aware of your cache can bring significant benefits in principle. But the design principles ("repeated divide and conquer, and emphasizing locality of access") are sound, and there's already at least one company (&lt;a href="http://www.tokutek.com/"&gt;Tokutek&lt;/a&gt;, founded by Martin Farach-Colton, &lt;strike&gt;and &lt;/strike&gt;Michael Bender and Bradley Kuszmaul) that is capitalizing on the performance yielded by cache-oblivious data structures. &lt;br /&gt;
&lt;br /&gt;
The other direction was to weaken the power of the model. Since a sequential scan was so much more efficient than random access to disk, a natural question was to ask what you could do with just one scan. And thus was born the streaming model, which is by far the most successful model for large-data to date, with theoretical depth and immense practical value. &lt;br /&gt;
&lt;br /&gt;
What we've been seeing over the past few years is the evolution of the streaming model to capture ever more complex data processing scenarios and communication frameworks. &lt;br /&gt;
&lt;br /&gt;
It's quite useful to think of a stream algorithm as "communicating" a limited amount of information (the working memory) from the first half of the stream to the second. Indeed, this view is the basis for communication-complexity-based lower bounds for stream algorithms. &lt;br /&gt;
&lt;br /&gt;
But if we think of this as an algorithmic principle, we then get into the realm of a distributed computation, where one player possesss the "first half" of the data, the other player has "the second half" and the goal is for them to exchange a small number of bits with each other in order to compute something (while streaming is one-way communication, a multi-pass streaming algorithm is a two-way communication). &lt;br /&gt;
&lt;br /&gt;
Of course, there's nothing saying that we only have two players. This gets you to the $k$-player setup for a distributed computation, in which you wish to minimize the amount of communication exchanged as part of a computation. This model is of course not new at all ! It's exactly the distributed computing model pioneered in the 80s and 90s and has a natural home at PODC. What appears to be different in its new uses is that the questions being asked are not the old classics like leader election or byzantine agreement, but statistical estimation on large data sets. In other words, the reason to limit communication is because of the need to process a large data set, rather than the need to merely coordinate. It's a fine distinction, and I'm not sure I entirely believe it myself :)&lt;br /&gt;
&lt;br /&gt;
There are many questions about how to compute various objects in a distributed setting: of course the current motivation is to do with distributed data centers, sensor networks, and even different cores on a computer. Because of the focus on data analysis, there are sometimes surprising results that you can prove. For example, &lt;a href="http://www.cs.ust.hk/%7Eyike/sigmod11-senquan.pdf"&gt;a recent SIGMOD paper by&lt;/a&gt; Zengfeng Huang, Lu Wang, Ke Yi, 
and Yunhao Liu shows that if you want to do quantile estimation, you only need communication that's sublinear in the number of players ! The trick here is that you don't need to have very careful error bounds on the estimates at each player before sending up the summary to a coordinator. &lt;br /&gt;
&lt;br /&gt;
It's also quite interesting to think about distributed learning problems, where the information being exchanged is specifically in order to build a good model for whatever task you're trying to learn. S&lt;a href="http://www.cs.utah.edu/%7Esuresh/web/2011/12/12/protocols-for-learning-classifiers-on-distributed-data/"&gt;ome recent work that I have together with Jeff Phillips, Hal Daume and my student Avishek Saha&lt;/a&gt; explores the communication complexity of doing classification in such a setting. &lt;br /&gt;
&lt;br /&gt;
An even more interesting twist on the distributed setting is the so-called 'continuous streaming' setting. Here, you don't just have a one-shot communication problem. Each player receives a stream of data, and now the challenge is not just to communicate a few bits of information to solve a problem, but to update the information appropriately as new input comes in. Think of this as streaming with windows, or a dynamic version of the basic distributed setting. &lt;br /&gt;
&lt;br /&gt;
Here too, there are a number of interesting results, a beautiful new sampling trick that I'll talk about next, and some lower bounds. &lt;br /&gt;
&lt;br /&gt;
I haven't even got to MapReduce yet, and how it fits in: while you're waiting, you might want to &lt;a href="http://www.cs.utah.edu/%7Esuresh/web/2011/12/12/protocols-for-learning-classifiers-on-distributed-data/"&gt;revisit this post&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-1253768320698732310?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/AiPL2ev8NPjjV6jckVdUDtQui1g/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/AiPL2ev8NPjjV6jckVdUDtQui1g/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/AiPL2ev8NPjjV6jckVdUDtQui1g/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/AiPL2ev8NPjjV6jckVdUDtQui1g/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=4gIOs9_GZVc:E_wpiRhlGNY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=4gIOs9_GZVc:E_wpiRhlGNY:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/4gIOs9_GZVc" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/1253768320698732310/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=1253768320698732310" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1253768320698732310?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1253768320698732310?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/4gIOs9_GZVc/shonan-meeting-part-1-in-beginning.html" title="The Shonan Meeting (Part 1): In the beginning, there was a disk..." /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>4</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2012/01/shonan-meeting-part-1-in-beginning.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkYCRH8zeSp7ImA9WhRVFkQ.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-378361863403906326</id><published>2012-01-15T22:02:00.000-07:00</published><updated>2012-01-15T22:02:45.181-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-15T22:02:45.181-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="large-data" /><category scheme="http://www.blogger.com/atom/ns#" term="workshops" /><category scheme="http://www.blogger.com/atom/ns#" term="shonan" /><title>The Shonan Meeting (Part 0): On Workshops</title><content type="html">Coming up with new ideas requires concentration and immersion. When you spend enough unbroken time thinking about a problem, you start forming connections between thoughts, and eventually you get a giant "connected component" that's an actual idea. &lt;br /&gt;&lt;br /&gt;Distractions, even technical ones, kill this process. And this is why even at a focused theory conference, I don't reach that level of &lt;a href="http://en.wikipedia.org/wiki/Flow_%28psychology%29"&gt;"flow"&lt;/a&gt;. While I'm bombarded from all directions by interesting theory, there's a lot of context switching. Look, a new TSP result ! origami and folding - fun ! Who'd have thought of analyzing Jenga ! did&amp;nbsp; someone really prove that superlinear epsilon net lower bound ? &lt;br /&gt;&lt;br /&gt;This is why focused workshops are so effective. You get bombarded with information for sure, but each piece reinforces aspects of the overall theme if it's done well. Slowly, over the course of the event, a bigger picture starts emerging, connections start being made, and you can feel the buzz of new ideas. &lt;br /&gt;&lt;br /&gt;And this is why the trend of 'conferencizing' workshops, that &lt;a href="http://cacm.acm.org/magazines/2011/1/103228-where-have-all-the-workshops-gone/fulltext"&gt;Moshe Vardi lamented recently&lt;/a&gt;, is so pernicious. it's another example of perverse incentives ("conferences count more than workshops for academic review, and so let's redefine a workshop as a conference"). A good workshop (with submitted papers or otherwise) provides focus and intensity, and good things come of it. A workshop that's really just a miniconference doesn't have either the intense intimacy of a true workshop or the quality of a larger symposium.&lt;br /&gt;&lt;br /&gt;All of this is a very roundabout way of congratulating Muthu, Graham Cormode and Ke Yi (ed: &lt;i&gt;Can we just declare that Muthu has reached exalted one-word status, like Madonna and Adele ? I can't imagine anyone in the theory community hearing the name 'Muthu' and not knowing who that is&lt;/i&gt;) for putting on a &lt;a href="http://www.nii.ac.jp/shonan/blog/2011/04/11/large-scale-distributed-computation/"&gt;fantastic workshop on Large-Scale Distributed Computing&lt;/a&gt; at the Shonan Village Center (the Japanese Dagstuhl, if you will). There was reinforcement, intensity, the buzz of new ideas, and table tennis ! There was also the abomination of&amp;nbsp; fish-flavored cheese sticks, of which nothing more will be said. &lt;br /&gt;&lt;br /&gt;In what follows, I'll have a series of posts from the event itself, with a personal overview of the evolution of the area, highlights from the talks, and a wrap up. Stay tuned...&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-378361863403906326?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/R4Zt-rrx8FnCNpwWdeqCSc-m4Rk/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/R4Zt-rrx8FnCNpwWdeqCSc-m4Rk/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/R4Zt-rrx8FnCNpwWdeqCSc-m4Rk/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/R4Zt-rrx8FnCNpwWdeqCSc-m4Rk/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=JK0SVSVFXuw:pSuUapoBz-A:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=JK0SVSVFXuw:pSuUapoBz-A:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/JK0SVSVFXuw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/378361863403906326/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=378361863403906326" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/378361863403906326?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/378361863403906326?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/JK0SVSVFXuw/shonan-meeting-part-0-on-workshops.html" title="The Shonan Meeting (Part 0): On Workshops" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2012/01/shonan-meeting-part-0-on-workshops.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkcBR3k6fip7ImA9WhRWFkk.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-2643655609653719648</id><published>2012-01-03T20:34:00.001-07:00</published><updated>2012-01-03T20:34:16.716-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-03T20:34:16.716-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="teaching" /><category scheme="http://www.blogger.com/atom/ns#" term="programming" /><title>Teaching review I: Programming</title><content type="html">&lt;i&gt;Please note: I'm in Boston this week at the Joint Math Meetings (6100 participants and counting!) for a &lt;a href="http://geomblog.blogspot.com/2011/11/computational-geometry-at-joint-math.html"&gt;SIAM minisymposium on computational geometry&lt;/a&gt;. If you happen to be in the area for the conference, stop by our session on Thursday at 8:30am. &lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
I always assign programming questions in my graduate algorithms class. &lt;a href="http://geomblog.blogspot.com/2008/12/programming-assignments.html"&gt;In the beginning&lt;/a&gt;, I used the ACM programming competition server, and also tried the Sphere Online system. This semester, I didn't do either, designing my own homeworks.&lt;br /&gt;
&lt;br /&gt;
Why do I ask students to code ? There are &lt;a href="http://cstheory.stackexchange.com/questions/8851/how-important-is-knowing-how-to-program-for-tcs/8908#8908"&gt;many reasons&lt;/a&gt; it's a good idea to get students to program in a graduate algorithms class. There are some additional reasons in my setting: a majority of the students in my class take it as a required course for the MS or Ph.D program. They don't plan on doing research in algorithms, many of them are looking for jobs at tech companies, and many others will at best be using algorithmic thinking in their own research.&lt;br /&gt;
&lt;br /&gt;
Given that, the kinds of programming assignments I tried to give this semester focused on the boundary between theory and practice (or where O() notation ends). In principle, the goal of each assignment on a specific topic was to convert the theory (dynamic programming, prune and search, hashing etc) into practice by experimenting with different design choices and seeing how they affected overall run time.&lt;br /&gt;
&lt;br /&gt;
Designing assignments that couldn't be solved merely by downloading some code was particularly tricky. As Panos Ipeirotis recommended in his study of cheating in the classroom (original post deleted, &lt;a href="http://www.behind-the-enemy-lines.com/2011/07/tale-about-parking.html"&gt;here's the post-post&lt;/a&gt;), the key is to design questions whose answer requires some exploration after you've implemented the algorithm. I was reasonably happy with my hashing assignment, where students were asked to experiment with different hashing strategies (open addressing, chaining, cuckoo hashing), measure the hash table behavior for different parameter choices, and draw their own conclusions. There was a fair degree of consistency in the reported results, and I got the feeling that the students (and I!) learned something lasting about the choices one makes in hashing (for example, simple cuckoo hashing degrades rapidly after a 70% load ratio).&lt;br /&gt;
&lt;br /&gt;
I did have a few disasters, and learned some important lessons.&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;If I expect to run student code on my test cases, I HAVE to provide a test harness into which they'd merely insert a subroutine. Otherwise, merely specifying input and output format is not enough. Students are not as familiar with a simple UNIX command line interface as I expected, and use pre-canned libraries in ways that make uniform testing almost impossible. Note that this is not language-independent, unless I'm willing to provide harnesses for the myriad different languages people use. &lt;/li&gt;
&lt;li&gt;If you do want to accept code without using the 'subroutine-inside-harness' form, then you need some kind of testing environment where they can submit the code and get answers. This is essentially what ACM/TopCoder/SphereOnline do.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;If you're accepting code from students, use &lt;a href="http://theory.stanford.edu/%7Eaiken/moss/"&gt;Moss&lt;/a&gt; or something like it. It's great for detecting suspiciously common code. When I used it and detected duplicates, in all cases the students admitted to working together. &lt;/li&gt;
&lt;li&gt;It's easier to design assignments where the submitted product is not code, but some analysis of the performance (like in the hashing assignment above). Firstly, this is platform independent, so I don't care if people use Java, C, C++, C#, python, perl, ruby, scheme, racket, haskell, .....Secondly, it avoids the issue of "did you write the code yourself" - not entirely, but partly.&lt;/li&gt;
&lt;/ul&gt;
But I'll definitely do this again. Firstly, it's good for the students - from what I hear, having written actual code for dynamic programming makes it much easier for them to write code under pressure during their tech interviews, and I've heard that knowledge of DPs and network flows is a key separator during tech hiring. Secondly, even for theory students, it helps illustrate where asymptotic analysis is effective and where it breaks down. I think this is invaluable to help theoreticians think beyond galactic algorithms.&lt;br /&gt;
&lt;br /&gt;
Of course, no discussion of programming in algorithms classes is complete with a reference to &lt;a href="http://mybiasedcoin.blogspot.com/2007/06/what-were-doing-wrong-programming-in.html"&gt;Michael Mitzenmacher's exhortation&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-2643655609653719648?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/mVJt0yxg14WRFNIAffvFHeUj11M/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/mVJt0yxg14WRFNIAffvFHeUj11M/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/mVJt0yxg14WRFNIAffvFHeUj11M/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/mVJt0yxg14WRFNIAffvFHeUj11M/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=peKWur8i9pQ:e-AZOKd70ZY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=peKWur8i9pQ:e-AZOKd70ZY:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/peKWur8i9pQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/2643655609653719648/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=2643655609653719648" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2643655609653719648?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2643655609653719648?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/peKWur8i9pQ/teaching-review-i-programming.html" title="Teaching review I: Programming" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>4</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2012/01/teaching-review-i-programming.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkAGQXk9fip7ImA9WhRWEE0.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-1677215100084111397</id><published>2011-12-26T00:37:00.001-07:00</published><updated>2011-12-27T09:52:00.766-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-27T09:52:00.766-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="community" /><category scheme="http://www.blogger.com/atom/ns#" term="reviewing" /><title>On PC members submitting papers</title><content type="html">&lt;b&gt;Update&lt;/b&gt;:&lt;i&gt; Michael Mitzenmacher's posts (&lt;a href="http://mybiasedcoin.blogspot.com/2009/02/stoc-pc-meeting-part-iii-conflicts-of.html"&gt;one&lt;/a&gt;, &lt;a href="http://mybiasedcoin.blogspot.com/2009/02/stoc-pc-meeting-part-iv-conflicts-of.html"&gt;two&lt;/a&gt;, and &lt;a href="http://mybiasedcoin.blogspot.com/2010/02/conflicts-of-interest-yet-again.html"&gt;three&lt;/a&gt;,
 and the resulting comments) on implementing CoI at STOC are well worth 
reading (thanks, Michael). The comments there make me despair that *any*
 change will ever be implemented before the next century, but given that
 we've been able to make some changes already (electronic proceedings, 
contributed workshops, and so on), I remain hopeful.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
For all but theory researchers, the reaction to the above statement is usually "don't they always?". In theoryCS, we pride ourselves on not having PC members submit papers to conferences. What ends up happening is:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;You can't have too many PC members on a committee because otherwise there won't be enough submissions&lt;/li&gt;
&lt;li&gt;The load on each PC member is much larger than reasonable (I'm managing 41 papers for STOC right now, and it's not uncommon to hit 60+ for SODA)&lt;/li&gt;
&lt;/ul&gt;
There's an ancillary effect that because of the first point, theory folks have fewer 'PC memberships' on their CV&amp;nbsp; which can cause problems for academic performance review, but this is a classic &lt;a href="http://en.wikipedia.org/wiki/Goodhart%27s_law"&gt;Goodhart's Law&lt;/a&gt; issue, so I won't worry about it.&lt;br /&gt;
&lt;br /&gt;
The main principle at play here is: we don't want potentially messy conflicts or complex conflict management issues if we do have PC members submitting papers. However, it seems to me that the &lt;b&gt;practice &lt;/b&gt;of how we review papers is far different from this principle.&amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
Consider: I get an assignment of X papers to review if I'm on a conference PC. I then scramble around finding subreviewers for a good fraction of the papers I'm assigned (I used to do this less, but I eventually realized that a qualified subreviewer is FAR better than me in most subareas outside my own expertise, and is better for the paper).&lt;br /&gt;
&lt;br /&gt;
Note (and this is important) &lt;i&gt;my subreviewers have typically submitted papers to this conference&lt;/i&gt; (although I don't check) and I rely on them to declare any conflicts as per conference guidelines.&lt;br /&gt;
&lt;br /&gt;
Subreviewers also get requests from different PC members, and some subreviewers might themselves review 3-4 papers. &lt;br /&gt;
&lt;br /&gt;
Compare this to (say) a data mining conference: there are 30+ "area chairs" or "vice chairs", and over 200 PC members. PC members each review between 5-10 papers, and often don't even know who the other reviewers are (although they can see their reviews once they're done). The area/vice chairs manage 20-30 papers each, and their job is to study the reviews, encourage discussion as needed, and formulate the final consensus decision and 'meta-review'.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
If you set "theory subreviewer = PC member" and "theory PC member = vice chair", you get systems that aren't significantly different. The main differences are:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;theory subreviewers don't typically get to see other reviews of the paper. So their score assignment is in a vacuum.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;theory PC members are expected to produce a review for a paper taking the subreviewer comments into account (as opposed to merely scrutinizing the reviews being provided)&lt;/li&gt;
&lt;li&gt;managing reviewer comments for 30 papers is quite different to generating 30 reviews yourself (even with subreviewer help)&lt;/li&gt;
&lt;li&gt;A downside of the two-tier PC system is also that there isn't the same global view of the entire pool that a theory PC gets. But this is more a convention than a rule: there's nothing stopping a PC for opening up discussions to all vice chairs.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;One advantage of area chairs is that at least all papers in a given area get one common (re)viewer. that's not necessarily the case in a theory PC without explicit coordination from the PC chair and the committee itself. &lt;/li&gt;
&lt;/ul&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;b&gt;But the main claimed difference (that people submitting papers don't get to review them) is false. &lt;/b&gt;Even worse, when submitters do review papers, this is 'under the table' and so there isn't the same strict conflict management that happens with explicit PC membership.&amp;nbsp;&lt;/blockquote&gt;
&lt;br /&gt;
We're dealing with problems of scale in all aspects of the paper review and evaluation process. This particular one though could be fixed quite easily.&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-1677215100084111397?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/m5nsBv0BJWERZtsCTyjX2SqQdkQ/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/m5nsBv0BJWERZtsCTyjX2SqQdkQ/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/m5nsBv0BJWERZtsCTyjX2SqQdkQ/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/m5nsBv0BJWERZtsCTyjX2SqQdkQ/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=zculi1nnQfY:LO2_a8OlJ2M:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=zculi1nnQfY:LO2_a8OlJ2M:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/zculi1nnQfY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/1677215100084111397/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=1677215100084111397" title="32 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1677215100084111397?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1677215100084111397?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/zculi1nnQfY/on-pc-members-submitting-papers.html" title="On PC members submitting papers" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>32</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/on-pc-members-submitting-papers.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEcERX48fyp7ImA9WhRXFkg.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-2876164634780340570</id><published>2011-12-23T09:00:00.000-07:00</published><updated>2011-12-23T09:00:04.077-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-23T09:00:04.077-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><category scheme="http://www.blogger.com/atom/ns#" term="social-networking" /><category scheme="http://www.blogger.com/atom/ns#" term="icdm" /><title>Thoughts on ICDM II: Social networks</title><content type="html">The other trend that caught my eye at ICDM is the dominance of social networking research. There was a trend line at the business meeting that bore this out, showing how topics loosely classified as social networking had a sharp rise among accepted papers in ICDM over the past few years.&lt;br /&gt;
&lt;br /&gt;
There were at least three distinct threads of research that I encountered at the conference, and in each of them, there's something to interest theoreticians.&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;The first strand is &lt;b&gt;modelling&lt;/b&gt;: is there a way to describe social network graphs using abstract evolution models or random graph processes. I spent some time discussing this in a &lt;a href="http://geomblog.blogspot.com/2011/12/thoughts-on-icdm-i-negative-results_20.html"&gt;previous post&lt;/a&gt;, so I won't say more about it here. Suffice it to say that there's interesting work in random graph theory underpinning this strand, as well as a lot of what I'll call 'social network archaeology': scouring existing networks for interesting structures and patterns that could be the basis for a future model.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;The second strand is &lt;b&gt;pattern discovery&lt;/b&gt;, and the key term here is 'community': is there a way to express natural communities in social networks in a graph-theoretic manner ? While modularity is one of the most popular ways of defining community, it's not the only one, and has deficiencies of its own. In particular, it's not clear how to handle "soft" or "overlapping" communities. More generally, there appears to be no easy way to capture the dynamic (or time-varying) nature of communities, something &lt;a href="http://compbio.cs.uic.edu/%7Etanya/"&gt;Tanya Berger-Wolf&lt;/a&gt; has spent a lot of energy thinking about. Again, while modelling is probably the biggest problem here, I think there's a lot of room for good theory, especially when trying to capture dynamic communities.&lt;/li&gt;
&lt;li&gt;The final strand is &lt;b&gt;influence flow&lt;/b&gt;. After all, the goal of all social networking research is to monetize it (I kid, I kid). A central question here is: can you identify the key players who can make something go viral for cheap ? is the network topology a rich enough object to identify these players, and even if you do, how can you maximize flow (on a budget, efficiently).&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
There were many papers on all of these topics -- too many to summarize here. But the landscape is more or less as I laid it out. Social networking research is definitely in its bubble phase, which means it's possible to get lots of papers published without necessarily going deep into the problem space. This can be viewed as an invitation to jump in, or a warning to stay out, depending on your inclination. And of course, the definitive tome on this topic is the &lt;a href="http://www.cs.cornell.edu/home/kleinber/networks-book/"&gt;Kleinberg-Easley book&lt;/a&gt;.&amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
This concludes my ICDM wrap-up. Amazingly, it only took me a week after the conference concluded to write these up.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-2876164634780340570?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/GTzPmYFG3Epvp3OUSrBnHD7TATY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/GTzPmYFG3Epvp3OUSrBnHD7TATY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/GTzPmYFG3Epvp3OUSrBnHD7TATY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/GTzPmYFG3Epvp3OUSrBnHD7TATY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=s3lTKZA5Pas:iKVYmLKyXBY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=s3lTKZA5Pas:iKVYmLKyXBY:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/s3lTKZA5Pas" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/2876164634780340570/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=2876164634780340570" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2876164634780340570?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2876164634780340570?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/s3lTKZA5Pas/thoughts-on-icdm-ii-social-networks.html" title="Thoughts on ICDM II: Social networks" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/thoughts-on-icdm-ii-social-networks.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0MMR3k4cCp7ImA9WhRXFUQ.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-331791398641567729</id><published>2011-12-22T17:18:00.000-07:00</published><updated>2011-12-22T17:18:06.738-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-22T17:18:06.738-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="community" /><category scheme="http://www.blogger.com/atom/ns#" term="socg" /><title>CGWeek !!!</title><content type="html">&lt;div class="tr_bq"&gt;
&lt;i&gt;If you're on the compgeom mailing list, you probably know this already, but if not, read on&lt;/i&gt;: &lt;/div&gt;
&lt;br /&gt;
There's an increased interest in expanding the nature and scope of events at SoCG beyond the conference proper. To this end, Joe Mitchell put together a committee titled "CG:APT" (CG: applications, practice and theory) to chalk out a strategy and solicit contributions.&lt;br /&gt;
&lt;br /&gt;
The call for contributions is now out, and the main nugget is this:&lt;br /&gt;
&lt;blockquote&gt;
Proposals are invited for workshops/minisymposia, or other types of
events on topics related to all aspects of computational geometry and
its applications. Typical events may feature some number of invited
speakers and possibly some number of contributed presentations.
Events may feature other forms of communications/presentations too,
e.g., via software demos, panel discussions, industry forum,
tutorials, posters, videos, implementation challenge, artwork, etc.
CG:APT events will have no formal proceedings; optionally, the
organizers may coordinate with journals to publish special issues or
arrange for other dissemination (e.g., via arXiv, webpages, printed
booklets, etc).&lt;/blockquote&gt;
In other words, anything goes ! (this is an experiment, after all). Topics are essentially anything that might be of interest geometrically in a broad sense (i.e not limited to what might appear at the conference itself).&lt;br /&gt;
 &lt;br /&gt;
I'd strongly encourage people to consider putting together a proposal for an event. The procedure is really simple, and only needs a two page proposal containing:&lt;br /&gt;
&lt;br /&gt;
&lt;ol&gt;
&lt;li&gt;

 Title/theme of the workshop/minisymposium/event&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Organizer(s) (name, email)&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Brief scientific summary and discussion of merits (to CG) of the 
    proposed topic.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;A description of the proposed format and agenda&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Proposed duration: include both minimum and ideal; we anticipate 
    durations of approximately a half day (afternoon), with the 
    possibility that some meritorious events could extend across
    two half-days.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Procedures for selecting participants and presenters&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Intended audience&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Potential invited speakers/panelists&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Plans for dissemination (e.g., journal special issues)&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Past experience of the organizer(s) relevant to the event&amp;nbsp;&lt;/li&gt;
&lt;/ol&gt;
Please note: &lt;a href="http://geomblog.blogspot.com/2010/06/on-acceptance-rates-and-flagship.html"&gt;EVERY COMMUNITY DOES THIS&lt;/a&gt; (and now, even theory). The deadline is Jan 13, 2012, and proposals should be emailed to Joe Mitchell (joseph.mitchell@stonybrook.edu). Note that the idea is for such events to be in the afternoon, after morning technical sessions of SoCG. &lt;br /&gt;
&lt;br /&gt;
There are many people out there who grumble about how insular the CG community is. Now's your chance to walk into the lion's den (&lt;a href="http://socg2012.web.unc.edu/"&gt;at Chapel Hill&lt;/a&gt;) and tell it off :). &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-331791398641567729?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/xDr44ubUlN34sbNEVfHIuxe3r5M/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/xDr44ubUlN34sbNEVfHIuxe3r5M/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/xDr44ubUlN34sbNEVfHIuxe3r5M/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/xDr44ubUlN34sbNEVfHIuxe3r5M/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=HxtMvjL0bJY:RH0j6gLTMl0:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=HxtMvjL0bJY:RH0j6gLTMl0:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/HxtMvjL0bJY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/331791398641567729/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=331791398641567729" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/331791398641567729?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/331791398641567729?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/HxtMvjL0bJY/cgweek.html" title="CGWeek !!!" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/cgweek.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkUFQ307fip7ImA9WhRXFUk.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-6895330616188493298</id><published>2011-12-22T00:00:00.000-07:00</published><updated>2011-12-22T00:50:12.306-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-22T00:50:12.306-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><category scheme="http://www.blogger.com/atom/ns#" term="icdm" /><category scheme="http://www.blogger.com/atom/ns#" term="classification" /><title>Thoughts on ICDM I: Negative results (part C)</title><content type="html">&lt;i&gt;This is the third of three posts (&lt;a href="http://geomblog.blogspot.com/2011/12/thoughts-on-icdm-i-negative-results.html"&gt;one&lt;/a&gt;, &lt;a href="http://geomblog.blogspot.com/2011/12/thoughts-on-icdm-i-negative-results_20.html"&gt;two&lt;/a&gt;) on negative results in data mining, inspired by thoughts and papers from ICDM 2011.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
If you come up with a better way of doing classification (for now let's just consider classification, but these remarks apply to clustering and other tasks as well), you have to compare it to prior methods to see which works better. (note: &lt;i&gt;this is a tricky problem in clustering that my student Parasaran Raman has been working on: more on that later&lt;/i&gt;.).&lt;br /&gt;
&lt;br /&gt;
The obvious way to compare two classification methods is how well they do compared to some ground truth (i.e labelled data), but this is a one-parameter system, because by changing the threshold of the classifier (or if you like, translating the hyperplane around),you can change the false positive and false negative rates.&lt;br /&gt;
&lt;br /&gt;
Now the more smug folks reading these are waiting with 'ROC' and "AUC" at the tip of their tongues, and they'd be right ! You can plot a curve of the false positive vs false negative rate and take the area under the curve (AUC) as a measure of the effectiveness of the classifier.&lt;br /&gt;
&lt;br /&gt;
For example, if the y axis measured increase false negatives, and the x-axis measured increasing false positives, you'd want a curve that looked like an L with the apex at the origin, and a random classifier would look like the line x+y = 1. The AUC score would be zero for the good classifier and 0.5 for the bad one (there are ways of scaling this to be between 0 and 1).&lt;br /&gt;
&lt;br /&gt;
The AUC is a popular way of comparing methods in order to balance the different error rates. It's also attractive because it's parameter-free and is objective: seemingly providing a neutral method for comparing classifiers independent of data sets, cost measures and so on.&lt;br /&gt;
&lt;br /&gt;
But is it ?&lt;br /&gt;
&lt;br /&gt;
There's a line of work culminating (so far) in the ICDM 2011 paper 'An Analysis of Performance Measures For Binary Classifiers' by &lt;a href="http://www.linkedin.com/pub/charles-parker/11/85b/4b5"&gt;Charles Parker&lt;/a&gt; of BigML.com (sorry, no link yet). The story is quite complicated, and I doubt I can do it justice in a blog post, so I'll try to summarize the highlights, with the caveat that there's nuance that I'm missing out on, and you'll have to read the papers to dig deeper.&lt;br /&gt;
&lt;br /&gt;
The story starts with "&lt;a href="http://www.springerlink.com/content/y35743hp7010g354/"&gt;Measuring classifier performance: a coherent alternative to the area under the ROC curve&lt;/a&gt;" ($$ link) by David Hand (&lt;a href="http://www.cs.iastate.edu/%7Ecs573x/Notes/hand-article.pdf"&gt;copy of paper here&lt;/a&gt;). His central result is a very surprising one:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
The AUC measure implicitly uses different misclassification costs when evaluating different classifiers, and thus is incoherent as an "objective" way of comparing classifiers. &lt;/blockquote&gt;
To unpack this result a little, what's going on is this. Suppose you have a scenario where correct classification costs you nothing, and misclassification costs you a certain amount (that could be different for the two different kinds of misclassification). You can now write down an overall misclassification cost for any threshold used for a classifier, and further you can compute the optimal threshold (that minimizes this cost). If you don't actually know the costs (as is typical) you can then ask for the expected misclassification cost assuming some distribution over the costs.&lt;br /&gt;
&lt;br /&gt;
If you run this computation through, what you end up with is a linear transformation of the AUC, where the distribution over the costs &lt;i&gt;depends on the distribution of scores assigned by the classifier&lt;/i&gt; ! In other words, as Hand puts it,&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
It is as if one measured person A’s height using a ruler calibrated in inches and person B’s using one calibrated in centimetres, and decided who was the taller by merely comparing the numbers, ignoring the fact that different units of measurement had been used&lt;/blockquote&gt;
This is a rather devastating critique of the use of the AUC. While there's been pushback (case in point is an &lt;a href="http://users.dsic.upv.es/%7Eflip/papers/TRCoherentAUC.pdf"&gt;ICML 2011 paper by Flach, Hernandez-Orallo and Ferri&lt;/a&gt; which is a very interesting read in its own right), the basic premise and argument is not contested (what's contested is the importance of finding the optimal threshold). Hand recommends a few alternatives, and in fact suggests that the distribution of costs should instead be made explicit, rather than being implicit (and subject to dependence on the data and classifiers)&lt;br /&gt;
&lt;br /&gt;
What Parker does in his ICML paper is take this further. In the first part of his paper, he extends the Hand analysis to other measures akin to the AUC, showing that such measures are incoherent as well. In the second part of his paper, he unleashes an experimental tour de force of classifier comparisons under different quality measures, showing that&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;nearly 50% of the time, measures disagree on which classifier in a pair is more effective. He breaks down the numbers in many different ways to show that if you come up with a new classification algorithm tomorrow, you'd probably be able to cherry pick a measure that showed you in a good light.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;It's the measures perceived to be more "objective" or parameter-less that had the most trouble reconciling comparisons between classifiers.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;It's also not the case that certain classifiers are more likely to cause disagreements: the problems are spread out fairly evenly.&lt;/li&gt;
&lt;li&gt;His experiments also reinforce Hand's point that it's actually better to define measures that explicitly use domain knowledge, rather than trying to achieve some objective measure of quality. Measures that were either point-based (not integrating over the entire range) or domain specific tended to work better.&amp;nbsp;&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
I'm not even close to describing the level of detail in his experiments: it's a really well-executed empirical study that should be a case study for anyone doing experimental work in the field. It's especially impressive because from personal experience I've found it to be REALLY HARD to do quality methodological studies in this area (as opposed to the "define algorithm-find-toy-data-profit" model that most DM papers seem to follow").&lt;br /&gt;
 &lt;br /&gt;
 At a deeper level, the pursuit of objective comparisons that can be reduced to a single number seems fundamentally misguided to me. First of all, we know that precise cost functions are often the wrong way to go when designing algorithms (because of modelling issues and uncertainty about the domain). Secondly, we know that individual methods have their own idiosyncracies - hence the need for 'meta' methods. And finally, we're seeing that even the meta-comparison measures have severe problems ! In some ways, we're pursuing 'the foolish hobgoblin of precision and objectivity' in an area where context is more important than we as mathematicians/engineers are used to. &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-6895330616188493298?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/nuSqLEr_sP5KZMU41XDVXrtCI98/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/nuSqLEr_sP5KZMU41XDVXrtCI98/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/nuSqLEr_sP5KZMU41XDVXrtCI98/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/nuSqLEr_sP5KZMU41XDVXrtCI98/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=tUU4mSXR6HM:h1wPv7FaKEk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=tUU4mSXR6HM:h1wPv7FaKEk:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/tUU4mSXR6HM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/6895330616188493298/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=6895330616188493298" title="6 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6895330616188493298?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6895330616188493298?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/tUU4mSXR6HM/thoughts-on-icdm-i-negative-results_22.html" title="Thoughts on ICDM I: Negative results (part C)" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>6</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/thoughts-on-icdm-i-negative-results_22.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEYGRno4fyp7ImA9WhRXFUQ.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-24764356043505088</id><published>2011-12-20T09:00:00.000-07:00</published><updated>2011-12-22T15:15:27.437-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-22T15:15:27.437-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="graphs" /><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><category scheme="http://www.blogger.com/atom/ns#" term="icdm" /><title>Thoughts on ICDM I: Negative Results (part B)</title><content type="html">&lt;a href="http://geomblog.blogspot.com/2011/12/thoughts-on-icdm-i-negative-results.html"&gt;Continuing where I left off on the idea of negative results in data mining&lt;/a&gt;, there was a beautiful paper at ICDM 2011 on the use of Stochastic Kronecker graphs to model social networks. And in this case, the key result of the paper came from theory, so stay tuned !&lt;br /&gt;
&lt;br /&gt;
One of the problems that bedevils research in social networking is the lack of good graph models. Ideally, one would like a random graph model that evolves into structures that look like social networks. Having such a graph model is nice because&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;you can target your algorithms to graphs that look like this, hopefully making them more efficient&lt;/li&gt;
&lt;li&gt;You can re-express an actual social network as a set of parameters to a graph model: it compacts the graph, and also gives you a better way of understanding different kinds of social networks: Twitter is a (0.8, 1, 2.5) and Facebook is a (1, 0.1, 0.5), and so on.&lt;/li&gt;
&lt;li&gt;If you're lucky, the model describes not just reality, but how it forms. In other words, the model captures the actual social processes that lead to the formation of a social network. This last one is of great interest to sociologists.&lt;/li&gt;
&lt;/ul&gt;
But there aren't even good graph models that capture known properties of social networks. For example, the classic &lt;a href="http://en.wikipedia.org/wiki/Erd%C5%91s%E2%80%93R%C3%A9nyi_model"&gt;Erdos-Renyi&lt;/a&gt; (ER) model of a random graph doesn't have the heavy-tailed degree distribution that's common in social networks. It also doesn't have a property that's common to large social networks: densification, or the fact that even as the network grows, the diameter stays small (implying that the network seems to get denser over time).&lt;br /&gt;
&lt;br /&gt;
One approach to fixing this models a social network as a Stochastic Kronecker graph. You can read more about these graphs here: a simple way of imagining them is that you add an edge in the graph by a random process that does a (kind of) quad tree like descent down a partitioning of the adjacency matrix and places a 1 at a leaf. SKGs were proposed by &lt;a href="http://jmlr.csail.mit.edu/papers/volume11/leskovec10a/leskovec10a.pdf"&gt;Leskovec, Chakrabarti, Kleinberg and Faloutsos&lt;/a&gt;, and include ER graphs as a special case. They appear to capture heavy tailed degree distributions as well as densification, and have become a popular model used when testing algorithms on social networks. They're also used as the method to generate benchmark graphs for the HPC benchmark &lt;a href="http://www.graph500.org/"&gt;Graph500&lt;/a&gt;. &lt;br /&gt;
&lt;br /&gt;
But a thorough understanding of the formal properties of SKGs has been lacking. In "&lt;a href="http://arxiv.org/abs/1102.5046"&gt;An In-Depth Analysis of Stochastic Kronecker Graphs&lt;/a&gt;", Seshadhri, Pinar and Kolda show some rather stunning results. Firstly, they provide a complete analysis of the degree distribution of an SKG, and prove a beautiful result showing that it oscillates between having a lognormal and exponential tail. Their theorems are quite tight: plots of the actual degree distribution match their theorems almost perfectly, and convincingly display the weird oscillations in the degree frequencies (see Figure 2 in the paper).&lt;br /&gt;
&lt;br /&gt;
Secondly, they also formally explain why a noisy variant of SKGs appears to have much more well-behaved degree distribution, proving that a slightly different generative process will indeed generate the desired distribution observed in practice.&lt;br /&gt;
&lt;br /&gt;
Finally, they also show that the graphs generated by an SKG have many more isolated nodes than one might expect, sometimes upto 75% of the total number of vertices ! This has direct implications for the use of SKGs as benchmarks. Indeed, they mention that the Graph500 committee is considering changing their benchmarks based on this paper - now that's impact :)&lt;br /&gt;
&lt;br /&gt;
What I like about this paper is that it proves definitive theoretical results about a popular graph model, and very clearly points out that it has significant problems. So any methodology that involves using SKGs for analysis will now have to be much more careful about the claims it makes.&lt;br /&gt;
&lt;br /&gt;
p.s There's also more supporting evidence on the lack of value of SKGs from another metric (the clustering coefficient, that measures how many configurations uv, uw also have the third edge vw). Real social networks have a high CC, and SKGs don't.This was first mentioned by &lt;a href="http://www.cs.ucsb.edu/%7Eravenben/publications/pdf/socialmodel-www10.pdf"&gt;Sala, Cao, Wilson, Zablit, Zheng and Zhao&lt;/a&gt;, and Seshadhri/Pinar/Kolda have &lt;a href="http://arxiv.org/abs/1110.4925"&gt;more empirical evidence&lt;/a&gt; for it as well. (Disclaimer: I was pointed to these two references by Seshadhri: my opinions are my own though :))&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-24764356043505088?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/3lT_BobE3CN2QTU_P2jagEjzpUs/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/3lT_BobE3CN2QTU_P2jagEjzpUs/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/3lT_BobE3CN2QTU_P2jagEjzpUs/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/3lT_BobE3CN2QTU_P2jagEjzpUs/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=XK3bpO--fwA:C3Y9hiX9ufE:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=XK3bpO--fwA:C3Y9hiX9ufE:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/XK3bpO--fwA" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/24764356043505088/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=24764356043505088" title="7 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/24764356043505088?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/24764356043505088?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/XK3bpO--fwA/thoughts-on-icdm-i-negative-results_20.html" title="Thoughts on ICDM I: Negative Results (part B)" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>7</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/thoughts-on-icdm-i-negative-results_20.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkIASXc6cCp7ImA9WhRXEks.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-27777306597220525</id><published>2011-12-18T20:15:00.001-07:00</published><updated>2011-12-18T20:15:48.918-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-18T20:15:48.918-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><category scheme="http://www.blogger.com/atom/ns#" term="icdm" /><title>Thoughts on ICDM I: Negative Results (part A)</title><content type="html">I just got back from &lt;a href="http://icdm2011.cs.ualberta.ca/"&gt;ICDM (the IEEE conference on Data Mining)&lt;/a&gt;. Data mining conferences are quite different from theory conferences (and much more similar to ML or DB conferences): there are numerous satellite events (workshops, tutorials and panels in this case), many more people (551 for ICDM, and that's on the smaller side), and a wide variety of papers that range from SODA-ish results to user studies and industrial case studies.&lt;br /&gt;
&lt;br /&gt;
While your typical data mining paper is still a string of techniques cobbled together without rhyme or reason (anyone for spectral manifold-based correlation clustering with outliers using MapReduce?), there are some general themes that might be of interest to an outside viewer. What I'd like to highlight here is a trend (that I hope grows) in negative results.&lt;br /&gt;
&lt;br /&gt;
It's not particularly hard to invent a new method for doing data mining. It's much harder to show why certain methods will fail, or why certain models don't make sense. But in my view, the latter is exactly what the field needs in order to give it a strong inferential foundation to build on (I'll note here that I'm talking specifically about data mining, NOT machine learning - the difference between the two is left for another post).&lt;br /&gt;
&lt;br /&gt;
In the interest of brevity, I'll break this trend down in three posts. The first result I want to highlight isn't even quite a result, and isn't even from ICDM ! It's actually from KDD 2011 (back in August this year). The paper is &lt;a href="http://dl.acm.org/citation.cfm?id=2020496"&gt;Leakage in Data Mining: Formulation, Detection, and Avoidance&lt;/a&gt;, by Shachar Kaufman, Saharon Rosset, and Claudia Perlich, and got the Best Paper Award at KDD this year. &lt;br /&gt;
&lt;br /&gt;
The problem they examine is "leakage", or the phenomenon that even in a 'train model and test model" framework, it is possible for valuable information to "leak" from the test data or even from other sources to the training system, making a learned model look surprisingly effective and even give it predictive powers beyond what it really can do. Obviously, the problem is that when such models are then applied to new data, their performance is worse than expected, compared to a model that wasn't "cheating" in this way.&lt;br /&gt;
&lt;br /&gt;
They cite a number of examples, including many that come from the data challenges that have become all the rage. The examples highlight different kinds of leakage, including "seeing the future", cross contamination of data from other data sets, and even leakage by omission, where a well-intentioned process of anonymization actually leaks data about the trend being analyzed.&lt;br /&gt;
&lt;br /&gt;
While there are no results of any kind (experimental or theoretical), the authors lay out a good taxonomy of common ways in which leakage can happen, and describe ways of avoiding leakage (when you have control over the data) and detecting it (when you don't). What makes their paper really strong is that they illustrate this with specific examples from recent data challenges, explaining how the leakage occurs, and how the winners took advantage of this leakage explicitly or implicitly.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
There are no quick and dirty fixes for these problems: ultimately, leakage can even happen through bad modelling, and sometimes modellers fail to remove leaky data because they're trying to encourage competitors to build good predictive models. Ironically, it is this encouragement that can lead to less predictive models on truly novel data. But the paper makes a strong argument that the way we collect data and use it to analyze our algorithms is fundamentally flawed, and this is especially true for the more sophisticated (and mysterious) algorithms that might be learning models through complex exploitation of the trained data.&lt;br /&gt;
&lt;br /&gt;
It remains to be seen whether the recommendations of this paper will be picked up, or if there will be more followup work along these lines. I hope it does.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-27777306597220525?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/1zuVb6N9t_OfTEv_q7nFMj8WevA/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/1zuVb6N9t_OfTEv_q7nFMj8WevA/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/1zuVb6N9t_OfTEv_q7nFMj8WevA/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/1zuVb6N9t_OfTEv_q7nFMj8WevA/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=_CvJrnWLkZc:sJaVj7-ph7U:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=_CvJrnWLkZc:sJaVj7-ph7U:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/_CvJrnWLkZc" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/27777306597220525/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=27777306597220525" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/27777306597220525?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/27777306597220525?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/_CvJrnWLkZc/thoughts-on-icdm-i-negative-results.html" title="Thoughts on ICDM I: Negative Results (part A)" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/thoughts-on-icdm-i-negative-results.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkcNQHk-eip7ImA9WhRXEkk.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-1759580395714890465</id><published>2011-12-18T14:34:00.000-07:00</published><updated>2011-12-18T14:34:51.752-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-18T14:34:51.752-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="k-12" /><category scheme="http://www.blogger.com/atom/ns#" term="teaching" /><title>A rant about school science fair projects</title><content type="html">It's science fair time at my son's school. He's in the first grade, so admittedly there's not&amp;nbsp; a lot that he can do without a reasonable amount of *cough* parental*cough* help. But why do we not have a 'mathematics' fair or a 'programming fair ?&lt;br /&gt;
&lt;br /&gt;
The science fair project format is very confining. You have to propose a hypothesis, tabulate a bunch of results, do some analysis, and discuss conclusions, with nice charts/graphs and other such science cultism. Even if you're interested in something more 'mathematical', there's no real way of shoehorning it into the format. A year or so ago, a colleague of mine was asking me about origami-related projects (because his daughter loves paper folding) but apart from experimenting with knapsack-style algorithms to determine how to fold a ruler into a specified length, we couldn't figure out something that fit into the 'hypothesis-experiment' setting. &lt;br /&gt;
&lt;br /&gt;
Granted, it's a &lt;b&gt;science &lt;/b&gt;fair. But at this age level, I assume the whole point of participation in science fairs is about learning something about science, rather than conducting rigorous analysis. You could equally well learn about something mathematical and demonstrate that knowledge. But there's no forum to do that in.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-1759580395714890465?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/9Y0_qcg-eKfa86AiCDVW7A5rR90/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/9Y0_qcg-eKfa86AiCDVW7A5rR90/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/9Y0_qcg-eKfa86AiCDVW7A5rR90/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/9Y0_qcg-eKfa86AiCDVW7A5rR90/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=kApszlNXAq4:hA05UJ0cAAE:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=kApszlNXAq4:hA05UJ0cAAE:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/kApszlNXAq4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/1759580395714890465/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=1759580395714890465" title="8 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1759580395714890465?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1759580395714890465?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/kApszlNXAq4/rant-about-school-science-fair-projects.html" title="A rant about school science fair projects" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>8</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/rant-about-school-science-fair-projects.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkMCSX87eCp7ImA9WhRXEEg.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-7088782313023783379</id><published>2011-12-16T09:54:00.000-07:00</published><updated>2011-12-16T09:54:28.100-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-16T09:54:28.100-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="simons foundation" /><category scheme="http://www.blogger.com/atom/ns#" term="funding" /><category scheme="http://www.blogger.com/atom/ns#" term="fellowships" /><title>Simons Fellowship for theory grad students.</title><content type="html">The Simons Foundation has been a great boon for theoretical computer science, supporting postdocs galore, and even running a "sooper-seekrit" search for a new TCS institute.&lt;br /&gt;
&lt;br /&gt;
Their latest initiative is at the grad student level. &lt;a href="https://simonsfoundation.org/mps-simons-graduate-fellowships-rfa"&gt;They're offering a 2-year fellowship to grad students "with an outstanding track record of research accomplishments"&lt;/a&gt;. I think the idea is to support students who've established a good body of work, and could use this to coast towards their graduation and ramp up their research even more.&lt;br /&gt;
&lt;br /&gt;
The support is considerable:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
Each award will provide annual support for the following: &lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Fellowship stipend as set by the student’s institution.&lt;/li&gt;
&lt;li&gt;Tuition and fees&amp;nbsp;at the student's institution, to the extent previously covered by other outside funding.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;$3,000 in additional support for the student to use at his/her discretion.&lt;/li&gt;
&lt;li&gt;$5,000 per year toward the student’s health insurance and other fringe benefits.&lt;/li&gt;
&lt;li&gt;$5,000 per year for travel, books, a computer and other supplies.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;$5,000 in institutional overhead allowance.&lt;/li&gt;
&lt;/ul&gt;
&lt;/blockquote&gt;
Fellowships will start September 2012 and end August 2014.&lt;br /&gt;
&lt;br /&gt;
How do you apply ?&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;strong&gt;&lt;/strong&gt;Applicants may apply through proposalCENTRAL (&lt;a href="http://proposalcentral.altum.com/default.asp?GMID=50"&gt;http://proposalcentral.altum.com/default.asp?GMID=50&lt;/a&gt;) beginning December 7, 2011. The deadline to apply is &lt;strong style="color: red;"&gt;May 1, 2012&lt;/strong&gt;.
 Please coordinate submission of the proposal with the appropriate 
officials in accordance with institution/university policies. Please see
 the Application Instructions for further information. &lt;/blockquote&gt;
&lt;blockquote class="tr_bq"&gt;
 &lt;strong&gt;Application Requirements:&lt;/strong&gt;&lt;br /&gt;
 &lt;ul&gt;
&lt;li&gt;Research Statement&lt;strong&gt; (two page limit):&lt;/strong&gt;
 A statement summarizing the applicant’s research contributions, 
research plans for the immediate future, and career goals. References do
 not need to be included within the page limit, but should not exceed an
 additional page.&lt;/li&gt;
&lt;li&gt;A curriculum vitae &lt;strong&gt;(two page limit)&lt;/strong&gt;, which includes institution, advisor, and a list of publications.&lt;/li&gt;
&lt;li&gt;A letter of support from the Department Chair.&lt;/li&gt;
&lt;li&gt;A letter of support from the student’s advisor.&lt;/li&gt;
&lt;li&gt;A
 letter from a reference outside the student’s institution. This letter 
must be submitted directly via proposalCENTRAL by the reference. Please 
see the Application Instructions for more information.&lt;/li&gt;
&lt;li&gt;Thesis topic.&lt;/li&gt;
&lt;/ul&gt;
&lt;/blockquote&gt;
&lt;br /&gt;
 (HT Sampath Kannan for pointing this out)&lt;br /&gt;
 &lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-7088782313023783379?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/uFOlmMH-5rqT5FTfoTcgCuFAEqs/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/uFOlmMH-5rqT5FTfoTcgCuFAEqs/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/uFOlmMH-5rqT5FTfoTcgCuFAEqs/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/uFOlmMH-5rqT5FTfoTcgCuFAEqs/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=MPKNNni1vN0:OivlTguJhIo:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=MPKNNni1vN0:OivlTguJhIo:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/MPKNNni1vN0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/7088782313023783379/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=7088782313023783379" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/7088782313023783379?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/7088782313023783379?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/MPKNNni1vN0/simons-fellowship-for-theory-grad.html" title="Simons Fellowship for theory grad students." /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/simons-fellowship-for-theory-grad.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU4NRHwycCp7ImA9WhRQE0s.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-8613806333590753728</id><published>2011-12-08T10:03:00.001-07:00</published><updated>2011-12-08T10:06:35.298-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-08T10:06:35.298-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="awards" /><category scheme="http://www.blogger.com/atom/ns#" term="acm" /><title>ACM Fellows, 2011</title><content type="html">Many theoreticians on &lt;a href="http://www.acm.org/press-room/news-releases/2011/fellows-2011/"&gt;this year's list of ACM Fellows&lt;/a&gt;:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Serge Abiteboul&lt;/li&gt;
&lt;li&gt;Guy Blelloch&lt;/li&gt;
&lt;li&gt;David Eppstein&lt;/li&gt;
&lt;li&gt;Howard Karloff&lt;/li&gt;
&lt;li&gt;Joe Mitchell&lt;/li&gt;
&lt;li&gt;Janos Pach&lt;/li&gt;
&lt;li&gt;Diane Souvaine&lt;/li&gt;
&lt;/ul&gt;
Congratulations to them, and to all the Fellows this year (especially my collaborator Divesh Srivastava)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-8613806333590753728?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/UZYpsXEiU5zSlKRAK6ALPKmjHeA/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/UZYpsXEiU5zSlKRAK6ALPKmjHeA/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/UZYpsXEiU5zSlKRAK6ALPKmjHeA/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/UZYpsXEiU5zSlKRAK6ALPKmjHeA/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=5C3uoTnlhcQ:voiOU96r6pA:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=5C3uoTnlhcQ:voiOU96r6pA:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/5C3uoTnlhcQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/8613806333590753728/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=8613806333590753728" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8613806333590753728?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8613806333590753728?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/5C3uoTnlhcQ/acm-fellows-2011.html" title="ACM Fellows, 2011" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/acm-fellows-2011.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ak8BR348fyp7ImA9WhRQE0g.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-515816469173254442</id><published>2011-12-08T08:37:00.001-07:00</published><updated>2011-12-08T08:40:56.077-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-08T08:40:56.077-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="community" /><category scheme="http://www.blogger.com/atom/ns#" term="socg" /><category scheme="http://www.blogger.com/atom/ns#" term="acm" /><title>SoCG and ACM: The Results Show</title><content type="html">From Mark de Berg:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
The bottom row of the table [below] gives the number of votes for each of the three options

&lt;br /&gt;
&lt;br /&gt;
&lt;div style="margin-left: 36.0pt;"&gt;
&lt;span style="font-family: &amp;quot;Arial&amp;quot;,&amp;quot;sans-serif&amp;quot;; font-size: 10.0pt;"&gt;&lt;span&gt;A.&lt;span style="font: 7.0pt &amp;quot;Times New Roman&amp;quot;;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style="font-family: &amp;quot;Arial&amp;quot;,&amp;quot;sans-serif&amp;quot;; font-size: 10.0pt;"&gt;I
prefer to stay with ACM.&lt;/span&gt;&lt;/div&gt;
&lt;div style="margin-left: 18.0pt;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="margin-left: 36.0pt;"&gt;
&lt;span style="font-family: &amp;quot;Arial&amp;quot;,&amp;quot;sans-serif&amp;quot;; font-size: 10.0pt;"&gt;&lt;span&gt;B.&lt;span style="font: 7.0pt &amp;quot;Times New Roman&amp;quot;;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style="font-family: &amp;quot;Arial&amp;quot;,&amp;quot;sans-serif&amp;quot;; font-size: 10.0pt;"&gt;If
involvement of ACM can be restricted to publishing the proceedings, at low cost
for SoCG, then I prefer to stay with ACM; otherwise I prefer to leave ACM.&lt;/span&gt;&lt;/div&gt;
&lt;div style="margin-left: 18.0pt;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="margin-left: 36.0pt;"&gt;
&lt;span style="font-family: &amp;quot;Arial&amp;quot;,&amp;quot;sans-serif&amp;quot;; font-size: 10.0pt;"&gt;&lt;span&gt;C.&lt;span style="font: 7.0pt &amp;quot;Times New Roman&amp;quot;;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style="font-family: &amp;quot;Arial&amp;quot;,&amp;quot;sans-serif&amp;quot;; font-size: 10.0pt;"&gt;&amp;nbsp;I
prefer to leave ACM, and organize SoCG as an independent conference with
proceedings published in LIPIcs.&lt;/span&gt;&lt;/div&gt;
&lt;div style="margin-left: 36pt;"&gt;
&lt;br /&gt;&lt;/div&gt;
and it also gives a breakdown of the votes by the number of SoCG’s that the voter attended in the last 10 years.  &lt;span style="font-family: &amp;quot;Arial&amp;quot;,&amp;quot;sans-serif&amp;quot;;"&gt;&amp;nbsp;&lt;/span&gt;

&lt;br /&gt;
&lt;table border="0" cellpadding="0" cellspacing="0" style="border-collapse: collapse;"&gt;
 &lt;tbody&gt;
&lt;tr style="min-height: 14.35pt;"&gt;
  &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.85pt;" valign="top" width="69"&gt;
  &lt;div align="right" class="MsoNormal" style="text-align: right; text-autospace: none;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="background: #99ccff; border-bottom: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div class="MsoNormal" style="text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;A:
  stay&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="background: #99ccff; border-bottom: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 106.0pt;" valign="top" width="141"&gt;
  &lt;div class="MsoNormal" style="text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;B:
  proceedings only&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="background: #99ccff; border-bottom: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div class="MsoNormal" style="text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;C:
  leave&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: solid windowtext 1.0pt; border-right: none; border-top: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;total&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr style="min-height: 14.35pt;"&gt;
  &lt;td style="background: #99ccff; border-right: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.85pt;" valign="top" width="69"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;A: 0&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;4&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 106.0pt;" valign="top" width="141"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;3&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;3&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="border-left: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;10&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr style="min-height: 14.35pt;"&gt;
  &lt;td style="background: #99ccff; border-right: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.85pt;" valign="top" width="69"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;B: 1-2&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;6&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 106.0pt;" valign="top" width="141"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;16&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;19&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="border-left: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;41&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr style="min-height: 14.35pt;"&gt;
  &lt;td style="background: #99ccff; border-right: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.85pt;" valign="top" width="69"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;C: 3-5&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;11&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 106.0pt;" valign="top" width="141"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;15&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;16&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="border-left: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;42&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr style="min-height: 14.35pt;"&gt;
  &lt;td style="background: #99ccff; border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.85pt;" valign="top" width="69"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;D: &amp;gt;5&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="border-bottom: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;8&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="border-bottom: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 106.0pt;" valign="top" width="141"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;14&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="border-bottom: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;9&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: solid windowtext 1.0pt; border-right: none; border-top: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;31&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;tr style="min-height: 14.35pt;"&gt;
  &lt;td style="border-right: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.85pt;" valign="top" width="69"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;total &lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;29&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 106.0pt;" valign="top" width="141"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;48&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center; text-autospace: none;"&gt;
&lt;span style="color: black;"&gt;47&lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
  &lt;td style="border-left: solid windowtext 1.0pt; border: none; min-height: 14.35pt; padding: 0cm 1.5pt 0cm 1.5pt; width: 51.0pt;" valign="top" width="68"&gt;
  &lt;div align="center" class="MsoNormal" style="text-align: center;"&gt;
&lt;span style="color: black;"&gt;124 &lt;/span&gt;&lt;/div&gt;
&lt;/td&gt;
 &lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
&lt;span style="font-family: &amp;quot;Arial&amp;quot;,&amp;quot;sans-serif&amp;quot;;"&gt;&lt;/span&gt;

&lt;br /&gt;
&lt;br /&gt;&lt;br /&gt;Based on the result of the Poll, the Steering Committee decided to start negotiating with ACM to see if they can offer SoCG an arrangement in which involvement of ACM is limited primarily to publishing the proceedings, with the possible option for greater involvement if the local organizers request/require it.&lt;/blockquote&gt;
It will be interesting to see how this plays out. &lt;span style="font-family: &amp;quot;Arial&amp;quot;,&amp;quot;sans-serif&amp;quot;;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-515816469173254442?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/LY0vNUABKDYs8xqlZCKKIUUc-ac/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/LY0vNUABKDYs8xqlZCKKIUUc-ac/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/LY0vNUABKDYs8xqlZCKKIUUc-ac/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/LY0vNUABKDYs8xqlZCKKIUUc-ac/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=lMPFPotrBFY:pnQaFmqWdGc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=lMPFPotrBFY:pnQaFmqWdGc:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/lMPFPotrBFY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/515816469173254442/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=515816469173254442" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/515816469173254442?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/515816469173254442?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/lMPFPotrBFY/socg-and-acm-results-show.html" title="SoCG and ACM: The Results Show" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/12/socg-and-acm-results-show.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0ADQXszfip7ImA9WhRSGU4.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-1445811966124427664</id><published>2011-11-21T20:55:00.001-07:00</published><updated>2011-11-21T22:29:30.586-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-21T22:29:30.586-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="community" /><category scheme="http://www.blogger.com/atom/ns#" term="socg" /><title>On getting rid of ACM affiliation...</title><content type="html">&lt;a href="http://geomblog.blogspot.com/2011/11/socg-and-acm-relationship-status.html"&gt;As I mentioned earlier&lt;/a&gt;, the SoCG steering committee is running a poll on whether to separate SoCG from ACM and run it as an independent symposium with proceedings published by the Dagstuhl LIPIcs series. &lt;br /&gt;
&lt;br /&gt;
At the time I had considered writing a post expressing my own thoughts on the matter, and then promptly forgot about it. The poll is still open (although probably many of you have voted already) and so I thought (with some external nudging :)) that I'd state my position here (one that I summarized in brief for the poll).&lt;br /&gt;
&lt;br /&gt;
If you'd rather not read any more, here's the summary:&amp;nbsp; &lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;blockquote class="tr_bq"&gt;
I think it's a bad idea. &lt;/blockquote&gt;
&lt;div style="text-align: left;"&gt;
And here's why&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
I understand the rationale behind this: it can be a little difficult to work with ACM. ACM does take a contingency fee that increases registration. The cost of proceedings is a significant fraction of the total budget, and the timelines can be a little tricky to work with. These and more are outlined in the &lt;a href="http://computational-geometry.org/documents/ACM-Poll.pdf"&gt;poll document&lt;/a&gt;, and you can read all the points being made there. &lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
But that's not why I think this is a bad idea (and I do have specific objections to the claims above). I think it's a bad idea because ACM is not just a conference organizing, proceedings publishing enterprise that has bad taste in style files. It's also the home of SIGACT.&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
For better or worse, SIGACT is the flagship representative body for theoretical computer science in the US. General theory activities like the Theory Matters wiki and the SIGACT committee for the advancement of theoretical computer science were jump-started by SIGACT. Association with SIGACT means that the work you do is 'theoryCS (A)' in some shape of form.&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
If you're doing geometry in the US, then affiliation with SIGACT is not a bad thing. It means that you're part of the larger theory community. It means that when the theory community makes pitches to the NSF to get more funding for theory research, you're included. It also helps because there aren't other communities (SIGGRAPH?) ready to absorb geometry into their fold.&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
While de-linking from ACM doesn't mean that we turn in our 'theory badges', it doesn't help the already fraught relationship between computational geometry and the larger theory community. And while the relationship with ACM is not crucial to the survival of the geometry community in the US, being part of a larger theory community that speaks the same language is crucial. &lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
p.s The center of mass in CG is closer to Europe than many might realize. I understand that our European colleagues could care less about US funding and community issues, which is fair for them. But this matter affects US-resident folks more than ACM's surcharges affect the European researchers, and&amp;nbsp; our community isn't so big that we can withstand shocks to one side of it. &lt;/div&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-1445811966124427664?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/fjlbcfM8LENDMuEVWPKtK3-X0lk/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/fjlbcfM8LENDMuEVWPKtK3-X0lk/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/fjlbcfM8LENDMuEVWPKtK3-X0lk/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/fjlbcfM8LENDMuEVWPKtK3-X0lk/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=owLmh2o9RcU:8rvILBDt8ao:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=owLmh2o9RcU:8rvILBDt8ao:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/owLmh2o9RcU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/1445811966124427664/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=1445811966124427664" title="22 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1445811966124427664?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1445811966124427664?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/owLmh2o9RcU/on-getting-rid-of-acm-affiliation.html" title="On getting rid of ACM affiliation..." /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>22</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/11/on-getting-rid-of-acm-affiliation.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DE8BRXk4cCp7ImA9WhRSE0Q.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-5209210867021764396</id><published>2011-11-15T15:37:00.001-07:00</published><updated>2011-11-15T15:40:54.738-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-15T15:40:54.738-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="soda" /><category scheme="http://www.blogger.com/atom/ns#" term="travel" /><category scheme="http://www.blogger.com/atom/ns#" term="funding" /><title>SODA 2012 student travel support</title><content type="html">While it's not a &lt;a href="http://abcnews.go.com/blogs/lifestyle/2011/10/10000-free-round-trip-tickets-to-japan/"&gt;free round trip to Japan for blogging&lt;/a&gt;, ....&lt;br /&gt;
&lt;br /&gt;
Well actually it is !&lt;br /&gt;
&lt;br /&gt;
David Johnson asks me to point out that the deadline for applying for student travel support to SODA 2012 has been extended to &lt;b&gt;Nov 29&lt;/b&gt;. &lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
The United States National Science Foundation,  IBM, and Microsoft  
have provided funding to support student travel grants. The nominal 
award is  $1000 toward travel expenses to Kyoto, Japan for SODA12, or 
ALENEX12 or  ANALCO12. In special cases of large travel costs somewhat 
larger grants may be  given. &lt;/blockquote&gt;
&lt;blockquote class="tr_bq"&gt;

 Any full-time  student in good standing who is affiliated with a 
United States institution is  eligible to receive an award. All awardees
 are responsible for paying for  meeting registration fees. However, 
registration fees may be included in travel  expenses &lt;/blockquote&gt;
&lt;blockquote class="tr_bq"&gt;

 Top priority  will be given to students presenting papers at the 
meeting, with second  priority to students who are co-authors of papers.
 Ties will be broken in favor  of students who have not attended a prior
 SODA.&lt;/blockquote&gt;
For more information, visit the &lt;a href="http://www.siam.org/meetings/da12/tsupport.php"&gt;SIAM travel support page&lt;/a&gt;.&amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
And if you'd also like to blog about the conference once you're there, the &lt;a href="http://cstheory.blogoverflow.com/"&gt;cstheory blog&lt;/a&gt; is always looking for volunteers !&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-5209210867021764396?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/Sv-1ywLfLRVvQShoRJekw3YCWlo/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Sv-1ywLfLRVvQShoRJekw3YCWlo/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/Sv-1ywLfLRVvQShoRJekw3YCWlo/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Sv-1ywLfLRVvQShoRJekw3YCWlo/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=35EABrEcTGE:73aaNxUy9QU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=35EABrEcTGE:73aaNxUy9QU:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/35EABrEcTGE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/5209210867021764396/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=5209210867021764396" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/5209210867021764396?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/5209210867021764396?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/35EABrEcTGE/soda-2012-student-travel-support.html" title="SODA 2012 student travel support" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/11/soda-2012-student-travel-support.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkEFSXg8eSp7ImA9WhRSE0k.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-7696856316448295652</id><published>2011-11-15T01:40:00.001-07:00</published><updated>2011-11-15T02:16:58.671-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-15T02:16:58.671-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="dimensionality-reduction" /><title>An intuitive argument for the JL lemma ?</title><content type="html">Hal Daumé (as is his wont) asked me a simple question with no easy answer. His question was&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;i&gt;Is there a proof of the Johnson-Lindenstrauss Lemma that can be explained to an undergraduate ? &lt;/i&gt;&lt;/blockquote&gt;
While there are different "simple" proofs of the JL Lemma (the Gupta-Dasgupta proof is one such), it's not clear whether these are "undergraduate" level. So instead of answering his original question, I decided to change it to&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;i&gt;Is there a proof of the JL Lemma that isn't "geometric" ? &lt;/i&gt;&lt;/blockquote&gt;
It's a little odd to even ask the question, considering the intrinsic geometric nature of the lemma. But there's a reasonably straightforward way of seeing how the bound emerges without needing to worry too much about random rotations, matrices of Gaussians or the Brunn-Minkowski theorem.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Warning&lt;/b&gt;: what follows is a heuristic argument that helps suggest why the bound is in the form that it is: it should not be confused for an actual proof. &lt;br /&gt;
&lt;br /&gt;
In its original form, the JL Lemma says that any set of $n$ points in $R^d$ can be embedded in $R^k$ with $k = O(\log n/\epsilon^2)$ such that all distances are preserved to within a $1+\epsilon$ factor. But the real result at the core of this is that there is a linear mapping taking a unit vector in $R^d$ to a vector of norm in the range $1\pm \epsilon$ in $R^k$, where $k = 1/\epsilon^2$ (the rest follows by scaling and an application of the union bound).&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Trick #1&lt;/b&gt;: Take a set of values $a_1, \ldots, a_n$ and set $Y = \sum_i a_i r_i$, where $r_i$ is chosen (iid) to be +1 or -1 with equal probability. Then $E[Y^2] = \sum a_i^2$.This can be verified by an easy calculation.&lt;br /&gt;
&lt;br /&gt;
So now consider the vector $v$. &lt;b&gt;&lt;i&gt;Let's assume that $v$'s "mass" is roughly equally distributed among its coordinates&lt;/i&gt;&lt;/b&gt;. Take a random sample of $d/k$ of the coordinates of $v$ and apply the above trick to the values. Under the above assumption, the resulting $Y^2$ will have roughly $1/k$ of the total (squared) mass of $v$. Scale up by $k$.&lt;br /&gt;
&lt;br /&gt;
This is one estimator of the norm of $v$. It is unbiased and it has a bounded maximum value because of the assumption. This means that we can apply a Chernoff bound over a set of $k$ such estimators. Roughly speaking, the probability of deviation from the mean is $\exp(-\epsilon^2 k)$, giving the desired value of $k$.&lt;br /&gt;
&lt;br /&gt;
But how do we enforce the assumption ? By applying a random Fourier transform (or actually, a random Hadamard transform). this "spreads" the mass of the vector out among the coordinates (technically by ensuring an upper bound on the $\ell_\infty$ norm).&lt;br /&gt;
&lt;br /&gt;
That's basically it. Almost all the papers that follow the Ailon-Chazelle work proceed in this manner, with increasing amounts of cleverness to reuse samples, or only run the Fourier transform locally, or even derandomize the process. What distinguishes this presentation of the result from the earlier approaches (which basically boil down to: populate a matrix with entries drawn from a distribution having subGaussian tails) is that it separates the "spreading" step (called the preconditioner) from the latter, more elementary step (the sampling of coordinates). It turns out that in practice the preconditioner can often be omitted without incurring too much error, yielding an extremely efficient (and sparse) linear transform. &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-7696856316448295652?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/XDUXyclxcgdhB1N7dtO8FZRIbhE/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/XDUXyclxcgdhB1N7dtO8FZRIbhE/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/XDUXyclxcgdhB1N7dtO8FZRIbhE/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/XDUXyclxcgdhB1N7dtO8FZRIbhE/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=h2fojD0kUZQ:kvKXn7aav_4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=h2fojD0kUZQ:kvKXn7aav_4:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/h2fojD0kUZQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/7696856316448295652/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=7696856316448295652" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/7696856316448295652?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/7696856316448295652?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/h2fojD0kUZQ/intuitive-argument-for-jl-lemma.html" title="An intuitive argument for the JL lemma ?" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>4</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/11/intuitive-argument-for-jl-lemma.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkcNQ3w-cCp7ImA9WhRTGUw.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-1966717831172710826</id><published>2011-11-10T01:34:00.002-07:00</published><updated>2011-11-10T01:34:52.258-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-10T01:34:52.258-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="jmm" /><category scheme="http://www.blogger.com/atom/ns#" term="conferences" /><title>Computational Geometry at the joint math meetings</title><content type="html">The &lt;a href="http://jointmathematicsmeetings.org/jmm"&gt;Joint Mathematics Meetings&lt;/a&gt;&amp;nbsp; is billed as the "largest annual mathematics meeting in the world". In 2012 it's in Boston between Jan 4 and 7, and I'm told that there will be over 2000 presenters.&lt;br /&gt;
&lt;br /&gt;
Happily, some of these will be geometers.&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Satyen Devadoss and Joe O'Rourke will be running an &lt;a href="http://jointmathematicsmeetings.org/meetings/2138_maasc"&gt;MAA short course&lt;/a&gt; based on their new book on Jan 2-3 before the main conference starts. &lt;/li&gt;
&lt;li&gt;I'm running a SIAM minisymposium on computational geometry on Thursday Jan 5 in the morning. We have a great and diverse set of speakers:&amp;nbsp;
&lt;ul&gt;
&lt;li&gt;Erin Chambers:&amp;nbsp; &lt;a href="http://www.ams.org/amsmtgs/2138_abstracts/1077-68-2460.pdf"&gt;Computing interesting topological features on surface embedded graphs&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Tasos Sidiropoulos: &lt;a href="http://www.ams.org/amsmtgs/2138_abstracts/1077-68-1790.pdf"&gt;Optimal stochastic planarization&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Seth Pettie: &lt;a href="http://www.ams.org/amsmtgs/2138_abstracts/1077-05-2046.pdf"&gt;An Introduction to Davenport-Schinzel               Sequences, Forbidden 0-1 Matrices, and                              Their Geometric Applications&lt;/a&gt;.      &lt;/li&gt;
&lt;li&gt;Yusu Wang: &lt;a href="http://www.ams.org/amsmtgs/2138_abstracts/1077-68-1943.pdf"&gt;Toward understanding complex data: graph               Laplacians on singular manifolds&lt;/a&gt;. &lt;/li&gt;
&lt;li&gt;Jeff Phillips: &lt;a href="http://www.ams.org/amsmtgs/2138_abstracts/1077-51-2277.pdf"&gt;Computational Geometry on Uncertain Data&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Eric Price: &lt;a href="http://www.ams.org/amsmtgs/2138_abstracts/1077-51-2732.pdf"&gt;Geometric Aspects of Compressive Sensing&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
I'll be talking about &lt;a href="http://www.ams.org/amsmtgs/2138_abstracts/1077-51-2840.pdf"&gt;Horoball Hulls and Extents in Positive Definite Space.&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Finally, Erik Demaine will be delivering the &lt;a href="http://jointmathematicsmeetings.org/meetings/national/jmm2012/2138_speakers"&gt;AMS-MAA-SIAM Gerald and Judith Porter Public Lecture&lt;/a&gt; on Saturday, titled "Geometric puzzles: Algorithms and complexity" &lt;/li&gt;
&lt;/ul&gt;
&lt;i&gt;&amp;nbsp;&lt;/i&gt;There will also be an all-day AMS special session on &lt;a href="http://jointmathematicsmeetings.org/meetings/national/jmm2012/2138_program_ss61.html#2138:SS61A"&gt;Computational and Applied Topology&lt;/a&gt; (also on Thursday). &lt;br /&gt;
&lt;br /&gt;
So there's lots to do at the JMM if you're interested in geometry. So if you're in the area in January, drop by ! &lt;br /&gt;
&lt;ul&gt;
&lt;/ul&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-1966717831172710826?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/ZhnIcpOS6Rt1HwBrfeDhBsbGscI/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ZhnIcpOS6Rt1HwBrfeDhBsbGscI/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/ZhnIcpOS6Rt1HwBrfeDhBsbGscI/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ZhnIcpOS6Rt1HwBrfeDhBsbGscI/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=3hH5xMWCW-0:LObUsMfjE3I:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=3hH5xMWCW-0:LObUsMfjE3I:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/3hH5xMWCW-0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/1966717831172710826/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=1966717831172710826" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1966717831172710826?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1966717831172710826?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/3hH5xMWCW-0/computational-geometry-at-joint-math.html" title="Computational Geometry at the joint math meetings" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/11/computational-geometry-at-joint-math.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0IHRHw_eip7ImA9WhRTFEw.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-3429627320989029288</id><published>2011-11-04T09:00:00.000-06:00</published><updated>2011-11-04T09:12:15.242-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-04T09:12:15.242-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="community" /><category scheme="http://www.blogger.com/atom/ns#" term="socg" /><category scheme="http://www.blogger.com/atom/ns#" term="acm" /><title>SoCG and ACM: Relationship status - Complicated.</title><content type="html">&lt;div class="tr_bq"&gt;
Mark de Berg (secretary of the SoCG steering committee), sent out an email to the compgeom mailing list that starts:&lt;/div&gt;
&lt;blockquote&gt;
Since its start 27 years ago, SoCG has always been affiliated to ACM. This means that the proceedings are published by ACM, and that the symposium is organized “sponsored by ACM” (more precisely, sponsored by ACM SIGACT &amp;amp; ACM SIGGRAPH) or “in cooperation with ACM”. The latter happened only a couple of times, namely when SoCG was in Korea in 2007 and when it was in Denmark in 2009. Being affiliated to ACM has certain advantages, but also certain disadvantages, as detailed below. Hence, at the business meeting of this year’s SoCG in Paris, an alternative was discussed: organizing SoCG as an independent symposium, with the proceedings being published by Dagstuhl in their LIPIcs series (see below). A straw poll was taken, and the vast majority of the participants wanted the Steering Committee to investigate this issue further, which we do through this opinion poll. We hope you want to participate in this important poll. &lt;/blockquote&gt;
&lt;br /&gt;
If you have an interest in how the relationship between SoCG and the ACM continues (or doesn't), I'd strongly encourage you to participate in the poll. The poll documents &lt;strike&gt;will eventually be&lt;/strike&gt; are posted on &lt;a href="http://computational-geometry.org/"&gt;http://computational-geometry.org&lt;/a&gt;, &lt;strike&gt;and in the meantime here's a google doc you can read.&amp;nbsp;&lt;/strike&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-3429627320989029288?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/WyH0nr-ghwptWksL2r1MBfWa_B0/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/WyH0nr-ghwptWksL2r1MBfWa_B0/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/WyH0nr-ghwptWksL2r1MBfWa_B0/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/WyH0nr-ghwptWksL2r1MBfWa_B0/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=x1vc1u0e9vA:-1kmr7UYKo8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=x1vc1u0e9vA:-1kmr7UYKo8:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/x1vc1u0e9vA" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/3429627320989029288/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=3429627320989029288" title="5 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3429627320989029288?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3429627320989029288?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/x1vc1u0e9vA/socg-and-acm-relationship-status.html" title="SoCG and ACM: Relationship status - Complicated." /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>5</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/11/socg-and-acm-relationship-status.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08DRXk5eip7ImA9WhRTE0o.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-227357962019751484</id><published>2011-11-03T22:10:00.000-06:00</published><updated>2011-11-03T22:11:14.722-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-03T22:11:14.722-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="community" /><category scheme="http://www.blogger.com/atom/ns#" term="publishing" /><title>Life in a  crowd-sourced research world</title><content type="html">(&lt;i&gt;Sung to the tune of "War", and with a &lt;a href="http://www.youtube.com/watch?v=r-bA9FYB8HY"&gt;Jackie Chan accent&lt;/a&gt; for bonus points&lt;/i&gt;)&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
Jo-ur-nals !&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
What are they good for !&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
Absolutely nothing !&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
There are popular tropes in current discussions on crowd-sourcing research. There's the "scientists as Mean Girls" view of the current state of affairs. There's the utopian "Let a thousand papers bloom in the open research garden". There's the anti-capitalist "Down with evil money-grubbing publishers", and there's of course the always popular "Everything tastes better with crowd-sourced reputation points and achievement badges".&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
But have we really thought through the implications of doing away with the current frameworks for "&lt;a href="http://agtb.wordpress.com/2011/11/02/the-problem-with-journals/"&gt;dissemination, verification and attention management&lt;/a&gt;" ?&lt;br /&gt;
&lt;br /&gt;
Here's a &lt;a href="http://www.urbandictionary.com/define.php?term=tl%3Bdr"&gt;tl;dr&lt;/a&gt; a la Cosma Shalizi:&amp;nbsp; &lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;i&gt;A more open research environment, where all work is published before review, and anyone is free to comment on any work in public without repercussions, is both valuable as well as&amp;nbsp; more chaotic and unpleasant than we might be ready for&lt;/i&gt;.&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;
&lt;br /&gt;
Consider a pure "publish-then-filter" world, in which you dumped your paper in a public repository that had commenting, reviewing, reputation features, achievement badges and whatever other technological goodies you wanted to throw in.&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
You'd be in a world not unlike the world that writers and musicians live in today. Since big music/book publishers (read "journals") take a big cut of the royalty revenues ("journal subscriptions") in exchange for promotion/marketing (read "stamps of authenticity"), many authors and musicians have developed smaller but successful brands by going on the road themselves, doing online promotions, cultivating their fan base with special material, downloads, T-shirts, event tickets and what not, and relying on underground word-of-mouth to establish a presence. &lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;b&gt;Are you ready to do the same ? &lt;/b&gt;&lt;/blockquote&gt;
&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
It's naive to think that merely putting papers on a repository and waiting for attention to appear will actually work to disseminate your work. Attention is probably the most valuable resource available to us in this connected era, and the one most fiercely fought over by everyone. No one is going to even be able to pay attention to your work unless you promote it extensively, OR unless there are external ways of signalling value.&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
If you think that reputation mechanisms will help, I will merely ask you to look at the attention garnered by the latest Ke$ha single compared to the attention given to &amp;lt;insert name of your favorite underground-not-selling-out-obscure-indie-band-that-will-set-the-world-on-fire here &amp;gt;&lt;insert band="" favorite-unappreciated-will-change-the-world-without-selling-out="" here="" name="" your=""&gt;.&amp;nbsp;&lt;/insert&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
Secondly, I think as researchers, we would cringe at the kind of explicit promotion that authors/musicians have to indulge in. Would you really want to sell tickets for the "1.46 approximation to graphic TSP paper tour?". How would you afford it ?&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
There's a third aspect to living in a crowd-sourced research world: a loss of mental space. While it should be clear to anyone who follows my blog/tweets/G+/comments on cstheory that I enjoy the modern networked world, it's also clear to me that actual research requires some distance.&lt;br /&gt;
&lt;br /&gt;
In &lt;a href="http://en.wikipedia.org/wiki/Anathem"&gt;Anathem&lt;/a&gt;, Neal Stephenson describes a monastery of mathematics, where monks do their own research, and at regular intervals (1/10/100/1000 years) open their doors to the "seculars" to reveal their discoveries to the outside world.&lt;br /&gt;
&lt;br /&gt;
Even with collaborations, skype, shared documents and github, you still need time (and space) to think. And in a completely open research environment where everything you post can be commented on by anyone,&amp;nbsp; I can assure you that you'll spend most of your time dealing with comment threads and slashdotting/reditting/HNing (if you're lucky). Are you ready to deploy a 24 hour rapid-response team to deal with the flaming your papers will get ? &lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
Let me be very clear about something. I think there are many academic institutions (journals and conferences especially) that are in desperate need of overhauls and the Internet makes much of this possible. I think it's possible (but I'm less convinced) that we are on the cusp of a new paradigm for doing research, and debates like the ones we are having are extremely important to shape this new paradigm if it comes into being. In that context, I think that what 
Timothy Gowers (&lt;a href="http://gowers.wordpress.com/2011/10/31/how-might-we-get-to-a-new-model-of-mathematical-publishing/"&gt;here&lt;/a&gt; and now &lt;a href="http://gowers.wordpress.com/2011/11/03/a-more-modest-proposal/"&gt;here&lt;/a&gt;) and Noam Nisan (&lt;a href="http://agtb.wordpress.com/2011/11/02/the-problem-with-journals/"&gt;here &lt;/a&gt;and &lt;a href="http://agtb.wordpress.com/2011/11/03/the-good-things-about-journals/"&gt;here&lt;/a&gt;) are trying to do is very constructive: not
 just complain about the current state of affairs or defend the status 
quo, but try to identify the key things that are good AND bad about our 
current system AND find a &lt;i&gt;path&lt;/i&gt; to a desired destination.&lt;br /&gt;
&lt;br /&gt;
But human nature doesn't change that quickly. New ways of disseminating, valuing and verifying research will definitely change what's valued and what's not, and can help open up the research enterprise to those who feel the current system isn't working (i.e most of us). But when you replace one evaluation system by another, don't be too sure that the new system is fairer - it might merely change what gets valued (i.e the peaks might change but not the distribution itself)&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-227357962019751484?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/jOgkG3zLx9cxdlYWDaZYlzfAQpA/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/jOgkG3zLx9cxdlYWDaZYlzfAQpA/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/jOgkG3zLx9cxdlYWDaZYlzfAQpA/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/jOgkG3zLx9cxdlYWDaZYlzfAQpA/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=VI0YNF8aS-Y:1eIU4-9oFmk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=VI0YNF8aS-Y:1eIU4-9oFmk:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/VI0YNF8aS-Y" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/227357962019751484/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=227357962019751484" title="9 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/227357962019751484?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/227357962019751484?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/VI0YNF8aS-Y/life-in-crowd-sourced-research-world.html" title="Life in a  crowd-sourced research world" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>9</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/11/life-in-crowd-sourced-research-world.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0QGR3szeip7ImA9WhRTEUU.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-442599902548814219</id><published>2011-11-01T17:15:00.000-06:00</published><updated>2011-11-01T17:15:26.582-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-01T17:15:26.582-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><category scheme="http://www.blogger.com/atom/ns#" term="clustering" /><title>Large-Data Clustering Part I: Clusters of clusters</title><content type="html">&lt;span style="font-style: italic;"&gt;(this is part of an &lt;/span&gt;&lt;a href="http://geomblog.blogspot.com/2009/06/clustering-occasional-series.html" style="font-style: italic;"&gt;occasional series of essays on clustering&lt;/a&gt;&lt;span style="font-style: italic;"&gt;: for all posts in this topic, &lt;/span&gt;&lt;a href="http://geomblog.blogspot.com/p/conceptual-view-of-clustering.html" style="font-style: italic;"&gt;click here&lt;/a&gt;&lt;span style="font-style: italic;"&gt;)&amp;nbsp;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
I don't know why, but I've been struggling with a series of posts on large-data clustering for a while now. Distilling out the key ideas in a way that is intuitive and yet rigorous has been trickier than I thought. But here goes ...&lt;br /&gt;
&lt;br /&gt;
Let's consider a prototypical clustering algorithm: $k$-means. In a $k$-means iteration, you have to scan each point and determine its nearest cluster center. Then you have to organize all the points that share a cluster center and compute a new one by averaging. Each iteration takes $nk$ time, and this isn't even accounting for high dimensions. &lt;br /&gt;
&lt;br /&gt;
This algorithm doesn't scale well to large data, needless to say. But clustering, like most data mining tasks, is needed most acutely when we're dealing with large data. So what do we do ?&lt;br /&gt;
&lt;br /&gt;
In the next series of posts, I'll try to walk you through some of the key principles that get used when designing large data clustering algorithms. As always, the twin principles of sampling and streaming will be the keys to the kingdom.&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;u&gt;&lt;b&gt;Clusters can be formed by clustering clusters. &lt;/b&gt;&lt;/u&gt;&lt;/div&gt;
&lt;br /&gt;
One of the ideas that makes large-data clustering tractable is that you can cluster clusters (say that three times quickly, will ya ?). $k$-means is a particularly good example of this. I can think of a cluster center as a "representative weighted point" and discard the data associated with it. This is critical, because now I can just worry about the cluster centers themselves, and repeat.&lt;br /&gt;
&lt;br /&gt;
The advantage of this is locality. I can take small chunks of data and cluster them efficiently. Then I can combine the cluster reps from each chunk to get a smaller set of reps, and so on.&amp;nbsp; Many of the "standard" large-data clustering methods (BIRCH, CLARA, 
CLARANS and the like) are based on this principle. As a concept, it's 
sound. But does it guarantee anything ?&amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
The catch of course is error propagation. How do I make sure that in this repeated (hierarchical) clustering process, I didn't commit to a bad clustering&amp;nbsp; early on because of locality ? It seems like I could pay a lot of error at each level, limiting my ability to group data sets into small chunks (because small chunks mean many levels).&lt;br /&gt;
&lt;br /&gt;
It turns out that there are clever ways around this problem by careful merging. The first example of this is a &lt;a href="http://dl.acm.org/citation.cfm?id=1024021"&gt;paper from 1997 by Charikar, Chekuri, Feder and Motwani&lt;/a&gt;. They look at the $k$-&lt;i&gt;center&lt;/i&gt; problem in a general metric space (minimize the &lt;i&gt;maximum &lt;/i&gt;radius) and their technique illustrates this principle well.&lt;br /&gt;
&lt;br /&gt;
Their algorithm is quite elegant, and works in phases. In each phase, they first merge cluster centers together to ensure that all resulting cluster centers are far apart. Then they insert new points into clusters as long as they don't get too big, or create new clusters. This happens as long the number of clusters stays below $k$. When it crosses $k$, the phase ends, and then the process repeats.&lt;br /&gt;
&lt;br /&gt;
Note that the algorithm reads in points and processes them on the fly in a single "pass". The reason it works inspite of the repeated mergings is because of a nifty property of the $k$-center algorithm (which lies at the heart of the Gonzalez 2-approximation for $k$-center):&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
If you can produce $k+1$ points that are all at distance at least $d$ from each other, then the optimal $k$-clustering has diameter at least $d$. &lt;/blockquote&gt;
This is because of a 'quantitative pigeonhole principle': At least two of these points must be in a single cluster, which then has diameter at least $d$. &lt;br /&gt;
&lt;br /&gt;
There are some tricks needed to figure out how much you can expand each cluster when inserting, and how far apart cluster centers should be in the phase after the current one. I'll spare you the details, but they lead to an 8-approximation, which can be improved using a neat randomization trick to a $2e$-approximation.&lt;br /&gt;
&lt;br /&gt;
&lt;hr /&gt;
&lt;br /&gt;
The above trick works because we can build a witness structure that lower bounds OPT, and so the merging can be done carefully so that error doesn't propagate badly. What happens if you change the cost function to the $k$-&lt;i&gt;median&lt;/i&gt; though, where the witness isn't as straightforward ?&lt;br /&gt;
&lt;br /&gt;
An excellent source here is the work by &lt;a href="http://infolab.stanford.edu/%7Eloc/tkdepaper.pdf"&gt;Guha, Meyerson, Mishra, Motwani and O'Callaghan&lt;/a&gt;. This paper, which combines prior work on both the theory and practice of $k$-median clustering, presents the following idea:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
Break the data set into pieces. Cluster each set "loosely", using many more than $k$ clusters. Weight each cluster by the number of points assigned to it. Cluster these weighted cluster centers to find the desired $k$ clusters. &lt;/blockquote&gt;
The key observations they make are:&lt;br /&gt;
&lt;ol&gt;
&lt;li&gt;The total cost of clustering the pieces isn't too much larger than the overall clustering cost (so it gives a good lower bound). This follows from the triangle inequality.&lt;/li&gt;
&lt;li&gt;The new "weighted" instance admits a solution that again is not too much larger than the optimal cost (is actually within a factor of 6 for any partition). This again follows from the triangle inequality.&lt;/li&gt;
&lt;/ol&gt;
The last piece of this puzzle is: how to cluster each set "loosely" ? They propose using a bicriterion method, whose cost is compared to the optimal $k$-median cost, but is allowed to use more medians. Such bicriterion methods are often easier to design.&lt;br /&gt;
&lt;br /&gt;
Note that there's nothing stopping them from using their own algorithm recursively to cluster the weighted cluster centers. What's neat about this is that it can be adapted to a streaming setting using another standard trick.&lt;br /&gt;
&lt;br /&gt;
Specifically, if you have an algorithm that breaks things into pieces and then recombines them (a two-level scheme), you can first recursively apply it to get a multi-level hierarchical scheme, and then you can implement this in a streaming setting by updating (potentially) an entire side of the computation tree whenever a new item appears. This can also be viewed as a lazy implementation of the hierarchical strategy.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6555947-442599902548814219?l=geomblog.blogspot.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/HyKmmDNAShr6rug4QXxZ_3n5zIM/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/HyKmmDNAShr6rug4QXxZ_3n5zIM/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/HyKmmDNAShr6rug4QXxZ_3n5zIM/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/HyKmmDNAShr6rug4QXxZ_3n5zIM/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=k201Q0NN-rU:ReHjvIA1Gbc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=k201Q0NN-rU:ReHjvIA1Gbc:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/k201Q0NN-rU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/442599902548814219/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=442599902548814219" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/442599902548814219?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/442599902548814219?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/k201Q0NN-rU/large-data-clustering-part-i-clusters.html" title="Large-Data Clustering Part I: Clusters of clusters" /><author><name>Suresh Venkatasubramanian</name><uri>https://profiles.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://geomblog.blogspot.com/2011/11/large-data-clustering-part-i-clusters.html</feedburner:origLink></entry></feed>

