<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:blogger="http://schemas.google.com/blogger/2008" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;DEIFSHc_eip7ImA9WhBaEEU.&quot;"><id>tag:blogger.com,1999:blog-6555947</id><updated>2013-05-20T15:48:39.942-06:00</updated><category term="journals" /><category term="clustering" /><category term="deadline" /><category term="big-data" /><category term="workshops" /><category term="icdm" /><category term="barriers" /><category term="rajeev motwani" /><category term="seminars" /><category term="jeff phillips" /><category term="books" /><category term="accountability" /><category term="latex" /><category term="duality" /><category term="funding" /><category term="polymath" /><category term="fonts" /><category term="community" /><category term="polytopes" /><category term="column" /><category term="game theory" /><category term="ecml-pkdd" /><category term="cs.DC" /><category term="quantum" /><category term="classification" /><category term="soda" /><category term="PPAD" /><category term="travel" /><category term="simons foundation" /><category term="combinatorial geometry" /><category term="xkcd" /><category term="memes" /><category term="stoc2012" /><category term="madalgo" /><category term="society" /><category term="polymath research" /><category term="bregman" /><category term="bibtex" /><category term="k-12" /><category term="video" /><category term="cs.CG" /><category term="social-networking" /><category term="postdocs" /><category term="fellowships" /><category term="acceptances" /><category term="p-vs-nc" /><category term="embarrassing" /><category term="cfp" /><category term="p-vs-np" /><category term="humor" /><category term="obituary" /><category term="expanders" /><category term="academy" /><category term="models" /><category term="acm" /><category term="IMA" /><category term="fwcg" /><category term="misc" /><category term="beamer" /><category term="geometry" /><category term="gpu" /><category term="8f-cg" /><category term="soda2011" /><category term="sdm2011" /><category term="software" /><category term="current-distance" /><category term="esa" /><category term="reviewing" /><category term="nsf" /><category term="topology" /><category term="nih" /><category term="blogging" /><category term="dimacs" /><category term="conferences" /><category term="talks" /><category term="shape" /><category term="svn" /><category term="randomness" /><category term="theory.SE" /><category term="partha niyogi" /><category term="media" /><category term="potd" /><category term="technology" /><category term="ipe" /><category term="gct" /><category term="shonan" /><category term="jmm" /><category term="utah" /><category term="cricket" /><category term="memorial" /><category term="alenex" /><category term="cs.DS" /><category term="massive" /><category term="active-learning" /><category term="graphs" /><category term="eda" /><category term="cs.LG" /><category term="complexity" /><category term="conf-blogs" /><category term="arxiv" /><category term="socg-2010" /><category term="announcement" /><category term="cstheory" /><category term="ams" /><category term="metrics" /><category term="dimensionality-reduction" /><category term="focs2012" /><category term="DBR" /><category term="posters" /><category term="coding-theory" /><category term="women-in-theory" /><category term="kernels" /><category term="productivity" /><category term="conjecture" /><category term="stoc" /><category term="cs.CC" /><category term="teaching" /><category term="GIA" /><category term="cra" /><category term="empirical" /><category term="miscellaneous" /><category term="research" /><category term="personal" /><category term="distributions" /><category term="nips" /><category term="submissions" /><category term=".02" /><category term="programming" /><category term="wads" /><category term="focs2010" /><category term="streaming" /><category term="implementation" /><category term="multicore" /><category term="data-mining" /><category term="knuth" /><category term="alenex2011" /><category term="ICS" /><category term="graph minors" /><category term="analco" /><category term="deolalikar" /><category term="quant-ph" /><category term="socg2012" /><category term="math.PR" /><category term="publishing" /><category term="focs" /><category term="turing" /><category term="hirsch" /><category term="candes" /><category term="jobs" /><category term="blogosphere" /><category term="twitter" /><category term="surveys" /><category term="advising" /><category term="godel" /><category term="morris" /><category term="history" /><category term="awards" /><category term="parallelism" /><category term="hangouts" /><category term="distributed-learning" /><category term="coffee" /><category term="career" /><category term="traffic" /><category term="writing" /><category term="socg" /><category term="large-data" /><category term="outreach" /><category term="sampling" /><title>The Geomblog</title><subtitle type="html">Ruminations on computational geometry, algorithms, theoretical computer science and life</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://geomblog.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>1196</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/TheGeomblog" /><feedburner:info uri="thegeomblog" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><feedburner:browserFriendly>This is an XML content feed. It is intended to be viewed in a newsreader or syndicated to another site, subject to copyright and fair use.</feedburner:browserFriendly><entry gd:etag="W/&quot;CkAHQ3c_fSp7ImA9WhBaEE0.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-6453436952237520645</id><published>2013-05-19T15:58:00.001-06:00</published><updated>2013-05-19T15:58:52.945-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-19T15:58:52.945-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="misc" /><category scheme="http://www.blogger.com/atom/ns#" term="humor" /><title>21st century proverbs</title><content type="html">Having nothing better to do, I decided to create a twitter meme: updating proverbs for the 21st century. Use the hashtag #21stcentproverbs and join in the fun below.&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; 


&lt;a class="twitter-timeline" data-widget-id="336238922613018624" href="https://twitter.com/search?q=%2321stcentproverbs"&gt;Tweets about "#21stcentproverbs"&lt;/a&gt;
&lt;script&gt;!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+"://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");&lt;/script&gt;
&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=SXX6ok5KB4Q:U0xzcA0wnEM:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=SXX6ok5KB4Q:U0xzcA0wnEM:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/SXX6ok5KB4Q" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/6453436952237520645/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=6453436952237520645" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6453436952237520645?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6453436952237520645?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/SXX6ok5KB4Q/21st-century-proverbs.html" title="21st century proverbs" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/05/21st-century-proverbs.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUMBSH06eip7ImA9WhBbGUk.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-661681186324737511</id><published>2013-05-19T01:10:00.004-06:00</published><updated>2013-05-19T01:10:59.312-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-19T01:10:59.312-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="announcement" /><title>Coding, Complexity and Sparsity 2013 (SPARC) 2013. </title><content type="html">Atri Rudra reminds us that the 3rd incarnation of SPARC is soon to be upon us (disclaimer: I'll be speaking at the event):
&lt;br /&gt;
&lt;blockquote&gt;
Efficient and effective transmission, storage, and retrieval of information on a large-scale are among the core technical problems in the modern digital revolution. The massive volume of data necessitates the quest for mathematical and algorithmic methods for efficiently describing, summarizing, synthesizing, and, increasingly more critical, deciding when and how to discard data before storing or transmitting it. Such methods have been developed in two areas: coding theory, and sparse approximation (SA) (and its variants called compressive sensing (CS) and streaming algorithms).&amp;nbsp;&lt;/blockquote&gt;
&lt;blockquote&gt;
Coding theory and computational complexity are both well established fields that enjoy fruitful interactions with one another. On the other hand, while significant progress on the SA/CS problem has been made, much of that progress is concentrated on the feasibility of the problems, including a number of algorithmic innovations that leverage coding theory techniques, but a systematic computational complexity treatment of these problems is sorely lacking. The workshop organizers aim to develop a general computational theory of SA and CS (as well as related areas such as group testing) and its relationship to coding theory. This goal can be achieved only by bringing together researchers from a variety of areas.&amp;nbsp;&lt;/blockquote&gt;
&lt;blockquote&gt;
The Coding, Complexity and Sparsity workshop (SPARC 13) will be held in Ann Arbor, MI on Aug 5-7.&amp;nbsp;&lt;/blockquote&gt;
&lt;blockquote&gt;
These will be hour-long lectures designed to give students an introduction to coding theory, complexity theory/pseudo-randomness, and compressive sensing/streaming algorithms. We will have a poster session during the workshop and everyone is welcome to bring a poster but graduate students and postdocs are especially encouraged to give a poster presentation.&amp;nbsp;&lt;/blockquote&gt;
&lt;blockquote&gt;
This is the third incarnation of the workshop and the previous two workshops were also held in Ann Arbor in August of 2011 and 2012.&amp;nbsp;&lt;/blockquote&gt;
&lt;blockquote&gt;
Confirmed speakers:
&lt;br /&gt;
&lt;div class="p1"&gt;
* Jin Yi Cai (University of Wisconsin, Madison)&lt;/div&gt;
&lt;div class="p1"&gt;
* Shafi Goldwasser (MIT)&lt;/div&gt;
&lt;div class="p1"&gt;
* Piotr Indyk (MIT)&lt;/div&gt;
&lt;div class="p1"&gt;
* Swastik Kopparty (Rutgers University)&lt;/div&gt;
&lt;div class="p1"&gt;
* Dick Lipton (Georgia Tech)&lt;/div&gt;
&lt;div class="p1"&gt;
* Andrew McGregor (University of Massachusetts, Amherst)&lt;/div&gt;
&lt;div class="p1"&gt;
* Raghu Meka (IAS)&lt;/div&gt;
&lt;div class="p1"&gt;
* Jelani Nelson &amp;nbsp;&amp;nbsp;&amp;nbsp;(Harvard)&lt;/div&gt;
&lt;div class="p1"&gt;
* Eric Price (MIT)&lt;/div&gt;
&lt;div class="p1"&gt;
* Christopher Ré (University of Wisconsin, Madison)&lt;/div&gt;
&lt;div class="p1"&gt;
* Shubhangi Saraf (Rutgers University)&lt;/div&gt;
&lt;div class="p1"&gt;
* Suresh Venkatasubramanian (University of Utah)&lt;/div&gt;
&lt;div class="p1"&gt;
* David Woodruff (IBM)&lt;/div&gt;
&lt;div class="p1"&gt;
* Mary Wootters &amp;nbsp;&amp;nbsp;&amp;nbsp;(Michigan)&lt;/div&gt;
* Shuheng Zhou (Michigan)&lt;br /&gt;
&lt;div class="p1"&gt;
&lt;br /&gt;&lt;/div&gt;
We have some funding for graduate students and postdocs with preference given to those who will be presenting posters. For registration and other details, please look at the workshop webpage:

&lt;a href="http://eecs.umich.edu/eecs/SPARC2013/"&gt;http://eecs.umich.edu/eecs/SPARC2013/&lt;/a&gt;&lt;/blockquote&gt;
&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=IngjsFHCRFg:3HMzpz1AohQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=IngjsFHCRFg:3HMzpz1AohQ:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/IngjsFHCRFg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/661681186324737511/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=661681186324737511" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/661681186324737511?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/661681186324737511?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/IngjsFHCRFg/coding-complexity-and-sparsity-2013.html" title="Coding, Complexity and Sparsity 2013 (SPARC) 2013. " /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/05/coding-complexity-and-sparsity-2013.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ak8HRXY_fSp7ImA9WhBbEUs.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-6385442411434432906</id><published>2013-05-10T00:38:00.001-06:00</published><updated>2013-05-10T00:53:54.845-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-10T00:53:54.845-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="gpu" /><title>On GPU algorithms</title><content type="html">&lt;a href="http://blog.computationalcomplexity.org/2013/05/gpu-computing.html"&gt;Lance talks about GPU algorithms&lt;/a&gt; in his latest post:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
The theory community hasn't seem to catch on yet. There should be some nice theoretical model that captures the vector and other operations of a GPGPU and then we should search for algorithms that make the best use of the hardware. The theory world writes algorithms for non-existent quantum computers but not for the machines that we currently use.&lt;/blockquote&gt;
Well.&lt;br /&gt;
&lt;br /&gt;
As someone who was doing GPU algorithms back when this meant flipping state bits on the fixed function pipeline (this is the GPU analog of "I wrote video games in assembly"), maybe a little perspective is in order.&lt;br /&gt;
&lt;br /&gt;
Aside: I actually gave a talk on GPU algorithms at Chicago when Lance was there back in 2004. Clearly I wasn't interesting enough to get him to take note :).&lt;br /&gt;
&lt;br /&gt;
Back in the early 2000s, I got quite interested in GPU algorithms. This came about from a summer project that &lt;a href="http://sma.epfl.ch/~moustafa/"&gt;Nabil Mustafa&lt;/a&gt; was doing with &lt;a href="http://www2.research.att.com/~krishnas/"&gt;Shankar Krishnan&lt;/a&gt; and myself on real-time map simplification. Shankar had this cool idea to try and accelerate the &lt;a href="http://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm"&gt;Douglas-Peuker&lt;/a&gt; algorithm by implementing it on a GPU, which at the time meant trying to retrofit the algorithm to a very strange OpenGL rendering pipeline.&lt;br /&gt;
&lt;br /&gt;
One thing led to another, and it got us &lt;a href="http://arxiv.org/abs/cs/0310002"&gt;thinking more abstractl&lt;/a&gt;y about what kinds of computations could be done on the GPU. This was pre-CUDA, which meant that essentially we could abstract the GPU machine model as a massively parallel SIMD architecture in which each "processor" ran a simple straight line program (no loops, limited conditionals, and small memory - much like a streaming algorithm). The parallelism lent itself nicely to geometric problems, and we wrote papers on &lt;a href="http://www.tandfonline.com/doi/abs/10.1080/13658810500390794#.UYySrCsjpz0"&gt;map simplification&lt;/a&gt;, &lt;a href="http://dl.acm.org/citation.cfm?id=545381.545456"&gt;depth contours&lt;/a&gt;, and &lt;a href="http://link.springer.com/chapter/10.1007/978-3-540-39658-1_50#page-1"&gt;general geometric optimization&lt;/a&gt;. We also &lt;a href="http://www.cs.utah.edu/~suresh/papers/csg/csg.pdf"&gt;designed an algorithm for a CSG rendering problem&lt;/a&gt; that had buried inside it a simple streaming model for proving lower bounds: in fact we were able to show a exponential gap (in the number of streaming passes needed) between deterministic and randomized median finding in this model (the conference wasn't interested in the lower bounds though :)).&lt;br /&gt;
&lt;br /&gt;
In a bout of perfect timing, I stopped working on GPUs a year or so before CUDA. At the time, my reasons were simple. I was tired of the computational model changing on me every six months, and didn't have the resources to keep buying the latest and greatest graphics cards (although AT&amp;amp;T was very generous in getting me a state of the art card originally). It was also annoying that in order to really exploit the power of the hardware, I needed secret sauce that was only revealed if you had NVIDIA inside connections (or went to the NVIDIA U events).&lt;br /&gt;
&lt;br /&gt;
Then CUDA came along and everyone went nuts. If you go to &lt;a href="http://hgpu.org/"&gt;hgpu.org&lt;/a&gt; (originally gpgpu.org) you'll see the number of papers being published every year on GPU-accelerated methods. CUDA changed (quite radically) the way in which we thought about GPU algorithms: the memory model became more sophisticated, and the SIMD component was more powerful, although it was still a good idea to avoid excessive looping and conditionals.&lt;br /&gt;
&lt;br /&gt;
Eventually I got dragged back into the GPU world. I&lt;a href="http://www.cs.utah.edu/~suresh/web/2011/02/12/evaluating-graph-colorings-on-the-gpu-poster/"&gt; collaborated on a &amp;nbsp;paper&lt;/a&gt; on graph coloring which was very instructive in demonstrating the differences between a GPU and straight up parallel machine. I also did a &lt;a href="http://www.madalgo.au.dk/html_sider/2_5_Events/SS2012/Course_material2012.html"&gt;series of lectures on GPU algorithms&lt;/a&gt; at a 2012 MADALGO summer school on new models of computing. Incidentally, folks at Utah spend a lot of time working on issues relating to GPUs: &lt;a href="http://www.cs.utah.edu/~mhall/"&gt;Mary Hall's group&lt;/a&gt; has some nifty autotuning tools for mapping algorithms to the GPU, and &lt;a href="http://www.cs.utah.edu/~ganesh/"&gt;Ganesh Gopalakrishnan&lt;/a&gt; has been thinking about memory consistency issues for GPUs.&lt;br /&gt;
&lt;br /&gt;
Is there a good "GPU model" that theoreticians can study ? I think this is the wrong question. I think the GPU is one point (multi-core CPUs are another) in a space of tradeoffs between local computation, memory latency, and communication. I&lt;a href="http://geomblog.blogspot.com/2012/04/distributed-learning-new-model.html"&gt;'ve been saying for a while now&lt;/a&gt; that communication appears to be the key resource to understand theoretically when dealing with our new networked/distributed/multiprocessor world. GPU algorithms are interesting to study as one example of how communication affects a computation. But the "right" way to deal with this involves focusing on communication, and not the GPU model per se. The latter has many idiosyncracies and what I'd consider distractions that take away from a clean formal understanding.&lt;br /&gt;
&lt;br /&gt;
I should add that I'm not even the only person from theory-land who's been thinking about GPU-derived models. There's a great body of work on multicore models (see &lt;a href="http://www.madalgo.au.dk/html_sider/2_5_Events/SS2012/Course_material2012.html"&gt;Phil Gibbons' lecture notes &lt;/a&gt;at the same summer school), and I recall Michael Mitzenmacher having &lt;a href="http://mybiasedcoin.blogspot.com/2009/10/gpu-news.html"&gt;recent work on GPU-accelerated hashing&lt;/a&gt;. Uzi Vishkin had a &lt;a href="http://www.umiacs.umd.edu/conferences/tmc2009/"&gt;STOC workshop&lt;/a&gt; a few years ago on multicore models as well. (Turing Award winner)&amp;nbsp;&lt;a href="http://people.seas.harvard.edu/~valiant/bridging-2010.pdf"&gt;Valiant's bridging model&lt;/a&gt;&amp;nbsp;clearly&amp;nbsp;needs to get a lot more attention as well so that people don't think no one's paying attention to the models.&lt;br /&gt;
&lt;br /&gt;
For more on this, also see my colleague &lt;a href="http://www.cs.utah.edu/~jeffp/teaching/cs7960.html"&gt;Jeff Phillips' lecture notes on GPU&lt;/a&gt;s&amp;nbsp;from his class on new models of computation.&lt;br /&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=sOvLARYt9eU:Ql6kdfCrxhE:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=sOvLARYt9eU:Ql6kdfCrxhE:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/sOvLARYt9eU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/6385442411434432906/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=6385442411434432906" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6385442411434432906?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6385442411434432906?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/sOvLARYt9eU/on-gpu-algorithms.html" title="On GPU algorithms" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/05/on-gpu-algorithms.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEEMRHg6cCp7ImA9WhBXEU0.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-3891713037308746311</id><published>2013-03-23T23:24:00.002-06:00</published><updated>2013-03-23T23:24:45.618-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-23T23:24:45.618-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="technology" /><title>Free, Freemium, and Paid</title><content type="html">There was a time when I'd bridle at the idea of having to pay for software or services. But I browse the iTunes app store now, and see people pleading to have the chance to pay for an app that they like, so that the authors won't stop updating it. The whole kerfuffle with Google Reader, Keep and Evernote is another example of how people have begun to prefer to pay for products, rather than rely on something free.&lt;br /&gt;
&lt;br /&gt;
It feels like the end of an era where open source and free software (not the same thing, but often referred to in the same breath) were the default. Maybe we've come to the realization that nothing is really ever free, and that it's more realistic to get the costs out in the open rather than "being the product".&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=pvU2rtOudBI:3KKlGEhFhMI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=pvU2rtOudBI:3KKlGEhFhMI:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/pvU2rtOudBI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/3891713037308746311/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=3891713037308746311" title="7 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3891713037308746311?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3891713037308746311?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/pvU2rtOudBI/free-freemium-and-paid.html" title="Free, Freemium, and Paid" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>7</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/03/free-freemium-and-paid.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkACSH0zeCp7ImA9WhBQE0U.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-1849237238317162806</id><published>2013-03-15T17:06:00.000-06:00</published><updated>2013-03-15T17:06:09.380-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-15T17:06:09.380-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="column" /><title>The SIGACT CG column</title><content type="html">As you might have just discovered, I'm the second half of the two-headed monster that's taken over the SIGACT Geometry Column after Joe O'Rourke stepped down (&lt;a href="http://www.cs.uwm.edu/faculty/ad/"&gt;Adrian Dumitrescu&lt;/a&gt;, who hopefully does not mind being referred to as the head of a monster, is the other half). &lt;a href="http://www.cs.utah.edu/~suresh/web/2013/03/13/new-developments-in-matrix-factorization/"&gt;My first column is up&lt;/a&gt;, and it talks about some recent fascinating developments in nonnegative matrix factorization.&lt;br /&gt;
&lt;br /&gt;
My hope with the pieces I write is to cover areas of geometry that may have not had sufficient representation in the past (especially things closer to problems I'm interested in). My next column is due in August, and apart from doing a wrapup on SoCG, other things that come to mind include Laplacians and graph geometry, reproducing kernels, or even Bregman geometry.&lt;br /&gt;
&lt;br /&gt;
But everything is now mobile, crowdsourced and social networked, so I'm looking for your suggestions on interesting topics to cover, new emerging areas, results that I'm not tracking, and so on. So post away here or on G+.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=lXZuBbilJLg:We45tiAiCTQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=lXZuBbilJLg:We45tiAiCTQ:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/lXZuBbilJLg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/1849237238317162806/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=1849237238317162806" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1849237238317162806?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1849237238317162806?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/lXZuBbilJLg/the-sigact-cg-column.html" title="The SIGACT CG column" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>1</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/03/the-sigact-cg-column.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C04HSXg9eCp7ImA9WhBSGEw.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-6014943820403411796</id><published>2013-02-25T10:05:00.001-07:00</published><updated>2013-02-25T10:05:38.660-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-02-25T10:05:38.660-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><title>Data analysis, interpretation and explanations</title><content type="html">There was a recent data-related kerfuffle between the New York Times and the makers of the Tesla electric car. If you haven't read the articles, (and the &lt;a href="http://publiceditor.blogs.nytimes.com/2013/02/18/problems-with-precision-and-judgment-but-not-integrity-in-tesla-test/"&gt;NYT public editor's post&lt;/a&gt; on this has good links), the crude summary is this:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;NYT reporter takes Tesla on long test drive, and reports problems with running out of charge.&lt;/li&gt;
&lt;li&gt;Tesla Motors CEO Elon Musk writes a long takedown of the reporter's review, complete with graphs generated from &lt;b&gt;data the car recorded during the drive&lt;/b&gt;.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;NYT comes back with their own interpretation of data, rebutting Musk's claims.&lt;/li&gt;
&lt;li&gt;Others attempt to reproduce the reporter's experience and fail, but arguably in different weather conditions that might or might not make a difference.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
In an &lt;a href="http://towcenter.org/blog/what-the-tesla-affair-tells-us-about-data-journalism/"&gt;insightful meta-analysis&lt;/a&gt; of the dustup, Taylor Owen of the Tow Center for Digital Journalism discusses the implications for journalism in a data-driven world. He also references an &lt;a href="http://www.nytimes.com/2013/02/19/opinion/brooks-what-data-cant-do.html?ref=davidbrooks&amp;amp;_r=0"&gt;article by David Brooks&lt;/a&gt; that makes the point:&lt;/div&gt;
&lt;blockquote class="tr_bq"&gt;
People are really good at telling stories that weave together multiple causes and multiple contexts. Data analysis is pretty bad at narrative and emergent thinking...&lt;/blockquote&gt;
I've felt for a while now that it's time to design mechanisms for providing context and narrative to data analysis*. Some of the research &lt;a href="http://www.cs.utah.edu/~praman/index.html"&gt;my student Parasaran&lt;/a&gt; does is on metaclustering: essentially the meta-analysis of different perspectives (clusterings) of data to draw out a larger meaning. We've also just submitted a paper on how to derive &lt;i&gt;local&lt;/i&gt;&amp;nbsp;markers of prediction validity in clustering (rather than just relying on global measures of quality). And &lt;a href="http://geomblog.blogspot.com/2013/01/accountability-in-data-mining-new.html"&gt;my seminar this semester&lt;/a&gt; is about the larger problems of explanations and accounting in data mining.&lt;br /&gt;
&lt;br /&gt;
I think as computer scientists, we have a lot to offer in the realm of data mining - not just in the design of tools for prediction, but in the design of tools to facilitate better &lt;b&gt;understanding&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;* None of this is surprising to experimental scientists, who will routinely attack a problem from multiple directions in order to acquire a larger understanding of a phenomenon rather than just the ability to predict.&amp;nbsp;&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=bHtDQ65L3Hg:jRYSOLEU1k4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=bHtDQ65L3Hg:jRYSOLEU1k4:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/bHtDQ65L3Hg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/6014943820403411796/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=6014943820403411796" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6014943820403411796?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6014943820403411796?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/bHtDQ65L3Hg/data-analysis-interpretation-and.html" title="Data analysis, interpretation and explanations" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/02/data-analysis-interpretation-and.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkEGQ3c-fCp7ImA9WhBTE0k.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-6221394620595534627</id><published>2013-02-08T10:30:00.001-07:00</published><updated>2013-02-08T10:30:22.954-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-02-08T10:30:22.954-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="hangouts" /><title>TCS+ hangout on the TSP polytope.</title><content type="html">&lt;a class="g-profile" href="http://plus.google.com/109856372312173887558" target="_blank"&gt;+Thomas Vidick&lt;/a&gt;&amp;nbsp; and the folks at the&amp;nbsp;&lt;a class="g-profile" href="http://plus.google.com/109603448835568460155" target="_blank"&gt;+TCS+&lt;/a&gt;&amp;nbsp;community have started a new experiment in G+ hangout talks. &lt;a href="https://sites.google.com/site/plustcs/past-talks"&gt;The first talk in this series&lt;/a&gt; was by Ronald de Wolf on the &lt;a href="http://arxiv.org/abs/1111.0837"&gt;STOC 2012 paper&lt;/a&gt; that showed that there was no polynomial-sized lifted representation of the TSP polytope.&lt;br /&gt;
&lt;br /&gt;
Overall, it was a very pleasant experience. I was able to reserve one of the 10 slots (a Hangout limit) for myself and my students at the University of Utah to attend and interact with the speaker, and from &lt;a href="http://mycqstate.wordpress.com/2013/02/07/tcs-inaugural-seminar/"&gt;Thomas's post-review&lt;/a&gt; it seems that many more were signed on for "view-only" access to the stream.&lt;br /&gt;
&lt;br /&gt;
There were very few technical hiccups, and&amp;nbsp;&lt;a class="g-profile" href="http://plus.google.com/108565376679287396378" target="_blank"&gt;+Oded Regev&lt;/a&gt;&amp;nbsp;did a great job making sure that people were muted when not talking, and had a chance to ask questions in an orderly way. The chat sidechannel was a great help.&lt;br /&gt;
&lt;br /&gt;
The talk itself was the best part: de Wolf did an excellent job conveying the main ideas of the proof without getting bogged down in details, and it felt as comfortable as listening to a talk live at a conference. Given the number of people listening in, this was already approaching medium-sized-workshop levels.&lt;br /&gt;
&lt;br /&gt;
I'm looking forward to more of these events, and I'm glad that the&amp;nbsp;&lt;a class="g-profile" href="http://plus.google.com/109603448835568460155" target="_blank"&gt;+TCS+&lt;/a&gt;&amp;nbsp; folks are doing this. I also hope they can try more experiments with Google Hangout. For example, two ideas come to mind:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;A reddit-style AMA ("Ask Me Anything"). One way to make this work is that the speaker would do a short presentation (maybe 5-10 minutes) and then would open up the floor for questions. To keep things manageable, people could write in questions on chat, and the moderator could filter them and ask the questions live. With sufficient preparation, some questions could be submitted ahead of time.&lt;/li&gt;
&lt;li&gt;A live panel discussion with a moderator and a few participants, which again could have questions from the audience moderated by the moderator.&lt;/li&gt;
&lt;/ul&gt;
&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=m01H15HzbMs:Cln6nKBW_3g:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=m01H15HzbMs:Cln6nKBW_3g:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/m01H15HzbMs" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/6221394620595534627/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=6221394620595534627" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6221394620595534627?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6221394620595534627?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/m01H15HzbMs/tcs-hangout-on-tsp-polytope.html" title="TCS+ hangout on the TSP polytope." /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/02/tcs-hangout-on-tsp-polytope.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Dk8BRHg4fyp7ImA9WhNbGE0.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-8574098108185036724</id><published>2013-01-21T14:34:00.002-07:00</published><updated>2013-01-21T14:47:35.637-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-01-21T14:47:35.637-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><category scheme="http://www.blogger.com/atom/ns#" term="accountability" /><title>Accountability in Data Mining: A new seminar</title><content type="html">&lt;div class="tr_bq"&gt;
&lt;a href="http://blogs.oregonstate.edu/glencora/2013/01/20/what-would-aaron-swartz-want-you-to-do/"&gt;Glencora Borradaile makes a number of interesting points&lt;/a&gt; about the implications of Aaron Swartz's life and work for us academic computer scientists.&amp;nbsp;&lt;/div&gt;
&lt;blockquote&gt;
As computer science academics we are in a very powerful position.  We are trusted with shaping the next generation that will make very important decisions that will have far-reaching social implications.  Decisions like those over Facebook’s privacy defaults, motivating technology that enables autonomous private vehicles at the expense of the public interest, defining ownership of electronic media.  We make those decisions ourselves in our research; what we research, how we allow our research to be used.&lt;/blockquote&gt;
&amp;nbsp;Whether or not you agree with her particular policy preferences, the point remains that the technologies we develop can have lasting consequences for the "shape" of the world to come. And if we don't speak up (either through our work, or through our own advocacy), others will take the technologies we develop and find their own ways of using or misusing them.&lt;br /&gt;
&lt;br /&gt;
All of this leads up to my current interests in data mining. I've been thinking about the problems of accountability in data mining for a while now: initially mostly in private, but now more and more in public (along with &lt;a class="g-profile" href="http://plus.google.com/116054197444205448081" target="_blank"&gt;+Graham Cormode&lt;/a&gt;&amp;nbsp;and&amp;nbsp;&lt;a class="g-profile" href="http://plus.google.com/105468202851549303893" target="_blank"&gt;+Andrew McGregor&lt;/a&gt;)&amp;nbsp;as I see the importance of the issue.&lt;br /&gt;
&lt;br /&gt;
What is accountability in data mining ? It means many things really, but for me, for now, it means &lt;a href="http://www.cs.utah.edu/~suresh/web/teaching/accountability-in-data-mining/"&gt;this&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
The fruits of data mining pervade every aspect of our life. We are recommended books and movies; given differential pricing for insurance; screened for potential terror threats; diagnosed with various diseases; and targeted for political advertising. The ability to sift through massive data sets with sophisticated algorithms has resulted in applications with impressive predictive power.&amp;nbsp;&lt;/blockquote&gt;
&lt;blockquote class="tr_bq"&gt;
Since the internals of a learning process can be complex and opaque, it is important to verify that the results satisfy the properties claimed by the algorithm. The importance of this goes beyond merely checking the algorithm itself. Validation mechanisms also provide a way for users to demand accountability from authorities who might use the results of mining to affect users in some way (by changing their insurance rates, or even putting them on a terrorism watchlist). As the results of data mining affect more and more of our lives, the more crucial it is that the user be able to validate decisions made on their behalf and that affect them.&amp;nbsp;&lt;/blockquote&gt;
The above is an introduction to a &lt;a href="http://www.cs.utah.edu/~suresh/web/teaching/accountability-in-data-mining/"&gt;seminar I'm running this semester on this topic&lt;/a&gt;. I'm a little nervous about it, because the topic is vast and unstructured, and almost anything I see nowadays on data mining appears to be "in scope". I encourage you to check out the outline, and comment on topics you think might be missing, or on other things worth covering. Given that it's a 1-credit seminar that meets once a week, I obviously can't cover everything I'd like, but I'd like to flesh out the readings with related work that people can peruse later.&lt;br /&gt;
&lt;br /&gt;
It's entirely possible that we don't care about our privacy any more (as Facebook seems to think*). But we will care about the consequences of the use of our data. My perspective is that a better understanding of what is technically possible (and not possible) to demand and get accountability will be critical to informing larger policy discussions on this topic.&lt;br /&gt;
&lt;br /&gt;
* In an earlier version of this post I wrongly attributed this sentiment to an individual. Apologies.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=CykL5jfAwgo:XzlTboUkcoQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=CykL5jfAwgo:XzlTboUkcoQ:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/CykL5jfAwgo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/8574098108185036724/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=8574098108185036724" title="6 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8574098108185036724?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8574098108185036724?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/CykL5jfAwgo/accountability-in-data-mining-new.html" title="Accountability in Data Mining: A new seminar" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>6</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/01/accountability-in-data-mining-new.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU8AR3s4eip7ImA9WhNbFEk.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-1973751073100239033</id><published>2013-01-16T20:51:00.000-07:00</published><updated>2013-01-17T10:30:46.532-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-01-17T10:30:46.532-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="math.PR" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="sampling" /><title>A sampling gem: sampling from $\ell_p$ balls.</title><content type="html">A while back, &lt;a href="http://geomblog.blogspot.com/2005/10/sampling-from-simplex.html"&gt;I had written about&lt;/a&gt; uniform sampling from the $d$-simplex. In brief, sample exponentially distributed random variables, and normalize them. Note that the simplex is one piece of the $d$-dimensional $\ell_1$ unit sphere.&lt;br /&gt;
&lt;br /&gt;
I had also mentioned a similar trick to sample from the ($\ell_2$) unit sphere: sample Gaussian random variables, and normalize them.&lt;br /&gt;
&lt;br /&gt;
As I discovered today, these are really just special cases of a much more general result that includes both of these approaches, as well as providing a general way to sample from the $d$-dimensional unit &lt;i&gt;ball&lt;/i&gt;&amp;nbsp;(as well as sphere) under any $\ell_p$ norm.&lt;br /&gt;
&lt;br /&gt;
The result is due to &lt;a href="http://arxiv.org/abs/math/0503650"&gt;Barthe, Guedon, Mendelson and Naor&lt;/a&gt;, and is breathtakingly elegant. Here it is:&lt;br /&gt;
&lt;br /&gt;
To sample from the unit ball under the $\ell_p$ norm in $d$ dimensions, randomly sample $d$ coordinates $x_1, \ldots, x_d$ i.i.d from a density proportional to $\exp(-|x|^p)$. Also sample $Y$ from the exponential distribution with parameter $\lambda = 1$. Then the desired sample is&lt;br /&gt;
$$\frac{(x_1, x_2, \ldots, x_d)}{\Bigl(\sum_i |x_i|^p&amp;nbsp;+ Y\Bigr)^{1/p}}$$&lt;br /&gt;
Notice that merely eliminating the $Y$ recovers the procedure for sampling from the unit sphere and includes both of the above sampling procedures as a special case. It's far more efficient than either rejection sampling, or even the metropolis-sampling method from the theory of sampling from convex bodies. Also, if you only want to sample from an $\ell_2$ ball you can do it without this hammer using the sphere sampling technique and a nonuniform sampling of the length, but this seems so much more elegant.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=x5VdeBQSAKE:zyzP6gEw3Do:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=x5VdeBQSAKE:zyzP6gEw3Do:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/x5VdeBQSAKE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/1973751073100239033/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=1973751073100239033" title="7 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1973751073100239033?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/1973751073100239033?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/x5VdeBQSAKE/a-sampling-gem-sampling-from-ellp-balls.html" title="A sampling gem: sampling from $\ell_p$ balls." /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>7</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/01/a-sampling-gem-sampling-from-ellp-balls.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEYCSH44cSp7ImA9WhNUGEg.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-7859677320086169592</id><published>2013-01-10T14:09:00.002-07:00</published><updated>2013-01-10T14:09:29.039-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-01-10T14:09:29.039-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term=".02" /><category scheme="http://www.blogger.com/atom/ns#" term="teaching" /><title>LEGO and math teaching</title><content type="html">I was discussing &lt;a href="http://www.paultough.com/the-books/how-children-succeed/"&gt;Paul Tough's book&lt;/a&gt; with my wife yesterday at breakfast, and we somehow got onto my pet peeve: the mechanical way in which math is taught in schools, and how math gets reduced to arithmetic and counting. Of course the definitive word on this is Paul Lockhart's &lt;a href="http://www.amazon.com/Mathematicians-Lament-School-Fascinating-Imaginative/dp/1934137170"&gt;A Mathematician's Lament&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
I looked over the floor of the living room, strewn with random bits of deadly adult-foot-stabbing Lego, and had a minor epiphany. As anyone with small children knows, LEGO sells a lot more Hollywood tie-in kits now compared to the "bags of bricks" that it used to sell. You can buy the Millenium Falcon, an X-wing fighter, a spiderman action scene, or some kit related any cartoon on Nickelodeon.&lt;br /&gt;
&lt;br /&gt;
Which is a pain. Lots of specialized parts, key pieces that if misplaced causes massive bouts of tears and anguished searching, and so on.&lt;br /&gt;
&lt;br /&gt;
But how do kids play with them ? They put it together carefully from the instructions the first time. This lasts about a day or so. Then they get bored, take it apart and mix up all the pieces with the OTHER kits they have, and spend many happy days building all kinds of weird contraptions.&lt;br /&gt;
&lt;br /&gt;
Here's the thing about the kits. They show you how to build fairly complicated devices and doohickeys. The kids pick up on this, and they mix and match the doohickeys in their own new constructions. Essentially they figure out the functions of the different parts, and realize how to build brand new things with them.&lt;br /&gt;
&lt;br /&gt;
But suppose they were told that all they could do was repeatedly assemble the kits (and maybe even different kits) over and over again. It's unlikely they'd learn anything more than just how to assemble kits really quickly. They'd also get terribly bored. In fact, the popular toys in our house are multipurpose objects: any single-purpose toy gets discarded rather quickly.&lt;br /&gt;
&lt;br /&gt;
To cut a long story short, it feels to me that a lot of math (and theoryCS) education at the school and college level involves looking at various "kits", and seeing how they get put together. You get rewarded for putting kits together correctly, but not for playing with the kits and creating bizarre new objects. But finding a way to let students (in an unstructured way) play with mathematical concepts is the key to having them really understand the concepts and their different facets.&lt;br /&gt;
&lt;br /&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=hkODZ3a5xJQ:QZlH4i_7now:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=hkODZ3a5xJQ:QZlH4i_7now:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/hkODZ3a5xJQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/7859677320086169592/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=7859677320086169592" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/7859677320086169592?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/7859677320086169592?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/hkODZ3a5xJQ/lego-and-math-teaching.html" title="LEGO and math teaching" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>2</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/01/lego-and-math-teaching.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0ANQXY8eip7ImA9WhNUFko.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-8314397864105963877</id><published>2013-01-08T13:09:00.003-07:00</published><updated>2013-01-08T13:09:50.872-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-01-08T13:09:50.872-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="postdocs" /><category scheme="http://www.blogger.com/atom/ns#" term="jobs" /><title>ICERM and Simons postdocs</title><content type="html">Two upcoming TCS postdoc deadlines:&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;u&gt;ICERM Program on Network Science and Graph Algorithms&lt;/u&gt;&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;u&gt;&lt;br /&gt;&lt;/u&gt;&lt;/b&gt;
This is a &lt;a href="http://icerm.brown.edu/sp-s14"&gt;program out of Brown organized by Kelner, Klein, Mathieu, Shmoys and Upfal&lt;/a&gt;. It sounds quite fascinating if you're doing anything with graph data and spectral analysis. Deadlines for postdoc applications is &lt;b&gt;Jan 14&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;u&gt;Simons special program on the theory of big data.&amp;nbsp;&lt;/u&gt;&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;u&gt;&lt;br /&gt;&lt;/u&gt;&lt;/b&gt;
As I've mentioned before, the Simons Institute is running a semester long program on the theory of big data. &lt;a href="http://simons.berkeley.edu/fellows.html"&gt;The deadline for applying for postdocs&lt;/a&gt; is soon (middle of January)&lt;br /&gt;
&lt;br /&gt;
These two programs are coordinating, and if you're so inclined you might be able to spend one semester at Berkeley and another in Providence. Please let the organizers know if this is something you're interested in.&lt;br /&gt;
&lt;br /&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=QrjBDYXWi3g:9Dm40PP6yqs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=QrjBDYXWi3g:9Dm40PP6yqs:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/QrjBDYXWi3g" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/8314397864105963877/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=8314397864105963877" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8314397864105963877?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8314397864105963877?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/QrjBDYXWi3g/icerm-and-simons-postdocs.html" title="ICERM and Simons postdocs" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>1</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/01/icerm-and-simons-postdocs.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUcESX05cSp7ImA9WhNUFk8.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-6685947596828725690</id><published>2013-01-07T23:36:00.004-07:00</published><updated>2013-01-07T23:36:48.329-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-01-07T23:36:48.329-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="conf-blogs" /><category scheme="http://www.blogger.com/atom/ns#" term="soda" /><title>SODA 2013 4/n: Business</title><content type="html">If you haven't been following my live-tweets at the SODA business meeting, here's a summary of the unusually quiet meeting:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Attendance at the conference was 311, which is quite low, but is a local high for New Orleans (third time in the last 10 years).&amp;nbsp;&lt;/li&gt;
&lt;li&gt;135 papers were accepted out of a net 459 submissions. This is an acceptance rate of nearly 30%, which should strike fear into the hearts of tenure-track assistant professors everywhere.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;PC members had to review "only" 42 papers each. Yes, I know this is insane. And no, we're not going to do anything about it.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Shiri Chechik received one of two best student papers for her paper "&lt;a href="http://knowledgecenter.siam.org/0236-000006/0236-000006/1"&gt;New Additive Spanners&lt;/a&gt;". Bernhard Haeupler received the other for "&lt;a href="http://knowledgecenter.siam.org/0236-000055/0236-000055/1"&gt;Simple, fast and deterministic gossip and rumor spreading&lt;/a&gt;".&lt;/li&gt;
&lt;li&gt;I've already mentioned the two best papers, on &lt;a href="http://geomblog.blogspot.com/2013/01/soda-2013-part-in.html"&gt;graph minors&lt;/a&gt; and &lt;a href="http://geomblog.blogspot.com/2013/01/soda-2013-part-3n.html"&gt;dynamic connectivity&lt;/a&gt;.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;SODA 2014 is in Portland, land of beer, books and &lt;a href="http://www.putabirdonit.com/"&gt;birds&lt;/a&gt;. Chandra Chekuri is chair.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;After Salt Lake City observers had to withdraw because of serious voting irregularities, the &lt;strike&gt;trumped up unfair&lt;/strike&gt;&amp;nbsp;winner of the SODA 2015 sweepstakes was San Diego. But since we never go with our first choice location (Honolulu, anyone?), San Francisco was listed as a second choice, with DC as a third choice.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;David Johnson is handing the baton over to Cliff Stein, after running SODA since before I knew it wasn't a fizzy drink.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=g7QQsUuM8hA:8652SnfH97s:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=g7QQsUuM8hA:8652SnfH97s:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/g7QQsUuM8hA" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/6685947596828725690/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=6685947596828725690" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6685947596828725690?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6685947596828725690?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/g7QQsUuM8hA/soda-2013-4n-business.html" title="SODA 2013 4/n: Business" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>4</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/01/soda-2013-4n-business.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ck4AQXw6eSp7ImA9WhNUFk8.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-8342956888620103347</id><published>2013-01-07T21:55:00.001-07:00</published><updated>2013-01-07T21:55:40.211-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-01-07T21:55:40.211-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="conf-blogs" /><category scheme="http://www.blogger.com/atom/ns#" term="soda" /><title>SODA 2013: Part 3/n</title><content type="html">I just attended Valerie King's talk on &lt;a href="http://knowledgecenter.siam.org/0236-000016/0236-000016/1"&gt;her paper with Bruce Kapron and Ben Mountjoy&lt;/a&gt; that does dynamic connectivity in worst-case polylog time (randomized). This paper received a Best Paper award (along with the Grohe et al paper on graph minors I mentioned yesterday).&lt;br /&gt;
&lt;br /&gt;
The problem is a classic: given a stream of updates (edge insertions/deletions) to a graph, maintain a data structure that can answer "Are u and v connected" efficiently. There's been a slew of work on the problem: if you want worst-case bounds for updates, the best till now was $O(\sqrt{n})$ by Eppstein, Galil, Italiano and Nissenzweig (from 1992). There are polylogarithmic &lt;i&gt;amortized&lt;/i&gt;&amp;nbsp;bounds, but they can incur worst-case update times of $\Theta(n)$.&lt;br /&gt;
&lt;br /&gt;
In this paper, after 20 years, they knock down the worst-case update times to &lt;b&gt;polylogarithmic&lt;/b&gt;: the algorithms are randomized (with 1-sided error: it might occasionally declare two nodes disconnected when they were connected).&lt;br /&gt;
&lt;br /&gt;
The idea itself is very elegant, and it connects to techniques in streaming algorithms in a way that I think is neat. WARNING: IANAE in this area, so my rendition might be slightly naïve, and is drawn from my understanding of Valerie King's excellent presentation.&lt;br /&gt;
&lt;br /&gt;
The basic problem is this: I can maintain connectivity by maintaining a collection of trees. Adding an edge can be done without too much difficulty (indeed, if all you want to do is insert things, then you're back to union-find). It's insertion and deletion together that makes things hard: imagine that you delete an edge and disconnect a tree: you need to quickly determine if there's some other non-tree edge that can be used to reconnect the pieces, or if the two sides are now really disconnected.&lt;br /&gt;
&lt;br /&gt;
More generally, consider the cutset maintainence problem. You have updates as before, but now a query is: given a set S, find a cut edge if one exists between S and V - S. It's not hard to see that this is the core routine you need for the tree.&lt;br /&gt;
&lt;br /&gt;
The first elegant idea is this: suppose you represent each edge by a bit string consisting of the encoding of one endpoint followed by an encoding of the other. For example, the edge 2-3 might be written as 010011. Now for each vertex, compute the XOR of all the edges adjacent to it and call this its signature.&lt;br /&gt;
&lt;br /&gt;
If you XOR all the signatures of vertices in a set, and if the set has exactly one cut edge, then what you get is the signature of that edge ! This is because each edge that's inside S will occur twice and therefore will zero out.&lt;br /&gt;
&lt;br /&gt;
So this works if your cut set is of size 1. But suppose it's not ? Now you maintain $O(\log n)$ sets for each vertex. Each edge adjacent to the vertex is thrown into the $i^{\text{th}}$ set with probability $1/2^i$. With some appropriate duplication, you can show that if the cut set is of size $k$, then w.h.p the $\log k$th cut set will have exactly one element in it, and that's the element you can retrieve when you need to find a replacement edge.&lt;br /&gt;
&lt;br /&gt;
There's a lot more technical work to do to get the whole algorithm working, and I won't go into that. But those of you who are familiar with streaming will recognize something here.&lt;br /&gt;
&lt;br /&gt;
In essence, they've created a &lt;i&gt;sketch&lt;/i&gt;&amp;nbsp;for the set of edges adjacent to the vertex, and they have a way to represent the set compactly and retrieve individual elements from it. The trick with exponentially decaying levels is standard in streaming count estimation, and the GF(2) trick for storing the sketch is very similar to the trick used by &lt;a href="http://people.cs.umass.edu/~mcgregor/papers/12-dynamic.pdf"&gt;Ahn, Guha and McGregor&lt;/a&gt; in their SODA 2012 paper on graph sketching.&lt;br /&gt;
&lt;br /&gt;
And that's what I thought was the best part: that ideas that really have power in streaming can be used to help solve a longstanding open problem in graph algorithms. I should point out that the connection is post-facto: this paper was developed independently of the streaming work. But one does have to wonder what other connections we might be able to establish between sketching tricks on graphs and basic graph algorithms.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=7eE-24x3A_0:W9CvVv8LLqI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=7eE-24x3A_0:W9CvVv8LLqI:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/7eE-24x3A_0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/8342956888620103347/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=8342956888620103347" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8342956888620103347?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8342956888620103347?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/7eE-24x3A_0/soda-2013-part-3n.html" title="SODA 2013: Part 3/n" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>3</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/01/soda-2013-part-3n.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUMER3w8fip7ImA9WhNUFUQ.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-2207460940139143654</id><published>2013-01-07T14:32:00.002-07:00</published><updated>2013-01-07T15:23:26.276-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-01-07T15:23:26.276-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="conf-blogs" /><category scheme="http://www.blogger.com/atom/ns#" term="soda" /><title>SODA 2013: Part 2/n</title><content type="html">&lt;a href="http://geomblog.blogspot.com/2013/01/soda-2013-part-in.html"&gt;&lt;i&gt;My previous post is here&lt;/i&gt;&lt;/a&gt;.&lt;br /&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Day 2 of SODA, and the tenth time I've been asked "are you chairing all the sessions" ? No, just that many of my PC colleagues didn't (or couldn't) show up :), so those of us who did are doing more lifting. In reward, we got a nice dinner in the French quarter, and I tasted &lt;a href="http://en.wikipedia.org/wiki/Boudin#In_the_United_States"&gt;boudin&lt;/a&gt; for the first time (and maybe the last).&amp;nbsp;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
An interesting talk today morning by Dror Aiger on reporting near neighbors. They were able to show a not-super-exponential relation between the number of points at unit $\ell_\infty$ distance from a query, and the number of points at unit $\ell_2$ distance. This was wrapped into a fast algorithm for reporting Euclidean near neighbors in high dimensions that has some interesting (if preliminary) experimental backing as well in comparison with ANN, FLANN and LSH.&amp;nbsp;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Jan Vondrák gave the invited talk on submodular optimization. I &lt;a href="http://geomblog.blogspot.com/2012/12/nips-ruminations-i.html"&gt;mentioned Satoru Fujishige's talk at NIPS&lt;/a&gt;, and this was an excellent complement. Fujishige's talk focused on the geometric intuition behind submodular functions (especially the associated polymatroid). Vondrák's talk focused on the algorithmic implications of submodularity, and he gave very convincing arguments for why it can be viewed as discrete convexity OR discrete concavity, or even neither. He pointed out how the Lovasz extension is useful for minimization and the multilinear extension is more useful for maximization, and gave a number of "recipes" for designing algorithms that optimize submodular functions. I hope the slides go online at some point: they were very clear and well-balanced.&amp;nbsp;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
There was some discussion over whether next year's SODA should adopt the two-tier PC that STOC is currently experimenting. The jury's still out on that, and since the STOC PC is not done with their work, we don't yet have formal feedback. I will admit to being a little frustrated with the level of conservativeness on display here: it's not as if EVERY OTHER COMMUNITY IN CS doesn't do this and doesn't have best practices that we can learn from, and given our reviewing loads, it's really crazy that we aren't desperately trying things to alleviate the problem.&amp;nbsp;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=_WW2yhknEHo:wbVRvkRRVmQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=_WW2yhknEHo:wbVRvkRRVmQ:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/_WW2yhknEHo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/2207460940139143654/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=2207460940139143654" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2207460940139143654?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2207460940139143654?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/_WW2yhknEHo/soda-2013-part-2n.html" title="SODA 2013: Part 2/n" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>2</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/01/soda-2013-part-2n.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C04NSH4-cSp7ImA9WhNUFU4.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-4262849488938128839</id><published>2013-01-06T21:13:00.000-07:00</published><updated>2013-01-06T21:13:19.059-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-01-06T21:13:19.059-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="conf-blogs" /><category scheme="http://www.blogger.com/atom/ns#" term="soda" /><title>SODA 2013, Part I/n</title><content type="html">&lt;i&gt;On twitter, it's common to post longer thoughts in 140 character bursts. If you don't know how long the thought will be, you label them as 1/n, 2/n, and so on, so as not to exceed an arbitrary limit, but also imply that there is a finite limit.&lt;/i&gt;&amp;nbsp;&lt;br /&gt;
&lt;br /&gt;
So we're back in the Big Easy for SODA. This time the conference has merged ALENEX and ANALCO with the main program, so at least for today and tomorrow, we have four parallel tracks. Having just come back from NIPS, SODA feels like a small cozy cocktail party in comparison (&lt;i&gt;ed: not that I know anything about cocktail parties: I have small children&lt;/i&gt;)&lt;br /&gt;
&lt;br /&gt;
The highlight was Bob Sedgewick's tribute to Philippe Flajolet. &lt;a href="http://geomblog.blogspot.com/2007/01/soda-2007-day-1.html"&gt;I've talked about&lt;/a&gt; their work on analytic combinatorics before, but it was nice to hear some of it again. Flajolet's contributions to the field are immense and valuable. He did big data before it was cool: his &lt;a href="http://algo.inria.fr/flajolet/Publications/FlMa85.pdf"&gt;$\ell_0$ estimation work with Nigel Martin&lt;/a&gt; is now a seminal paper in the world of streaming, and his &lt;a href="http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf"&gt;HyperLogLog paper on counting a stream&lt;/a&gt; (with Fusy, Gandouet and Meunier) is the best-in-class on this problem. Bob quoted one of Flajolet's students as saying, "Read any of his papers. You will learn something".&lt;br /&gt;
&lt;br /&gt;
I chaired a geometry session in the morning, and one of the papers there caught my eye a while back. The Fréchet distance problem (commonly known as the dog-walking problem) is the problem of computing a monotone mapping between two curves that has minimim maximum length (you can think of this as a man on one curve walking a dog on another, and asking for the minimum leash length when both man and dog can walk forward at arbitrary speeds).&lt;br /&gt;
&lt;br /&gt;
There's a relatively straightforward dynamic program that can compute the Fréchet distance between two polygonal chains in time $O(nm\log (nm))$ (if $n$ and $m$ are the number of links in the two chains). But it's been a big open problem to figure out whether this can be done in subquadratic time. &lt;a href="http://arxiv.org/abs/1204.5333"&gt;The paper, (by Agarwal, Ben Avraham, Kaplan and Sharir)&lt;/a&gt; shows that for the &lt;i&gt;discrete&lt;/i&gt;&amp;nbsp;version of the problem (that Rinat Ben Avraham in her talk calls the frog hopping problem: the frogs hop from vertex to vertex) you can get a slightly subquadratic time algorithm. The main idea involves adapting "string-like" methods for the problem, which is hard because the "alphabet" is infinitely large.&lt;br /&gt;
&lt;br /&gt;
It will be interesting to see if this spurs more progress. There's already &lt;a href="http://arxiv.org/abs/1209.4403"&gt;a newer paper by Buchin et al&lt;/a&gt; that gets a slightly improved (but still super-quadratic) algorithm for the &lt;i&gt;continuous&lt;/i&gt;&amp;nbsp;Fréchet distance (i.e the original problem). If for no other reason, please click the link to see their awesome title.&lt;br /&gt;
&lt;br /&gt;
Martin Grohe gave an award talk for &lt;a href="http://www.automata.rwth-aachen.de/~grohe/pub/grokawree13.pdf"&gt;his work with Kawarabayashi and Reed&lt;/a&gt; on an improved algorithm for graph minor decompositions. The problem is as follows: given a graph G and a graph H supposedly excluded by G, compute a decomposition of G promised by the graph minor theorem, or produce an H-minor.&lt;br /&gt;
&lt;br /&gt;
The improvement reduces the dependency on the size of $G$ to quadratic (from cubic) and makes use of the &lt;a href="http://en.wikipedia.org/wiki/Courcelle's_theorem"&gt;wonderful and mysterious Courcelle's theorem&lt;/a&gt;. The dependence on the size of $H$ is equally mysterious (at least Martin declined to tell us), but is nowhere near as wonderful.&lt;br /&gt;
&lt;br /&gt;
Please post papers you thought were interesting (and why) in the comments, and I'll add them as updates.&lt;br /&gt;
&lt;br /&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=hHaDDqg7oG8:Nutka5PtWD0:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=hHaDDqg7oG8:Nutka5PtWD0:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/hHaDDqg7oG8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/4262849488938128839/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=4262849488938128839" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/4262849488938128839?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/4262849488938128839?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/hHaDDqg7oG8/soda-2013-part-in.html" title="SODA 2013, Part I/n" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/01/soda-2013-part-in.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkAHQXs_eip7ImA9WhNUEEs.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-2801021029642172419</id><published>2013-01-01T11:25:00.001-07:00</published><updated>2013-01-01T11:25:30.542-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-01-01T11:25:30.542-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="soda" /><title>A SODA announcement, and a happy new year !</title><content type="html">While you're all recovering from New Year's eve revelries, here's an important note from the ALENEX/ANALCO organizers regarding the debauchery in New Orleans otherwise known as SODA:&lt;br /&gt;
&lt;br /&gt;








&lt;br /&gt;
&lt;blockquote&gt;
Dear SODA attendees,&lt;br /&gt;We want to make sure that you are aware of a change in the format of SODA/ALENEX/ANALCO. &amp;nbsp;In recent years, ALENEX and ANALCO have been held on the Saturday before SODA and had separate registration. &amp;nbsp;This year we are trying a different format. &amp;nbsp;ANALCO and ALENEX will both be held in parallel with SODA; ANALCO on Sunday and ALENEX on Monday. &amp;nbsp;There is no longer an separate registration for ALENEX and ANALCO, everyone is automatically registered for all three. &amp;nbsp;That is, all SODA attendees are welcome to attend &amp;nbsp;ANALCO and&amp;nbsp; ALENEX talks. &amp;nbsp;We encourage you to look at the program and attend any talks that interest you. &amp;nbsp;We also welcome any feedback on this new format, after you have attended the conference.&amp;nbsp;&lt;/blockquote&gt;
&lt;blockquote&gt;
Regards,&lt;br /&gt;Bob Sedgewick (ANALCO steering committee chair)&lt;br /&gt;Cliff Stein (ALENEX steering committee chair)&lt;/blockquote&gt;
&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=_xC82fygis8:Iox1qAQGxtU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=_xC82fygis8:Iox1qAQGxtU:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/_xC82fygis8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/2801021029642172419/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=2801021029642172419" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2801021029642172419?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2801021029642172419?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/_xC82fygis8/a-soda-announcement-and-happy-new-year.html" title="A SODA announcement, and a happy new year !" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2013/01/a-soda-announcement-and-happy-new-year.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0MBQX8zfyp7ImA9WhNWFUg.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-3607330288143560055</id><published>2012-12-15T00:53:00.000-07:00</published><updated>2012-12-15T01:17:30.187-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-12-15T01:17:30.187-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="nips" /><category scheme="http://www.blogger.com/atom/ns#" term="conf-blogs" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><title>NIPS II: Deep Learning and the evolution of data models</title><content type="html">&lt;i&gt;(tl;dr: some rambles and musings on deep learning and data, as I attempt to sort out in my head what this all means)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Over the years, as we've engaged with "big data" more and more, the way we construct mental models of data has changed. &lt;a href="http://geomblog.blogspot.com/2012/11/data-dimensions-and-geometry-oh-my.html"&gt;And as I've argued before&lt;/a&gt;, understanding how we think about data, and what shape we give it, is key to the whole enterprise of finding patterns in data.&lt;br /&gt;
&lt;br /&gt;
The model that one always starts with is Euclidean space. Data = points, features = dimensions, and so on. And as a first approximation of a data model, it isn't terrible.&lt;br /&gt;
&lt;br /&gt;
There are many ways to modify this space. You can replace the $\ell_2$ norm by $\ell_1$. You can normalize the points (again with $\ell_2$ or $\ell_1$, sending you to the sphere or the simplex). You can weight the dimensions, or even do a wholesale scale-rotation.&lt;br /&gt;
&lt;br /&gt;
But that's not all. Kernels take this to another level. You can encode weak nonlinearity in the data by assuming that it's flat once you lift it. In a sense, this is still an $\ell_2$ space, but a larger class of spaces that you can work with. The entire SVM enterprise was founded on this principle.&lt;br /&gt;
&lt;br /&gt;
But that's not all either. The curse of dimensionality means that it's difficult to find patterns in such high dimensional data. Arguably, "real data" is in fact NOT high dimensional, or is not generated by a process with many parameters, and so sparsity-focused methods like compressed sensing start playing a role.&lt;br /&gt;
&lt;br /&gt;
But it gets even more interesting. Maybe the data is low-dimensional, but doesn't actually lie in a subspace. This gets you into manifold learning and variants: the data lies on a low-dimensional curved sheet of some kind, and you need to learn on that space.&lt;br /&gt;
&lt;br /&gt;
While the challenge for geometry (and algorithms) is to keep up with the new data models, the challenge for data analysts is to design data models that are realistic and workable.&lt;br /&gt;
&lt;br /&gt;
So what does this have to do with deep learning ?&lt;br /&gt;
&lt;br /&gt;
Deep learning networks "work" in that they appear to be able to identify interesting semantic structures in data that can be quite noisy. But to me it's not entirely clear why that is. So I've been mulling the question of what kind of structure in data might be "visible" to such a network. In the absence of formal results ("if your data can be separated in some RKHS, then an SVM will find it"), what follows is some speculation, based on talks I attended and conversations I had with NIPs attendees.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Observation I&lt;/b&gt;: Both Stephané Mallat's talk and a &lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_1248.pdf"&gt;nice paper&lt;/a&gt; by Coates, Karpathy and Ng talk about the idea of "first-level" features that identify (low-dimensional) invariants and eliminate noise. In the Coates et al paper, they start with a very fine $k$-means clustering ($k$ very large), and attempt to "glue" the centers together into low dimensional subspace pieces. These are then propagated up a network of higher-order feature processors.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Observation 2&lt;/b&gt;: Yoshua Bengio's &lt;a href="http://www.nowpublishers.com/product.aspx?doi=2200000006&amp;amp;product=MAL"&gt;survey of deep learning&lt;/a&gt; (a very readable account) points to &lt;a href="http://www.cs.toronto.edu/~hinton/absps/fastnc.pdf"&gt;work by Geoff Hinton&lt;/a&gt; as part of the reinvigoration of the field. A central idea of this work is that deep belief networks can be trained "layer by layer", where each layer uses features identified from the previous layer.&lt;br /&gt;
&lt;br /&gt;
If you stare at these things long enough, you begin to see a picture not of sparse data, or low-rank data, or even manifold data. What you see is a certain hierarchical collection of subspaces, where low-dimensional spaces interact &lt;i&gt;in a low dimensional way&lt;/i&gt;&amp;nbsp;to form higher level spaces, and so on. So you might have a low-level "lip" feature described by a collection of 2-3 dimensional noisy subspaces in an image space. These "lip" features in turn combine with "eye" features and so on.&lt;br /&gt;
&lt;br /&gt;
This might seem rather far fetched, and a strange way to think about data. But I can't claim originality even here. Back in June, &lt;a href="http://www.math.sunysb.edu/~bishop/SoCG12/Maggioni.pdf"&gt;Mauro Maggioni gave a talk&lt;/a&gt; at CGWeek in &lt;a href="http://www.math.sunysb.edu/~bishop/SoCG12/"&gt;Chris Bishop's workshop on the connection between analysis and geometry&lt;/a&gt;. In this talk, he described a multi-resolution view on data that admits structure at different scales, and admits &lt;i&gt;different&lt;/i&gt;&amp;nbsp;structures at these scales.&lt;br /&gt;
&lt;br /&gt;
The upshot of all of this: it is possible that deep learning is trying to capture hierarchies of low dimensional subspaces that interact in a low dimensional way. This would explain how one is able to avoid the curse of dimensionality, and might also explain why it sometimes can find structure that other methods (kernels, manifold methods, etc) might miss.&lt;br /&gt;
&lt;br /&gt;
Problem is: I'm not sure how one tests this hypothesis. Almost any data set could be viewed this way if you allow enough features and enough "depth" in the hierarchy.&lt;br /&gt;
&lt;br /&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=XPmND4MH1GI:XLoLRkds52k:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=XPmND4MH1GI:XLoLRkds52k:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/XPmND4MH1GI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/3607330288143560055/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=3607330288143560055" title="9 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3607330288143560055?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3607330288143560055?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/XPmND4MH1GI/nips-ii-deep-learning-and-evolution-of.html" title="NIPS II: Deep Learning and the evolution of data models" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>9</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2012/12/nips-ii-deep-learning-and-evolution-of.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEUFRHg6eip7ImA9WhNWEUg.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-603431325713012951</id><published>2012-12-10T02:14:00.000-07:00</published><updated>2012-12-10T09:16:55.612-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-12-10T09:16:55.612-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="nips" /><category scheme="http://www.blogger.com/atom/ns#" term="conf-blogs" /><title>NIPS ruminations I</title><content type="html">&lt;i&gt;(tl;dr a bucket load of trends/papers that I found interesting at the conference)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
I just returned from &lt;a href="http://nips.cc/Conferences/2012/"&gt;NIPS&lt;/a&gt; in Lake Tahoe. For those not in the know, NIPS is one of the premier machine learning conferences, and has been growing rapidly over the last few years. This year's incarnation (the first of at least 7 in Lake Tahoe) has over 1600 attendees, a gazillion workshops, 4 tutorials, and more papers each DAY than all of STOC.&lt;br /&gt;
&lt;br /&gt;
The format of the conference is very interesting. There are keynotes each morning and afternoon, a few selected 20 minute talks, and a few more more 5 minute "spotlight" talks, all in single session mode. All accepted papers get a poster slot one of of four days, and the poster session is a mammoth event from 7pm to midnight each night (yes, you read that correctly).&lt;br /&gt;
&lt;br /&gt;
I'll say more about the format in a later post. For the next few posts, I'll highlight some of the papers and trends that were interesting to me. For more conference blogging, check out Anand Sarwate's sequence (&lt;a href="http://ergodicity.net/2012/12/04/nips-2012-day-one/"&gt;I&lt;/a&gt;, &lt;a href="http://ergodicity.net/2012/12/05/nips-2012-day-two/"&gt;II&lt;/a&gt;), and &lt;a href="http://nlpers.blogspot.com/2012/12/nips-stuff.html"&gt;Hal Daumé's recap&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
Perhaps the most obvious trend at the conference was on deep learning. While it's been in the news recently because of the &lt;a href="http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en/us/archive/unsupervised_icml2012.pdf"&gt;Google untrained search for youtube cats&lt;/a&gt;, the methods of deep learning (basically neural nets without lots of back propagation) have been growing in popularity over a long while, and I was awash in autoencoders, pooling, and other jargon from the area. It was amusing to see speakers apologize for NOT talking about deep learning.&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://nips.cc/Conferences/2012/Program/event.php?ID=3127"&gt;Stéphane Mallat's keynote&lt;/a&gt; on this topic was a thing of beauty. While I understood very little of the technical details, the central idea was that by using certain convolution-based encodings, and looking for "invariant substructures", you could essentially filter out irrelevant noise in the feature space, generating new features for the next level of the learning network. This simple idea is quite powerful: there was a &lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_1248.pdf"&gt;paper on learning faces from videos&lt;/a&gt; from Coates, Karpathy and Ng that used a simple version of this idea by using k-means with lots of clusters to identify these low-D subspaces and factor them out.&lt;br /&gt;
&lt;br /&gt;
Another trend that's been around for a while, but was striking to me, was the detailed study of optimization methods. &amp;nbsp;Optimization is a major theme in machine learning. This is primarily because so many machine learning problems can be formulated as convex/nonconvex optimizations. Solving these problems takes a lot of ingenuity and engineering, and after you've been swimming in a pond of regularization, sparsity, mixed norms, stochastic gradient descent, and alternating optimization for a while, things can get crazy. There are at least two different workshops on optimization in machine learning (&lt;a href="http://discml.cc/"&gt;DISC&lt;/a&gt;&amp;nbsp;and&amp;nbsp;&lt;a href="http://opt.kyb.tuebingen.mpg.de/cfp.html"&gt;OPT&lt;/a&gt;), and numerous papers that very carefully examined the structure of optimizations to squeeze out empirical improvements.&lt;br /&gt;
&lt;br /&gt;
If I had to mount a critique of this large body of work, it would only be that a lot of this appears to be black magic (a criticism that seems even more true for deep learning). There are number of tricks in the optimization toolbox, and you can always try any number of them, but it's not always clear which methods work best, and why.&lt;br /&gt;
&lt;br /&gt;
Quick hits on papers that interested me:&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Bregman divergences&lt;/b&gt;:&amp;nbsp;&lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_1327.pdf"&gt;A paper by Iyer and Bilmes&lt;/a&gt; show how to extend &lt;a href="http://geomblog.blogspot.com/2012/04/approximate-bregman-near-neighbors.html"&gt;Bregman divergences&lt;/a&gt; to the case when the generating function $\phi$ is not convex, but sub modular. This is a little tricky, because in such cases there's no unique gradient, and so technically the Bregman divergence is a function of both the function and its subdifferential. One interesting variant is the Lovasz Bregman divergence, which comes from using the Lovasz extension of a submodular function as the convex generator for a Bregman divergence. An application of these constructions comes in ranking, and it's interesting that things like the Hamming distance can be constructed.&lt;br /&gt;
&lt;br /&gt;
On a separate note, &lt;a href="http://www.kurims.kyoto-u.ac.jp/~fujishig/"&gt;Satoru Fujishige&lt;/a&gt; gave a wonderful invited lecture at DISC on submodular functions. I particularly liked the geometric interpretation of submodularity via its &lt;a href="http://en.wikipedia.org/wiki/Polymatroid"&gt;polymatroid&lt;/a&gt;&amp;nbsp;and how a matroid can be viewed as the object you get when the submodular function is monotone. Of course this has been known for over 40 years, but I "got it" for the first time. If his &lt;a href="http://www.kurims.kyoto-u.ac.jp/~fujishig/Book1a.html"&gt;book&lt;/a&gt; is anything like his talk, I really want to get it.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Kernel distances:&amp;nbsp;&lt;/b&gt;Ever since I discovered &amp;nbsp;the &lt;a href="http://arxiv.org/abs/1103.1625"&gt;kernel distance&lt;/a&gt;&amp;nbsp;(as in, found it, not invented it)&amp;nbsp;I've been fascinated by how it behaves more or less like the earth mover distance, but is so much easier to compute. Scott Aaronson (at his &lt;a href="http://nips.cc/Conferences/2012/Program/event.php?ID=3129"&gt;NIPS invited talk&lt;/a&gt;) made this joke about how nature loves $\ell_2$. The &amp;nbsp;kernel distance is "essentially" the $\ell_2$ variant of EMD (which makes so many things easier).&lt;br /&gt;
&lt;br /&gt;
There's been a series of papers by Sriperumbudur et al. on this topic, and in a series of works they have shown that (a) &lt;a href="http://arxiv.org/pdf/1205.0411v2.pdf"&gt;the kernel distance captures&lt;/a&gt; the notion of "&lt;a href="http://arxiv.org/pdf/0803.4101.pdf"&gt;distance covariance&lt;/a&gt;" that has become popular in statistics as a way of testing independence of distributions. (b) as an estimator of distance between distributions, &lt;a href="http://projecteuclid.org/DPubS/Repository/1.0/Disseminate?view=body&amp;amp;id=pdfview_1&amp;amp;handle=euclid.ejs/1347974672"&gt;the kernel distance has more efficient estimators&lt;/a&gt; than (say) the EMD because its estimator can be computed in closed form instead of needing an algorithm that solves a transportation problem and (c ) the kernel that optimizes the efficient of the two-sample estimator can also be determined (&lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_0592.pdf"&gt;the NIPS paper&lt;/a&gt;).&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Metrics for PSD: &lt;/b&gt;In my continuing quest to find strange geometries to design algorithms for, I had this paper some time ago on &lt;a href="http://arxiv.org/abs/0912.1580"&gt;doing geometry on the Riemannian manifold&lt;/a&gt; of positive definite matrices. It's a tricky business, and life gets sad quickly when you can't describe the convex hull of three points finitely.&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_0093.pdf"&gt;Suvrit Sra proposed a new metric&lt;/a&gt; for the space of $n \times n$ positive definite matrices. It's what you get if you apply the Jensen-Shannon construction to the Burg entropy on matrices. It has the property of being more easily computable than the standard Riemannian metric, and also shares with that metric a nice closed form for the midpoint of two matrices. What remains open is the kind of geometry this metric induces: a weird property is that the associated kernel exp(-\gamma d^2) is p.d iff $\gamma$ is half integral below $n-1$, and any real above it.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Distance metric learning and MKL.&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Our presentation at OPT was about &lt;a href="http://opt.kyb.tuebingen.mpg.de/papers/opt2012_paper_20.pdf"&gt;multiple kernel learning&lt;/a&gt;, which is closely related to the general problem of distance metric learning (especially when the distance metric is defined by a kernel). There were a good number of &amp;nbsp;distance metric learning at NIPS - different approaches that either did &lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_0997.pdf"&gt;local learning of "multi-metrics"&lt;/a&gt; or &lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_1223.pdf"&gt;learned a metric based on k-NN points&lt;/a&gt;, or even &lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_0514.pdf"&gt;Hamming distance metric learning&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Distributed learning&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
Distributed learning (in a communication-restricted environment) seems to be a growing concern, &lt;a href="http://geomblog.blogspot.com/2012/04/distributed-learning-new-model.html"&gt;which of course makes me happy&lt;/a&gt;. There were a few papers on different kinds of communication-limited learning, including one on doing this for &lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_1324.pdf"&gt;probabilistic PCA&lt;/a&gt; (which didn't have formal bounds on the amount of communication though) and one on &lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_0140.pdf"&gt;distributed "non-stochastic" experts&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=kl4YlrR6GNc:MwqYBDh2E0U:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=kl4YlrR6GNc:MwqYBDh2E0U:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/kl4YlrR6GNc" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/603431325713012951/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=603431325713012951" title="5 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/603431325713012951?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/603431325713012951?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/kl4YlrR6GNc/nips-ruminations-i.html" title="NIPS ruminations I" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>5</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2012/12/nips-ruminations-i.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUcGRH8yfCp7ImA9WhNRF04.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-3949393483333088144</id><published>2012-11-07T09:00:00.000-07:00</published><updated>2012-11-12T09:17:05.194-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-11-12T09:17:05.194-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><category scheme="http://www.blogger.com/atom/ns#" term="cs.CG" /><title>Data, Dimensions and Geometry oh my !</title><content type="html">&lt;i&gt;The following is a summary of a talk I gave to undergraduates interested in going on to graduate school. It's targeted at the layperson, and tries to convey a sense of the interplay between data mining and geometry. I gave this talk partly because I realized that things that we take utterly for granted in the rarified world of high dimensional data mining are completely foreign to people who don't think about this for a living.&amp;nbsp;&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;tl;dr:&lt;/b&gt; High dimensional geometry (and non standard geometry) is the unifying language that binds data mining problems together.&lt;br /&gt;
&lt;br /&gt;
We're trying to find patterns all over the place, with very different kinds of data. A search engine is trying to find patterns in web pages that might indicate that they have similar content. Brain researchers are throwing MRIs of patients with diseases into an algorithm that attempts to infer patterns in brain scans that might yield clues about pathology and diagnosis. &lt;a href="http://en.wikipedia.org/wiki/Genome-wide_association_study"&gt;Genome-wide analysis&lt;/a&gt; takes what are essentially long strings of letters and tries to explain why certain populations might be susceptible to certain diseases.&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://pandora.com/"&gt;Pandora&lt;/a&gt;, &lt;a href="http://www.shazam.com/"&gt;Shazam&lt;/a&gt; and other music sites analyze fragments of music to find related artists, or even just match a tune. While an &lt;a href="http://static.echonest.com/InfiniteGangnamStyle/"&gt;infinite gangnam-style video&lt;/a&gt; might be a frivolous use of data mining on video streams, video scans are being analyzed by &lt;a href="http://en.wikipedia.org/wiki/Google_driverless_car"&gt;robots trying to drive unmanned cars&lt;/a&gt; or even just &lt;a href="http://www.robocup.org/"&gt;play soccer.&lt;/a&gt; Social networks are all the rage: Facebook, Twitter and Google are desperate to understand your circle of friends in order to sell things to you more effectively.&lt;br /&gt;
&lt;br /&gt;
How are we able to find patterns, clusters and trends in such different kinds of data ? The key step in all of this is the idea of &lt;i&gt;features&lt;/i&gt;. The trick is to describe each object we are studying as a sequence (or set) of features. For example, a feature set for a document could be the number of times each particular word appears. The feature set for an image could be the count of different colors. For a tune, it might be a &lt;a href="http://www.pandora.com/about/mgp"&gt;collection of features identified by hiring artists to list out which of more than 700 characteristics a piece of music has&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
And so on, and so on. What this allows us to do is represent each object as a set of features, whether it's a web page, a brain scan, a video, or even a social graph. Once we do that, we no longer have to worry about the original data (well, kind of !), and different types of data are all on an equal footing.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
But what now ? Here's where a cool trick that dates back to the 1600s comes in.&lt;br /&gt;
&lt;br /&gt;
I doubt that &lt;a href="http://en.wikipedia.org/wiki/Ren%C3%A9_Descartes"&gt;R&lt;/a&gt;&lt;a href="http://en.wikipedia.org/wiki/Ren%C3%A9_Descartes"&gt;ené Descartes&lt;/a&gt; ever heard of the term "data mining". But in a strange way, he's the father of the field. One of Descartes' claim to fame was establishing a link between geometry and algebra. He said that if we wanted to represent points, lines and other shapes, we could do so not abstractly as Euclid and other classical geometers did, but using algebra. A "point" in the plane could be described by two coordinates (x,y), and a line in the plane could be described by the equation y = mx + c.&lt;br /&gt;
&lt;br /&gt;
This is a very simple idea - children learn it in middle school. And yet like most simple ideas, it was revolutionary, and completely changed our perspective on geometry. The unification of algebra and geometry is so powerful and so natural, we use it almost unconsciously today.&lt;br /&gt;
&lt;br /&gt;
But what does Descartes have to do with data mining ?&lt;br /&gt;
&lt;br /&gt;
Remember those features I was telling you about ? That every object can be represented by a sequence of numbers, each number describing some aspect of the object.&lt;br /&gt;
&lt;br /&gt;
Let's do the opposite of what Descartes proposed ! In other words, let's pretend that the objects are actually points in a space. Now this space is not the plane (unless we had only two features). It's a high dimensional space, with one feature for each dimension.&lt;br /&gt;
&lt;br /&gt;
This process is entirely artificial. There is no real reason why we must think of objects as points in a high dimensional space. But here's the cool thing that happens. &lt;i&gt;We can now express all basic mining primitives as geometric concepts,&lt;/i&gt; by translating the language of the primitive to this high dimensional space.&lt;br /&gt;
&lt;br /&gt;
A clustering of data becomes a grouping of points so that "nearby" points are grouped together. A classification of reviews into positive and negative statements becomes a way to separate "positive" points and "negative" points by a line. Finding trends in data becomes the problem of fitting a straight line to a collection of points.&lt;br /&gt;
&lt;br /&gt;
It is hard to emphasize how utterly bizarre this is. There is no underlying "geometry" in the problem we're solving. We're essentially creating a castle out of thin air in order to understand the problem. And yet it works,&amp;nbsp; and is the bedrock of how we think about data mining.&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp;But wait ! there's more. What exactly does it mean to say that points are "nearby" or they are "separated" ? To answer this question, it's not enough to view objects as points in a high dimensional space. You have to give this space a shape - a geometry (and also a topology, but I'll skip that for now).&lt;br /&gt;
&lt;br /&gt;
For example, if I have two feature lists, how do I measure the distance between them ? If they were points on a map, I could do the usual straight line distance. But does that really capture how far apart they are ? After all, if you're in downtown Salt Lake with its grids, a "crow flies" distance doesn't help estimate how far things are. If you're on the surface of the earth, you can't really tunnel through the center to get from one point to another.&lt;br /&gt;
&lt;br /&gt;
So we have to create the shape of the space by defining how far apart two points are. And this is one of the trickiest parts of data mining. Either we have to use some domain knowledge to estimate which features are significant and control more of the distance between objects, or we have to try and &lt;i&gt;learn&lt;/i&gt; what seems like the right distance between objects based on user estimates or other information. &lt;br /&gt;
&lt;br /&gt;
The good thing is that we have a huge collection of shapes to play with, and different shapes tend to work well for different classes of objects. Some are easy to interpret, others are easy to compute, and so on. So a good part of the "art" in data mining is understanding the data and estimating the right geometry for it, so that our tasks (expressed geometrically) give us meaningful answers. &lt;br /&gt;
&lt;br /&gt;
Or as Dorothy famously said to her dog, "Toto, I don't think we're in the plane any more!"&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=R4mX9wkqoDo:ZH6UCbYVsOo:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=R4mX9wkqoDo:ZH6UCbYVsOo:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/R4mX9wkqoDo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/3949393483333088144/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=3949393483333088144" title="10 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3949393483333088144?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/3949393483333088144?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/R4mX9wkqoDo/data-dimensions-and-geometry-oh-my.html" title="Data, Dimensions and Geometry oh my !" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>10</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2012/11/data-dimensions-and-geometry-oh-my.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQMSXc6fyp7ImA9WhNREUQ.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-8737929797805464605</id><published>2012-11-06T02:33:00.000-07:00</published><updated>2012-11-06T02:33:08.917-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-11-06T02:33:08.917-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="data-mining" /><title>On the elections, Nate Silver, and lessons for data mining</title><content type="html">One interesting side story from this election has been the &lt;a href="http://markcoddington.com/2012/10/31/nate-silver-journalism-politics-knowledge-epistemology/"&gt;intense focus&lt;/a&gt; on Nate Silver's election predictions, and the matter of aggregate polling statistics. While there's certainly a partisan element to much of the discussion, there's also a larger sense of unease about what the predictions are actually saying.&lt;br /&gt;
&lt;br /&gt;
There are lessons here for the greater goal of using data mining for prediction and modelling, and this will get more and more important the more predictive analytics intrude on our lives.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;People don't quite understand probability, even though they understand odds.&amp;nbsp;&lt;/b&gt;&lt;br /&gt;
There's a lot of confusion about what it means for a one-time event to have a probability associated with it, even though people are quite comfortable with the idea of odds in (for example) sports. This to me reflects a deeper confusion between the frequentist and Bayesian view of probability: namely, is probability the long-run average of the frequency of an event, or an &lt;i&gt;a priori&lt;/i&gt; expression of uncertainty in the likelihood of an event.&lt;br /&gt;
&lt;br /&gt;
I'm not going to say anything here that hasn't been said for more than a century, but from the point of view of &lt;i&gt;interpreting&lt;/i&gt; the results of mining, this distinction will be important.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Aggregation is a GOOD THING.&amp;nbsp;&lt;/b&gt;&lt;br /&gt;
It is entirely likely that all the poll aggregators will have egg on their face tomorrow when the results come in. But it does seem that aggregation helps smooth out the natural irregularities and biases in individual polls. This is a theme that comes up time and again in data mining, and especially in clustering: rather than trusting a single algorithm, you should try to run algorithms that return diverse answers and aggregate them in some fashion.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;It's not enough to predict: you must also explain.&amp;nbsp;&lt;/b&gt;&lt;br /&gt;
Among the good reasons to feel uneasy about the aggregate predictions is that they don't give much insight into why things are going the way they are. To be fair, Nate Silver laid out some economic markers back in the spring, and tied them to possible outcomes via regression. While this provides some &amp;nbsp;insight, it's still pattern matching as opposed to a mechanism.&lt;br /&gt;
&lt;br /&gt;
More generally, it is very difficult to convince people that an answer is pertinent or "correct" unless there's some way to explain the answer without listing a sequence of coefficients or writing down a collection of centers. Much of the popularity of decisions trees comes from the ease of explanation it seems to provide.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Conclusion.&amp;nbsp;&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;
Most of the controversy around data mining in the public sphere has centered around privacy issues. Indeed, privacy issues become a concern precisely because people worry that the mining algorithms are too accurate. But in fact we don't really understand the behavior of many algorithms that we use, and that is dangerous regardless of privacy concerns. The angst over the methods used to predict this election are an illustration of what will happen when the predictions we make start to matter, and matter to many people.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=hGYpow48IfQ:26ZS1htJpw4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=hGYpow48IfQ:26ZS1htJpw4:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/hGYpow48IfQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/8737929797805464605/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=8737929797805464605" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8737929797805464605?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/8737929797805464605?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/hGYpow48IfQ/on-elections-nate-silver-and-lessons.html" title="On the elections, Nate Silver, and lessons for data mining" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2012/11/on-elections-nate-silver-and-lessons.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEcEQ3wzfyp7ImA9WhJaF0w.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-6248061110420783467</id><published>2012-10-08T11:06:00.001-06:00</published><updated>2012-10-08T11:06:42.287-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-10-08T11:06:42.287-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="big-data" /><category scheme="http://www.blogger.com/atom/ns#" term="jobs" /><title>On why I'm excited about "big data"</title><content type="html">I was in Aarhus recently for a &lt;a href="http://www.madalgo.au.dk/html_sider/2_5_Events/SS2012/FrontPage_SS2012.html"&gt;MADALGO workshop on large-scale parallel and distributed models&lt;/a&gt;, where I did a sequence of lectures on &lt;a href="http://www.madalgo.au.dk/html_sider/2_5_Events/SS2012/Course_material2012.html"&gt;GPU algorithms&lt;/a&gt;. I was briefly interviewed by a university reporter for an article, and did a little video on why I think big data/big iron problems are interesting.&lt;br /&gt;
&lt;br /&gt;
At the risk of embarrassing myself even more than I usually do, &lt;a href="http://katrinebjerg.au.dk/news/artikel/madalgo-summer-school/"&gt;here's the video&lt;/a&gt;. Note that this was recorded at a time of great crisis across the globe, when all hair styling products had mysteriously disappeared for a few days. &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=2ZKQI70GOy8:UU8Qdexeycs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=2ZKQI70GOy8:UU8Qdexeycs:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/2ZKQI70GOy8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/6248061110420783467/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=6248061110420783467" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6248061110420783467?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6248061110420783467?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/2ZKQI70GOy8/on-why-im-excited-about-big-data.html" title="On why I'm excited about &quot;big data&quot;" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>2</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2012/10/on-why-im-excited-about-big-data.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEECRXo6eCp7ImA9WhJaEkU.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-6039199345611351920</id><published>2012-10-03T11:51:00.000-06:00</published><updated>2012-10-03T11:51:04.410-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-10-03T11:51:04.410-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="jobs" /><title>We're hiring FIVE (count 'em, FIVE) faculty this year.</title><content type="html">We had an incredible hiring season two years ago, making seven offers and hiring seven new faculty. And now we're doing it again !&lt;br /&gt;
&lt;br /&gt;
Our department is looking to hire five new faculty (at least four are at the assistant professor level). I'm particularly excited that we're hiring two people in the general area of big data (following up on our data mining and database hires from two years ago).&lt;br /&gt;
&lt;br /&gt;
One slot is in what I'll call "big data meets big performance": I'll have more to say about this shortly, but the challenges of large data analysis are not just about managing the data, but about managing large numbers of machines to crunch this data (MapReduce is perhaps the most well known example of this). We're looking for people who can "speak extreme data and extreme processors" fluently - these could be on the data/systems management side, or on the analysis side, or the modelling side.&lt;br /&gt;
&lt;br /&gt;
Utah has a strong presence in high performance computing (the &lt;a href="http://sc12.supercomputing.org/"&gt;Supercomputing confererence&lt;/a&gt; is happening in Salt Lake, and &lt;a href="http://www.cs.utah.edu/~mhall/"&gt;Mary Hall&lt;/a&gt; is the general chair), and we're one of the few places that has a good understanding of both sides of the large data story (i.e machines and bits).&lt;br /&gt;
&lt;br /&gt;
The second slot is in machine learning, with ties to language. Text (and language) provide one of the best large data sources for scalable machine learning, and we're looking for people interested in the challenges of doing ML at scale, especially when dealing with NLP. If you're that person, you'll be coming into a department that has the entire range of data analysis folks from algorithms to analysis to systems to NLP (with many other faculty that are heavy users of ML technology).&lt;br /&gt;
&lt;br /&gt;
Our plan, once we fill these slots, is to make Utah a one-stop shop for large-scale data analysis and visualization - in addition to the above slots, we're also looking to hire in information visualization to complement our strong viz presence. &lt;br /&gt;
&lt;br /&gt;
In addition to the above slots, we are also hiring in computer security and HCI/user interfaces. While I'm equally excited about these positions, I know much less about the areas :). I will point out that we have a big systems group that covers many aspects of security (language security, verification, and network security) already. We've also had strong demand from students and industry for research in HCI, which will complement our info-viz efforts (and even our data mining work)&lt;br /&gt;
&lt;br /&gt;
For more details on how to apply, &lt;a href="http://www.cs.utah.edu/people/faculty-hiring/"&gt;see our ad&lt;/a&gt;. &lt;b&gt;We'll start reviewing applications after Dec 1&lt;/b&gt;. Feel free to email me if you have questions about the slots (but don't send me your application material - &lt;a href="http://www.cs.utah.edu/people/faculty-hiring/"&gt;send them in directly&lt;/a&gt;)&lt;br /&gt;
&lt;br /&gt;
Disclaimer: the above views are my own personal views, and don't represent the views of the department or the hiring subcommittees.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=ykLuEmHavvM:tpcL4f9df6Y:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=ykLuEmHavvM:tpcL4f9df6Y:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/ykLuEmHavvM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/6039199345611351920/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=6039199345611351920" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6039199345611351920?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/6039199345611351920?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/ykLuEmHavvM/were-hiring-five-count-em-five-faculty.html" title="We're hiring FIVE (count 'em, FIVE) faculty this year." /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>1</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2012/10/were-hiring-five-count-em-five-faculty.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEMNQXg8eSp7ImA9WhJaF04.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-4582283063278490988</id><published>2012-10-01T02:18:00.000-06:00</published><updated>2012-10-08T16:48:10.671-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-10-08T16:48:10.671-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="memes" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="community" /><title>Things a _____ should do at least once...</title><content type="html">Without quite realizing it, I managed to create a (tiny) meme in the rarefied circles of TCS/math with my post "&lt;a href="http://geomblog.blogspot.com/2012/09/things-tcser-should-have-done-at-least.html"&gt;Things a TCSer should have done at least once&lt;/a&gt;".&lt;br /&gt;
&lt;br /&gt;
Firstly, you should &lt;a href="https://plus.google.com/112165457714968997350/posts/AedydgaZrsX"&gt;check out the G+ post&lt;/a&gt; for even more scathing commentary on my original post. &lt;br /&gt;
&lt;br /&gt;
Next, you should see the followups (in case you haven't already):&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Complexity blog: &lt;a href="http://blog.computationalcomplexity.org/2012/09/things-complexity-theorist-should-do-at.html"&gt;Things a complexity theorist....&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Turing's invisible hand: &lt;a href="http://agtb.wordpress.com/2012/09/28/things-a-agteer-should-do-at-least-once/"&gt;Things an AGTer....&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Peter Krautzberger (on Boole's rings): &lt;a href="http://boolesrings.org/krautzberger/2012/09/30/n-things-a-set-theorist-should-have-done-at-least-once/"&gt;Things a set theorist..&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://computationalcombinatorics.wordpress.com/2012/09/28/things-a-graph-theorist-should-do-at-least-once/"&gt;Computational Combinatorics: &lt;/a&gt;Things a graph theorist...&lt;/li&gt;
&lt;li&gt;&lt;a href="http://dabacon.org/pontiff/?p=6584"&gt;Quantum Pontiff&lt;/a&gt;: Things a quantum information theorist .... &lt;/li&gt;
&lt;/ul&gt;
Let me know if there are more - &lt;strike&gt;I'm still waiting for a quantum computing version.&lt;/strike&gt;(thanks, Pontiff!)&lt;strike&gt;&lt;br /&gt;&lt;/strike&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=s5cYqhdaX30:Hui6HloQ61U:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=s5cYqhdaX30:Hui6HloQ61U:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/s5cYqhdaX30" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/4582283063278490988/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=4582283063278490988" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/4582283063278490988?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/4582283063278490988?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/s5cYqhdaX30/things-should-do-at-least-once.html" title="Things a _____ should do at least once..." /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>4</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2012/10/things-should-do-at-least-once.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D08GQnk4eSp7ImA9WhJbEEo.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-2651875496273218048</id><published>2012-09-19T11:30:00.001-06:00</published><updated>2012-09-19T11:30:23.731-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-09-19T11:30:23.731-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="focs2012" /><category scheme="http://www.blogger.com/atom/ns#" term="community" /><title>FOCS 2012 Student Support</title><content type="html">I've been asked by Rafail Ostrovsky and David Shmoys to highlight that s&lt;a href="http://dimacs.rutgers.edu/FOCS12/travel-support.html"&gt;tudent support for travelling to FOCS 2012&lt;/a&gt; is still available, and the deadline is this Friday. &lt;br /&gt;
&lt;br /&gt;
They note that they can fund 24-25 people, so people in the US &lt;a href="http://dimacs.rutgers.edu/FOCS12/travel-support.html"&gt;should apply &lt;/a&gt;&lt;b&gt;EVEN IF they don't have a paper&lt;/b&gt;. &lt;br /&gt;
&lt;br /&gt;
The NSF has been very generous of late in supporting student travel to conferences, and the best way to encourage them to continue is to show that we use the money they give us :). My students have taken advantage of these opportunities in the past, and it's been a great experience for them.&lt;br /&gt;
&lt;br /&gt;
So if you're a student reading this (and I know there are many of you!) or if you're an advisor who may not have the funding to send your student(s) to FOCS, do take advantage of this chance.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=sB2jS7WYMU4:oGt-vdhmFjw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=sB2jS7WYMU4:oGt-vdhmFjw:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/sB2jS7WYMU4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/2651875496273218048/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=2651875496273218048" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2651875496273218048?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/2651875496273218048?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/sB2jS7WYMU4/focs-2012-student-support.html" title="FOCS 2012 Student Support" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2012/09/focs-2012-student-support.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkIDSXw-cSp7ImA9WhJUEUU.&quot;"><id>tag:blogger.com,1999:blog-6555947.post-7669180708879974570</id><published>2012-09-09T02:49:00.004-06:00</published><updated>2012-09-09T02:49:38.259-06:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-09-09T02:49:38.259-06:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><title>Things a TCSer should have done at least once</title><content type="html">There are many discussions about what things a TCS grad student should know. I thought it might be useful instead to list out some things a theoretician should have done at some point early in their career.&lt;br /&gt;
&lt;br /&gt;
Rules of the game:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;You lose points if you do it as part of a class. If you decide to round an LP in a class on approximations, that's something you're being taught to do. But if you do it as part of solving a problem, then you've also done the difficult job of &lt;i&gt;recognizing &lt;/i&gt;that an LP needs to be rounded. That latter skill demonstrates some mastery. &lt;/li&gt;
&lt;li&gt;The goal is not complete coverage of all of TCS; rather, it's coverage of techniques that will come up fairly often, no matter what kinds of problems you look at.&lt;/li&gt;
&lt;li&gt;This list is necessarily algorithms-biased. I doubt you'll need many of these if you're doing (say) structural complexity.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;A similar caveat applies to classical vs quantum computing. While it seems more and more important even for classical computations that one knows a little quantumness, I don't know enough about quantum algorithm design to add the right elements. Comments ?&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
With those caveats out of the way, here's my list:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Show that a problem is NP-hard (for bonus points, from some flavor of SAT via gadgets)&lt;/li&gt;
&lt;li&gt;Show a hardness of approximation result (bonus points for going straight from a PCP)&lt;/li&gt;
&lt;li&gt;Prove a lower bound for a randomized algorithm&lt;/li&gt;
&lt;li&gt;Prove a lower bound via communication complexity or even information theory&lt;/li&gt;
&lt;li&gt;Round an LP (bonus points for not just doing the obvious rounding)&lt;/li&gt;
&lt;li&gt;Show an integrality gap for an LP&lt;/li&gt;
&lt;li&gt;Design a primal-dual algorithm&lt;/li&gt;
&lt;li&gt;Use projective duality to solve a problem (bonus points for using convex duality)&lt;/li&gt;
&lt;li&gt;Apply a Chernoff bound (bonus for using negative dependence, Janson's inequality, and an extra bonus for Talagrand's inequality)&lt;/li&gt;
&lt;li&gt;Design an FPT algorithm (maybe using treewidth, bonus for using bidimensionality or kernelization)&lt;/li&gt;
&lt;li&gt;Design a nontrivial exponential time algorithm (i.e an algorithm that doesn't just enumerate, but does something clever)&lt;/li&gt;
&lt;li&gt;Do an amortized analysis (for extra bonus get a log*n bound)&lt;/li&gt;
&lt;li&gt;use an advanced data structure - something beyond van emde Boas trees (extra bonus for exploiting word-size)&lt;/li&gt;
&lt;li&gt;invoke VC dimension to solve a problem&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
What else ?&amp;nbsp; &lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=_8V9ZwGllvY:PZi2CgJSfe4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/TheGeomblog?a=_8V9ZwGllvY:PZi2CgJSfe4:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/TheGeomblog?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/TheGeomblog/~4/_8V9ZwGllvY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://geomblog.blogspot.com/feeds/7669180708879974570/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6555947&amp;postID=7669180708879974570" title="9 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/7669180708879974570?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6555947/posts/default/7669180708879974570?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/TheGeomblog/~3/_8V9ZwGllvY/things-tcser-should-have-done-at-least.html" title="Things a TCSer should have done at least once" /><author><name>Suresh Venkatasubramanian</name><uri>https://plus.google.com/112165457714968997350</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-ONBPD2_QxOQ/AAAAAAAAAAI/AAAAAAAAAAA/h0HoqjO7eSA/s512-c/photo.jpg" /></author><thr:total>9</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://geomblog.blogspot.com/2012/09/things-tcser-should-have-done-at-least.html</feedburner:origLink></entry></feed>
