<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;DEcGQ348cCp7ImA9WhRUFkQ.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631</id><updated>2012-01-27T15:27:02.078-05:00</updated><category term="clustering" /><category term="meetup" /><category term="data mining" /><category term="funny" /><category term="comic" /><category term="privacy" /><category term="structural models." /><category term="bayesian" /><category term="FROC" /><category term="presentation" /><category term="academia" /><category term="psychology" /><category term="string matching" /><category term="adwords" /><category term="market for lemons" /><category term="spam" /><category term="wisdom of the crowds" /><category term="presidential elections 2008" /><category term="open access" /><category term="lda" /><category term="probability" /><category term="cfp" /><category term="fraud" /><category term="humor" /><category term="frequentist" /><category term="power law" /><category term="deduplication" /><category term="keyword bidding" /><category term="mechanical turk" /><category term="advice" /><category term="reviews" /><category term="customer service" /><category term="outliers" /><category term="acm" /><category term="dagstuhl" /><category term="data cleaning" /><category term="honda" /><category term="hcomp" /><category term="prediction markets" /><category term="incentives" /><category term="human computation" /><category term="call for papers" /><category term="drm" /><category 
term="peer reviewing" /><category term="online advertising" /><category term="efficient markets" /><category term="payment" /><category term="quality" /><category term="statistics" /><category term="crowdsourcing" /><category term="google" /><category term="pricing" /><category term="yahoo" /><category term="education" /><category term="aca" /><category term="newsweek" /><category term="slides" /><category term="Rudy Giuliani" /><category term="extreme value theory" /><category term="reputation" /><category term="youtube" /><category term="mind maps" /><category term="conference" /><category term="demo" /><category term="large datasets" /><category term="propublica" /><category term="ranked xml querying" /><category term="evaluation" /><category term="cheating" /><category term="charity" /><category term="information extraction" /><category term="amazon" /><category term="wikis" /><category term="tagasauris" /><category term="industry analysis" /><category term="Mitt Romney" /><category term="embed" /><category term="teaching" /><category term="powerpoint" /><category term="computer science" /><category term="research" /><category term="reduced models" /><category term="tutorial" /><category term="wikipedia" /><category term="economics" /><category term="www2011" /><category term="surveys" /><category term="csdm" /><category term="minimum wage" /><category term="intellectual property" /><category term="search" /><category term="businessweek" /><category term="microsoft" /><category term="Hillary Clinton" /><category term="online labor" /><category term="independence" /><category term="readability" /><category term="machine learning" /><category term="ROC" /><category term="publishers" /><category term="cognitive dissonance" /><category term="gmail" /><category term="dirichlet" /><category term="assembly line" /><category term="merger" /><category term="typesetting" /><title>A Computer Scientist in a Business School</title><subtitle type="html">Random thoughts of a 
computer scientist who is now working behind the enemy lines. And lately he turned into a double agent.</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://www.behind-the-enemy-lines.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>191</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/AComputerScientistInABusinessSchool" /><feedburner:info uri="acomputerscientistinabusinessschool" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><geo:lat>40.72596</geo:lat><geo:long>-73.998345</geo:long><link rel="license" type="text/html" href="http://creativecommons.org/licenses/by/3.0/" /><logo>http://creativecommons.org/images/public/somerights20.gif</logo><feedburner:emailServiceId>AComputerScientistInABusinessSchool</feedburner:emailServiceId><feedburner:feedburnerHostname>http://feedburner.google.com</feedburner:feedburnerHostname><entry 
gd:etag="W/&quot;D0YCSXY5fyp7ImA9WhRUEko.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-1124533443098454258</id><published>2012-01-19T00:48:00.001-05:00</published><updated>2012-01-22T18:32:48.827-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-22T18:32:48.827-05:00</app:edited><title>Identify Verification (and how to bypass it)</title><content type="html">Most marketplaces, in order to function properly, require some form of identification of their users. It is a well-known problem that the ability of participants to generate easily new identities can lead to many problems.&lt;br /&gt;
&lt;br /&gt;
At the very basic level, if participants can easily create new identities, then reputation systems lose part of their power: If a user gets scores that are mediocre or bad, then it is often preferable to abandon the account with the bad scores and start again. Even more importantly, Sybil attacks, where one participant generates multiple accounts, can fool many systems that rely on peer evaluation, or assume that users are independent of each other.&lt;br /&gt;
&lt;br /&gt;
For example, in services such as Mechanical Turk, which rely on redundancy to ensure high-quality answers, many spammers create multiple accounts and try to attack simple tasks by entering the same answer in all questions. I also remember Luis von Ahn was describing an attack against reCAPTCHA, where &lt;a href="http://www.4chan.org/"&gt;4chan&lt;/a&gt; users attacked reCAPTCHA by trying to guess which of the two words was the known one, and entering "penis" as the other word :-)&lt;br /&gt;
&lt;br /&gt;
It is therefore not surprising that most marketplaces attempt to have some form of identification service. A form of identification that is considered strong is to ask for a unique ID element from the registering users, e.g., the SSN of the participant, the place of birth, etc. Interestingly enough, it is trivially easy to bypass many such identity tests.&lt;br /&gt;
&lt;br /&gt;
Go and check the website &lt;a href="http://www.fakenamegenerator.com/"&gt;Fake Name Generator&lt;/a&gt;. You can specify the characteristics of the name that you want, and you get back an entry that you want. Someone with Japanese heritage living in the US? Sure thing, here is the entry for Mr. Souma Miura:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
&lt;div style="margin-left: 1em; margin-right: 1em; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-f59XQcNx5jI/TxerPUi_00I/AAAAAAAAub4/7RH6vSdNMoM/s1600/fake-name-generator.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/-f59XQcNx5jI/TxerPUi_00I/AAAAAAAAub4/7RH6vSdNMoM/s400/fake-name-generator.PNG" width="379" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;ul&gt;
&lt;/ul&gt;
&lt;br /&gt;
You prefer something more exotic? Maybe a person of Icelandic origin living in Cyprus? No problem:
&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-VtgJouSNgYg/TxerwETSVDI/AAAAAAAAucE/vWIQIPfYKoU/s1600/fake-name-generator2.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="396" src="http://4.bp.blogspot.com/-VtgJouSNgYg/TxerwETSVDI/AAAAAAAAucE/vWIQIPfYKoU/s400/fake-name-generator2.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
Interestingly enough, I was able to fool quite a few websites that supposedly vouch for the identity of their participants. All of them accepted the fake identities without problems, and in some cases even the credit card numbers (not for actual charges but the fake credit card numbers were accepted as legitimate credit cards to create a profile). For obvious reasons, I will not reveal the names of the victims :-)
&lt;br /&gt;
&lt;br /&gt;
So, how can a market better secure itself against identity attacks? Here are a few examples that I encountered:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;On&amp;nbsp;&lt;a href="http://embeemobile.com/"&gt;Embee Mobile&lt;/a&gt;&amp;nbsp;the payment to the workers is free talk time for their cell phone. While it is definitely possible to change the SIM card and the phone number, this is definitely not a cheap generation of identities.&lt;/li&gt;
&lt;li&gt;On &lt;a href="http://www.odesk.com/"&gt;oDesk&lt;/a&gt;, as part of the identification, participants are asked to send scans of their driving license and of their bank statements, in order to unlock the ability to apply to large (more than 5) projects. While it is certainly possible to fake those, it is unclear what someone can do with the money collected to an account if the cash cannot be withdrawn to a bank.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
Perhaps in the future we will see the emergence of identification services for individuals. We already have such services for websites (e.g., Verisign). It is conceivable that someone will be able to vouch for the identity of a person, but you can already see the Big Brother concerns that such a service will raise.&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-1124533443098454258?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=bRe0xhbNe-E:-RafOqJkHYI:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=bRe0xhbNe-E:-RafOqJkHYI:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/bRe0xhbNe-E" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/1124533443098454258/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2012/01/identify-verification-and-how-to-bypass.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1124533443098454258?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1124533443098454258?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/bRe0xhbNe-E/identify-verification-and-how-to-bypass.html" title="Identify Verification (and how to bypass it)" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-f59XQcNx5jI/TxerPUi_00I/AAAAAAAAub4/7RH6vSdNMoM/s72-c/fake-name-generator.PNG" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2012/01/identify-verification-and-how-to-bypass.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0ACQ3g-cSp7ImA9WhRVEUk.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-8650467326217864970</id><published>2012-01-09T14:55:00.003-05:00</published><updated>2012-01-09T17:56:02.659-05:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2012-01-09T17:56:02.659-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="hcomp" /><title>HCOMP 2012: 4th Human Computation Workshop</title><content type="html">I am happy to announce that the Fourth&amp;nbsp;Human Computation Workshop (HCOMP 2012), will be organized this year together with the AAAI conference, on&amp;nbsp;July 22 or 23. The conference is in Toronto.&lt;br /&gt;
&lt;br /&gt;
The deadline for submitting a paper is March 30, 2012. You can submit either a "long" 6-page paper, or a "short" 2-page poster submission.&lt;br /&gt;
&lt;br /&gt;
You can see the &lt;a href="http://www.humancomputation.com/2012/Welcome.html"&gt;official web site&lt;/a&gt; or check the detailed&amp;nbsp;&lt;a href="http://www.humancomputation.com/2012/About_the_Workshop.html"&gt;Call for Papers&lt;/a&gt; at&amp;nbsp;&lt;a href="http://www.humancomputation.com/2012/"&gt;http://www.humancomputation.com/&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
Last year, we had a big room for the workshop, which was jam-packed, with more than 100 people in the room at some point, and &lt;span id="goog_867069063"&gt;&lt;/span&gt;the program was full of excellent papers&lt;span id="goog_867069064"&gt;&lt;/span&gt;. So, if you have ideas about human computation, crowdsourcing, or merging human and machine intelligence, you want to send a paper to HCOMP!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-8650467326217864970?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=VNuaWTrx2vM:Pk_RLR-4eBs:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=VNuaWTrx2vM:Pk_RLR-4eBs:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/VNuaWTrx2vM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/8650467326217864970/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2012/01/hcomp-2012-4th-human-computation.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/8650467326217864970?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/8650467326217864970?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/VNuaWTrx2vM/hcomp-2012-4th-human-computation.html" title="HCOMP 2012: 4th Human Computation Workshop" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2012/01/hcomp-2012-4th-human-computation.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ck4FSH4-eyp7ImA9WhRSFEo.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-6844707407349129307</id><published>2011-11-15T21:19:00.001-05:00</published><updated>2011-11-16T14:15:19.053-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-16T14:15:19.053-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="quality" /><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" 
/><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="pricing" /><category scheme="http://www.blogger.com/atom/ns#" term="reputation" /><title>Does lack of reputation help the crowdsourcing industry?</title><content type="html">&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;span style="color: #990000;"&gt;Can the &lt;i&gt;lack &lt;/i&gt;of a public reputation system on Amazon Mechanical Turk be the reason behind the &lt;i&gt;success &lt;/i&gt;of current crowdsourcing companies?&lt;/span&gt;&lt;/b&gt; I present an analysis that points in this direction. Unfortunately, this "feature" also leads to a stagnating crowdsourcing market with limited potential for growth.&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;b&gt;Low salaries and market for lemons&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
A contentious issue about crowdsourcing, and specifically about Amazon Mechanical Turk, is that wages are very low. It is not uncommon to see effective wages of \$1/hr, or even lower. Why is that?&lt;br /&gt;
&lt;br /&gt;
I have argued in the past that Mechanical Turk is an example of a "&lt;a href="http://www.behind-the-enemy-lines.com/2010/07/mechanical-turk-low-wages-and-market.html"&gt;market for lemons&lt;/a&gt;". Good workers are drowning in the anonymity of the crowd. Since the good workers cannot differentiate themselves from bad workers &lt;i&gt;before &lt;/i&gt;working on a task, they are doomed to receive the same level of compensation as the bad workers.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
This is not a fault of the employers: when a new employer joins the market, it is almost necessary for the employer to test the incoming workers to ensure the quality of the work. During this testing period, high-quality workers are completing the tasks side-by-side with low-quality workers, and everyone receives a low salary.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
The counter argument that I often hear is: "But the market, in the long run, should see an increase in salaries, as good workers demonstrate their quality to employers". Of course, &lt;a href="http://en.wikiquote.org/wiki/John_Maynard_Keynes"&gt;in the long run we are all dead&lt;/a&gt;. But even in the long run, and even after we are all dead, the market does not seem to be on a path to convergence to fair salaries.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Why? Here is the brief summary:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;High-quality workers are much more valuable than low-quality ones&lt;/li&gt;
&lt;li&gt;Lack of a shared reputation system depresses salaries pushing all salaries close to the level of low-quality workers&lt;/li&gt;
&lt;li&gt;Employers build their own, &lt;b&gt;&lt;i&gt;private &lt;/i&gt;&lt;/b&gt;reputation systems, learning the quality of the workers&lt;/li&gt;
&lt;li&gt;&lt;b&gt;&lt;i&gt;&lt;span style="color: #990000;"&gt;With the private quality information, employers can retain good workers by paying higher wages compared to the low-quality workers, but still lower than their "fair" quality-adjusted wage.&lt;/span&gt;&lt;/i&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;New employers cannot compete with incumbents since they do not have access to the privately built reputation systems and have to face the cost of learning the quality of the workers, while incumbents enjoy their advantage of already knowing who the good workers are&lt;/li&gt;
&lt;li&gt;Incumbents can enjoy a strong cost advantage, effectively blocking newcomers from entering the industry&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
Below I expand on these arguments in a little more detail.&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Quality equivalence of low- and high-quality workers&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
First, let's examine the differences in payment between high- and low-quality workers. Let's take a very simple setting: Suppose that you have workers performing a task with two answers: Yes or no. The low-quality workers are accurate $lq$% of the time. The high-quality workers are accurate $hq$% of the time. How many workers of low quality do we need to emulate one worker of high quality?&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Working in the simplest possible case, assume that we have $k$ low-quality workers, and each gives the correct answer with probability $q$. We take the majority vote to be the aggregate answer. What is the probability $P(q,k)$ that the majority will be correct? We have that:&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;div style="text-align: center;"&gt;
$P(q,k) = \sum_{i = \lceil \frac{k+1}{2} \rceil}^k \binom{k}{i} \cdot q^i \cdot(1-q)^{k-i}$&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="font-size: x-small;"&gt;(Assume, for the sake of simplicity that $k$ is odd. Otherwise, we need to add the term &lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="font-size: x-small;"&gt; $\frac{1}{2}\cdot \left( \lceil \frac{k+1}{2} \rceil - \lceil \frac{k}{2} \rceil \right) \cdot \binom{k}{k/2}\cdot q^{k/2}\cdot (1-q)^{k/2}$ in the above equation, to allocate ties appropriately)&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Given the above, we can find how many low-quality workers of quality $lq$ we need to emulate a single high-quality worker of quality $hq$: We just need to solve the equation:&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;div style="text-align: center;"&gt;
$P(lq, k) = P(hq, 1)$&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Here are a few indicative pairs: To reach the 95% quality level we need:&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;3 workers of quality 90%.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;7 workers of quality 80%.
&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;9 workers of quality 75%.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;15 workers of quality 70%.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;67 workers of quality 60%.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;269 workers of quality 55%.&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
If our goal is to reach the 99% quality level, we need:&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;3 workers of quality 95%&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;5 workers of quality 90%&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;13 workers of quality 80%&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;31 workers of quality 70%&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
This means that &lt;b&gt;the fair wage of a single worker that is accurate at the 95% quality level should be ~9 times higher than the wage of the worker who is 75% accurate&lt;/b&gt;. A worker who is 99% accurate should demand a 13x higher salary than someone who is 80% accurate. Notice that as the quality of the low-quality workers drops, the difference in fair wages between the high-quality and low-quality workers increases at a very fast rate.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Employers learning the quality of workers&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Suppose that we have an employer called PanosLabs that has worked for a long period of time with workers. At this point, 
PanosLabs has a long track record for many workers, and the quality estimates for each worker are pretty solid. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Now, this knowledge of worker quality allows PanosLabs to pay the good workers higher salaries. Let's assume that 
PanosLabs decided to be very "generous". For the high-quality 99%-accurate workers, PanosLabs &lt;b&gt;&lt;i&gt;quadruples &lt;/i&gt;&lt;/b&gt;the salary, compared to the general pool. Similarly, for workers that are 95%-accurate, PanosLabs &lt;b&gt;&lt;i&gt;triples &lt;/i&gt;&lt;/b&gt;the salary compared to the general pool. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Assuming that the general pool of workers is at the 80% accuracy level, PanosLabs gets the following bargain: It is now possible to cut costs significantly, while maintaining the same quality level. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Initially, PanosLabs was hiring 13 workers per case, paying each \$1/hr; this is an effective wage of \$13/hr for reaching the 99% quality level. &lt;b&gt;Now, PanosLabs can have the 99% quality level by just employing a single 99% worker, for the cost of \$4/hr. This is a cost reduction of 70%! &lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Great bargain eh? This is the benefit of knowing thy worker...&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Increasing the barriers to entry&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Now let's assume that a new employer, called RotisLabs, arrives at the market. The high-quality workers are now happily employed at PanosLabs, receiving a salary that is 4X the running market salary for their task.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
RotisLabs, coming to the crowdsourcing market, is in a pickle. RotisLabs has no way of identifying and attracting the high quality workers without attracting the workers to work for RotisLabs first. Why?&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;There is no history of employment.&lt;/b&gt; In the "real world" knowing that an engineer worked at, say, Google gives some signal of quality. In our setting RotisLabs cannot check if a worker has worked for PanosLabs.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;It is not possible to check how much the workers get paid for other tasks&lt;/b&gt;. In the "real world" prices serve as signals. An employee that gets a high salary also signals to other employers that he or she is a high performer. However, RotisLabs cannot check the prices that workers receive.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Check now the situation of RotisLabs: The competitor, PanosLabs, generates 99% accurate work at the cost of \$4/hr. What are the options of RotisLabs?&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;First option: RotisLabs can pay \$1/hr.&lt;/b&gt; This option attracts the following workers: The low-quality, 80%-accurate workers that did not get increases by PanosLabs, and, &lt;i&gt;if lucky&lt;/i&gt;, some new 99%-accurate workers that just arrived in the market. &lt;b&gt;&lt;i&gt;However, this pay rate does not attract the high-quality workers that stick with PanosLabs, severely limiting the pool of good workers accessible to RotisLabs&lt;/i&gt;&lt;/b&gt;. Notice that, at this pay level, RotisLabs has a cost of \$13/hr to reach the 99%-quality level, while competing with PanosLabs that has 70% lower cost of production, i.e., \$4/hr. If RotisLabs has enough cash &lt;i&gt;and &lt;/i&gt;patience, it will stick to the market until learning the quality of workers. In most cases, though, RotisLabs will just realize that it is not possible to compete.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;Second option: RotisLabs can pay \$4/hr. &lt;/b&gt;This option may attract the 99%-accurate workers that work for PanosLabs. But this will also attract the 80% workers! Our dear friend, RotisLabs, cannot separate the two. Therefore, to ensure the 99%-quality level, RotisLabs needs to still hire 13 workers per case, to account for the cases where many 80% workers work on an example. This increases the overall cost of production to \$52/hr. Ooops! PanosLabs can reach the same level of quality with a cost of just \$4/hr.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
You can see that knowing the quality of the workers can give a &lt;b&gt;tremendous &lt;/b&gt;benefit to the incumbent players that invest into learning the quality of the workers. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Interestingly enough, due to the depressed salaries that are a direct consequence of the lack of reputation systems, the established employer effectively passed the search costs to the employees: While learning the quality of the workers, the employer is paying salaries corresponding to the lowest expected level of quality. It is up to the workers to carry the burden of low salaries until proving themselves (again and again, for every single employer...)&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Lack of shared reputation system: The foundation of the crowdsourcing industry?&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
The lack of a (shared) reputation system is a godsend for companies that enjoy a first-mover advantage. They can keep their costs down, while keeping their own employees happy (in a relative sense: &lt;i&gt;"can't you see how much better I am paying you compared to the general pool?"&lt;/i&gt;). &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
The anonymity generates the conditions for "market for lemons" salaries, which keep the costs down. At the same time, the smart and established employers can find and reach out to the high quality workers. By paying these workers "generously", the smart employers can lock-in the workers into "golden cages": offer salaries that are higher than those for the general population, but still much much lower than the level of the fair wages for the produced quality levels. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
When even these 4x or 5x (unrealistic and fictional) salary increases, mentioned in the example above, are great bargains, you can imagine the margins that crowdsourcing companies can command. &lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="color: #990000;"&gt;&lt;b&gt;In a very perverse manner, the anonymity imposed by Mechanical Turk is now effectively serving as the foundation of the current crowdsourcing industry. The anonymity keeps worker costs down, allowing most companies to offer solutions that are very cost competitive compared to alternatives. At the same time, this policy is hurting the Amazon MTurk marketplace by effectively generating huge barriers to entry for newcomer employers, and depressing the salaries of newcomer employees. &lt;/b&gt;&lt;i&gt;(The Masters qualification is a step in the right direction, but too crude to serve as an effective signalling mechanism.)&lt;/i&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;The future?&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Let's see who will manage to generate the appropriate market for crowdsourcing that will resolve these issues.  One thing is clear: the direction towards improving crowdsourcing markets requires salaries to increase significantly. Interestingly enough, this is expected to lower the overall cost of production as well, as the cost of  quality control will be significantly lower.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
As &lt;a href="http://www.slideshare.net/ipeirotis/crowdsourcing-lessons-from-henry-ford"&gt;I said in the crowdsourcing panel&lt;/a&gt; at the WWW2011 conference last Spring:&lt;/div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;It is not about the cost! &lt;/li&gt;
&lt;li style="text-align: justify;"&gt;It is not about the crowd! &lt;/li&gt;
&lt;li style="text-align: justify;"&gt;It is not about simple tasks! &lt;/li&gt;
&lt;li style="text-align: justify;"&gt;Crowdsourcing is best for “parallel, scalable, automatic interviews” and for finding quickly good workers&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span style="color: #990000;"&gt;Find the best trained workers, fast,  pay them well, and keep them!&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-6844707407349129307?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=_bcOxins_zQ:wc_FtLgWHho:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=_bcOxins_zQ:wc_FtLgWHho:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/_bcOxins_zQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/6844707407349129307/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/11/does-lack-of-reputation-help.html#comment-form" title="8 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6844707407349129307?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6844707407349129307?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/_bcOxins_zQ/does-lack-of-reputation-help.html" title="Does lack of reputation help the crowdsourcing industry?" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>8</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/11/does-lack-of-reputation-help.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkIGQ344cSp7ImA9WhRSFEs.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-6875940019748112594</id><published>2011-11-10T22:18:00.001-05:00</published><updated>2011-11-16T12:28:42.039-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-16T12:28:42.039-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" 
/><category scheme="http://www.blogger.com/atom/ns#" term="businessweek" /><title>BusinessWeek...</title><content type="html">&lt;a href="http://www.businessweek.com/magazine/humans-plus-computers-equals-better-crowdsourcing-11102011.html"&gt;BusinessWeek on my research&lt;/a&gt;&amp;nbsp;:-)&lt;br /&gt;
&lt;br /&gt;
Special thanks to my collaborators that made this research possible: &lt;a href="http://people.stern.nyu.edu/fprovost/"&gt;Foster Provost&lt;/a&gt;, &lt;a href="http://pages.stern.nyu.edu/~jwang5/"&gt;Jing Wang&lt;/a&gt;, &lt;a href="http://www.linkedin.com/in/joshattenberg"&gt;Josh Attenberg&lt;/a&gt;, &lt;a href="http://uca.edu/computerscience/facultystaff/shengli-victor-sheng/"&gt;Shengli Sheng&lt;/a&gt;. Additional thanks go to&amp;nbsp;&lt;a href="http://adsafemedia.com/"&gt;AdSafe Media&lt;/a&gt; and, of course,&amp;nbsp;&lt;a href="http://www.tagasauris.com/"&gt;Tagasauris&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-6875940019748112594?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=XtXhz6HDcEw:G0gW8LJhgM8:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=XtXhz6HDcEw:G0gW8LJhgM8:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/XtXhz6HDcEw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/6875940019748112594/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/11/businessweek.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6875940019748112594?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6875940019748112594?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/XtXhz6HDcEw/businessweek.html" title="BusinessWeek..." /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>4</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/11/businessweek.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkYGRHg7eSp7ImA9WhdbE0s.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-913252986112122025</id><published>2011-10-11T16:15:00.001-04:00</published><updated>2011-10-11T16:15:25.601-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-11T16:15:25.601-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="conference" /><category scheme="http://www.blogger.com/atom/ns#" term="cfp" /><category 
scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="call for papers" /><category scheme="http://www.blogger.com/atom/ns#" term="aca" /><category scheme="http://www.blogger.com/atom/ns#" term="human computation" /><title>Collective Intelligence 2012: Deadline November 4, 2011</title><content type="html">&lt;br /&gt;
For all those of you interested in crowdsourcing, I would like to bring your attention to a new conference,&amp;nbsp;named &lt;a href="http://www.ci2012.org/"&gt;Collective Intelligence 2012&lt;/a&gt;,&amp;nbsp;being organized at MIT this spring (April 18-20, 2012) by &lt;a href="http://cci.mit.edu/malone/"&gt;Tom Malone&lt;/a&gt; and &lt;a href="http://www.cs.cmu.edu/~biglou/"&gt;Luis von Ahn&lt;/a&gt;. The conference is expected to have a set of 15-20 invited speakers (disclaimer: I am one of them), and also accepts papers submitted for publication. &lt;b&gt;The deadline is November 4th, 2011&lt;/b&gt;, so if you have something that you would be willing to share with a wide audience interested in collective intelligence, this may be a place to consider.&lt;br /&gt;
&lt;br /&gt;
The call for papers follows:&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Overview&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Collective intelligence has existed at least as long as humans have, because families, armies, countries, and companies have all--at least sometimes--acted collectively in ways that seem intelligent. But in the last decade or so a new kind of collective intelligence has emerged: groups of people and computers, connected by the Internet, collectively doing intelligent things. For example, Google technology harvests knowledge generated by millions of people creating and linking web pages and then uses this knowledge to answer queries in ways that often seem amazingly intelligent. Or in Wikipedia, thousands of people around the world have collectively created a very large and high quality intellectual product with almost no centralized control, and almost all as volunteers!&lt;br /&gt;
&lt;br /&gt;
These early examples of Internet-enabled collective intelligence are not the end of the story but just the beginning. And in order to understand the possibilities and constraints of these new kinds of intelligence, we need a new interdisciplinary field. Forming such a field is one of the goals of this conference.&lt;br /&gt;
&lt;br /&gt;
We seek papers about behavior that is both collective and intelligent. &amp;nbsp;By collective, we mean groups of individual actors, including, for example, people,&lt;br /&gt;
computational agents, and organizations. &amp;nbsp;By intelligent, we mean that the collective behavior of the group exhibits characteristics such as, for example,&lt;br /&gt;
perception, learning, judgment, or problem solving.&lt;br /&gt;
&lt;br /&gt;
Topics of interest include but are not limited to:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;human computation&lt;/li&gt;
&lt;li&gt;social computing&lt;/li&gt;
&lt;li&gt;crowdsourcing&lt;/li&gt;
&lt;li&gt;wisdom of crowds (e.g., prediction markets)&lt;/li&gt;
&lt;li&gt;group memory and problem-solving&lt;/li&gt;
&lt;li&gt;deliberative democracy&lt;/li&gt;
&lt;li&gt;animal collective behavior&lt;/li&gt;
&lt;li&gt;organizational design&lt;/li&gt;
&lt;li&gt;public policy design (e.g., regulatory reform)&lt;/li&gt;
&lt;li&gt;ethics of collective intelligence (e.g., "digital sweatshops")&amp;nbsp;&lt;/li&gt;
&lt;li&gt;computational models of group search and optimization&lt;/li&gt;
&lt;li&gt;emergence and evolution of intelligence&lt;/li&gt;
&lt;li&gt;new technologies for making groups smarter&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
For a more complete description of the scope, please click here. For any questions, please email contact@ci2012.org.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Dates and Location&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
The conference will be held April 18-20, 2012 on the MIT campus in Cambridge, MA. &amp;nbsp;Accommodations in nearby hotels will be available for conference attendees.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Format&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
The conference will consist of:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;invited talks from prominent researchers in different areas related to collective intelligence&lt;/li&gt;
&lt;li&gt;oral paper presentations&lt;/li&gt;
&lt;li&gt;poster sessions&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Submission&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Papers of three types are invited:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Reports of original research results&lt;/li&gt;
&lt;li&gt;Reviews of previous research in one or more fields relevant to collective intelligence&lt;/li&gt;
&lt;li&gt;Position papers about research agendas for the field of collective intelligence&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
Some of the papers submitted will be invited for oral presentation, others for presentation as posters.&lt;br /&gt;
&lt;br /&gt;
Papers may be up to 8 pages in length. The deadline for submission is November 4, 2011. Download the submission format. Papers shall be submitted by email to submissions@ci2012.org.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Important Dates&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Paper submission deadline: November 4, 2011&lt;/li&gt;
&lt;li&gt;Notification of paper acceptance / rejection: January 15, 2012&lt;/li&gt;
&lt;li&gt;Camera-ready papers due: February 15, 2012&lt;/li&gt;
&lt;li&gt;Conference dates: April 18-20, 2012&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-913252986112122025?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=a9HJ1Qphe08:mh8B09brzog:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=a9HJ1Qphe08:mh8B09brzog:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/a9HJ1Qphe08" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/913252986112122025/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/10/collective-intelligence-2012-deadline.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/913252986112122025?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/913252986112122025?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/a9HJ1Qphe08/collective-intelligence-2012-deadline.html" title="Collective Intelligence 2012: Deadline November 4, 2011" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/10/collective-intelligence-2012-deadline.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUICRXk8fyp7ImA9WhdVFEU.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-3153650136136268757</id><published>2011-09-03T22:04:00.000-04:00</published><updated>2011-09-19T21:52:44.777-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-19T21:52:44.777-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" 
term="pricing" /><category scheme="http://www.blogger.com/atom/ns#" term="probability" /><title>Probabilities and MTurk Executives: A Troubled Story</title><content type="html">On the Mechanical Turk blog, there is a blog post that &lt;a href="http://mechanicalturk.typepad.com/blog/2011/09/cooking-.html"&gt;describes the need to build custom qualifications for workers&lt;/a&gt;. (&lt;b&gt;Update&lt;/b&gt;: the old link was removed after I posted this analysis, and the new version does not contain any of the problematic math analysis.)&amp;nbsp;While the argument is correct, it is backed by some horrendous math analysis. For your viewing pleasure, here is the quote:&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;
This difference in accuracy is magnified if you’re using plurality.  Supposed you use plurality of 2 (asking 2 Workers the same question).  With Masters, if 2 Workers with average accuracy (99%) agree on an answer there is a 98% probability that it’s the correct answer.  With the broader group, if 2 Workers with average accuracy (90%) agree on an answer there is an 81% probability that it’s the correct answer.   And if you happen to get 2 68% accurate Workers submitting assignments for the same HIT, the probability the answer is accurate is only 46%!&lt;/blockquote&gt;
&lt;br /&gt;
Dear Sharon: &lt;br /&gt;
&lt;br /&gt;
We do appreciate your efforts on improving MTurk and on giving correct advice.&lt;br /&gt;
&lt;br /&gt;
But this analysis, which attempts to back a correct argument, is absolutely wrong. It is so wrong that it hurts. Just think at a very intuitive level: how is it possible to ask two workers of certain accuracy, see that they agree, and expect the accuracy of the &lt;b&gt;&lt;i&gt;corroborated &lt;/i&gt;&lt;/b&gt;answer to be lower? This is simply not possible!&lt;br /&gt;
&lt;br /&gt;
Here is the correct analysis:&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;br /&gt;
Supposed you use plurality of 2 (asking 2 Workers the same question).  With Masters, if 2 Workers with average accuracy (99%) agree on an answer then the probability that this answer is incorrect is &lt;br /&gt;
&lt;br /&gt;
$Pr(\mathit{incorrect}|\mathit{agreement}) =&amp;nbsp;\frac{Pr(\mathit{worker1\ incorrect\ and\ worker2\ incorrect})}{Pr(\mathit{agreement})}$.&lt;br /&gt;
&lt;br /&gt;
Assuming (conditional) independence of the workers:&lt;br /&gt;
&lt;br /&gt;
$Pr(\mathit{incorrect}|\mathit{agreement}) = $&lt;br /&gt;
&lt;br /&gt;
$\frac{Pr(\mathit{worker1\ incorrect}) \cdot  Pr(\mathit{worker2\ incorrect})}{Pr(\mathit{agreement})}=\frac{(1-p)^2}{p^2+(1-p)^2}$.&lt;br /&gt;
&lt;br /&gt;
where $p$ is the probability of a worker being correct.&lt;br /&gt;
&lt;br /&gt;
With 99% accuracy, the probability of a worker being correct is $p=0.99$. So:&lt;br /&gt;
&lt;br /&gt;
$Pr(\mathit{incorrect}|\mathit{agreement}) = \frac{0.01 \cdot 0.01}{0.01 \cdot 0.01 +&amp;nbsp;0.99 \cdot 0.99}$&lt;br /&gt;
&lt;br /&gt;
$\Rightarrow Pr(\mathit{incorrect}|\mathit{agreement}) \approx 0.000102$.&lt;br /&gt;
&lt;br /&gt;
Since $Pr(\mathit{correct}|\mathit{agreement}) = 1-Pr(\mathit{incorrect}|\mathit{agreement})$, therefore, with Masters, if 2 Workers with average accuracy (99%) agree on an answer, there is a $1-0.000102 \approx 99.99\%$ probability that it’s the correct answer.&lt;br /&gt;
&lt;br /&gt;
With the broader group, if 2 Workers with average accuracy (90%) agree on an answer there is an $1-\frac{ 0.1 \cdot 0.1}{0.1 \cdot 0.1 +&amp;nbsp;0.9 \cdot 0.9} \approx&amp;nbsp;&amp;nbsp;98.78\%$ probability that it’s the correct answer. And if you happen to get two 68%-accurate workers submitting assignments for the same HIT (and they both agree), the probability the answer is accurate is only $1-\frac{ 0.32 \cdot 0.32}{0.32 \cdot 0.32 +&amp;nbsp;0.68 \cdot 0.68} \approx&amp;nbsp;&amp;nbsp;81.87\%$!&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;br /&gt;
How did Sharon get confused? The analysis that she presents calculates not the accuracy of the answer when the workers agree, but instead it calculates &lt;b&gt;&lt;i&gt;how often the two workers will agree &lt;span class="Apple-style-span" style="color: #990000;"&gt;and &lt;/span&gt;agree on the correct answer&lt;/i&gt;&lt;/b&gt;. Indeed, with workers that have 68% accuracy, we will observe agreement on the correct answer only 46% of the time. (And, 10% of the time, they will agree on the incorrect answer.) More importantly, though, 44% of the time, the two workers will disagree, and we will need to bring an extra worker, increasing the cost by 50%.&lt;br /&gt;
&lt;br /&gt;
Why did Sharon get confused? One explanation is that she is a victim of the&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Conjunction_fallacy"&gt;conjunction&amp;nbsp;fallacy&lt;/a&gt;&amp;nbsp;or that she does not understand conditional probabilities.&amp;nbsp;However, I believe it is not that. I bet that she did not get puzzled by the results because the presented math confirmed another (correct) intuition that she had about the market:&amp;nbsp;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;redundancy when relying on low-quality workers is not cost-effective&lt;/span&gt;&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
Consider this: if you have 3 workers of 68% accuracy, the combination of the three (e.g., using majority vote) will result in an &lt;b&gt;&lt;i&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;average &lt;/span&gt;&lt;/i&gt;&lt;/b&gt;accuracy of only 75%. In other words only 3 out of 4 times the majority will generate the correct answer. To reach 90% accuracy, we need 11 workers with 68% accuracy each. And&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; to reach 99% accuracy, we need 39 workers of 68% accuracy&lt;/span&gt;&lt;/b&gt;! (I will present the math in a later blog post.)&lt;br /&gt;
&lt;br /&gt;
Even using "moderately&amp;nbsp;high quality" workers, simulating a worker that is 99% accurate tends to be an expensive proposition. &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;We need five workers that are 90% accurate to get 99% accuracy&lt;/span&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
So, yes, the high-quality Masters workers are worth their extra price. In fact, they are worth their weight in gold. Paying only 20% more to access a guaranteed pool of high-quality "Masters" workers is a &lt;b&gt;great&lt;/b&gt; bargain, given the quality differences with the general worker pool.&lt;br /&gt;
&lt;br /&gt;
Actually, if I were a 99% accurate worker I would feel offended that I do not get at least double or triple the running wage for the common workers. There is a great mispricing of the services provided by high-quality workers, and most requesters today exploit just this fact to keep the wages down, while still managing to get high quality results from the tested, reliable workers.&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-3153650136136268757?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Jw6Hm7ELKGE:YHCdsH08NQw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Jw6Hm7ELKGE:YHCdsH08NQw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/Jw6Hm7ELKGE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/3153650136136268757/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/09/probabilities-and-mturk-executives.html#comment-form" title="6 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3153650136136268757?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3153650136136268757?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/Jw6Hm7ELKGE/probabilities-and-mturk-executives.html" title="Probabilities and MTurk Executives: A Troubled Story" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>6</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/09/probabilities-and-mturk-executives.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkYMSHw4eSp7ImA9WhdXFUk.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-1453769556608044343</id><published>2011-08-28T11:09:00.001-04:00</published><updated>2011-08-28T11:09:49.231-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-28T11:09:49.231-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="reviews" /><category scheme="http://www.blogger.com/atom/ns#" term="surveys" 
/><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><title>The impact of online reviews: An annotated bibliography</title><content type="html">A few weeks back, I received some questions about online consumer reviews, their impact on sales, and other related questions. At that point, I realized that while I had a good grasp of the technical literature within Computer Science venues, my grasp of the overall empirical literature within Marketing and Information Systems venues was rather shaky, so I had to do a better work in preparing a literature review.&lt;br /&gt;
&lt;br /&gt;
So, I did whatever a self-respecting professor would do in such a situation: I asked my PhD student, &lt;a href="http://pages.stern.nyu.edu/~bli/"&gt;Beibei Li&lt;/a&gt;, to compile a list of such papers, write a brief summary of each, and send me the list. She had passed her qualification exam by studying exactly this area, so she was the resident expert in the topic.&lt;br /&gt;
&lt;br /&gt;
Beibei did not disappoint me. A few hours later I had a very good list of papers in my mailbox, together with the description. It was so good, that I thought that many other people would be interested in the list. &lt;br /&gt;
&lt;br /&gt;
So, without further ado, I present you Beibei's annotated bibliography about online reviews and their business impact.&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
&lt;b&gt;User behavior and online reviews&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Nan Hu, Paul Pavlou and Jie Zhang, in their paper "&lt;a href="http://dx.doi.org/10.1145/1562764.1562800"&gt;Overcoming the J-shaped distribution of product reviews&lt;/a&gt;" have shown that the graphical representation of product reviews has a J-shaped distribution: mostly 5-star ratings, some 1-star ratings, and hardly any ratings in between. What can explain this distribution? They attribute this rating distribution to two biases:&lt;/li&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;Purchasing bias&lt;/b&gt;: People that buy a product do not constitute a random sample of the population. People buy products that they believe they will enjoy. So, the reviews are written by people that are more likely to like the product. Since only people with higher product valuations purchase a product, those with lower valuations are less likely to purchase the product, and they will not write a (negative) product review. Purchasing bias causes the positive skewness in the distribution of product reviews and inflates the average.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Underreporting bias&lt;/b&gt;: Among people who purchased a product, those with extreme ratings (5-star or 1-star) are more likely to express their views to “brag or moan” than those with moderate views.&lt;/li&gt;
&lt;/ul&gt;&lt;li&gt;Xinxin Li and Lorin Hitt, in their 2008 paper "&lt;a href="http://ericchaing.org/files/Li_2008_ISR.pdf"&gt;Self-Selection and Information Role of Online Product Reviews&lt;/a&gt;" have found that online reviews may be subject to a &lt;b&gt;self-selection bias&lt;/b&gt;: products are not randomly assigned to reviewers. Rather, early buyers (buyers who also post the first reviews) self-select products that they believe they may enjoy, in the absence of any existing information. This is in contrast to other buyers that wait for more signals about the quality of a product to emerge, before being convinced to buy, and therefore have a lower prior expectation about the product quality. As a consequence, the preferences of early buyers systematically differ from the broader consumer population, and the early reviews can be biased, either in a positive or negative way. Such bias in reviews will affect sales and reduce consumer surplus, even if all reviews are truthful.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Wendy W. Moe and Michael Trusov in their paper "&lt;a href="http://www.rhsmith.umd.edu/faculty/wmoe/moe_trusov.pdf"&gt;Measuring the Value of Social Dynamics in Online Product Ratings Forums&lt;/a&gt;", looked into how social influences affect the subsequent ratings and sales. They demonstrated that reviewer rating behavior is significantly affected by previous ratings. In other words, product reviews not only reflect the customers' experience with the product, but they also affect the ratings of later reviews as well.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Chrysanthos Dellarocas, Guodong (Gordon) Gao, and Ritu Narayan in their paper "&lt;a href="http://www.dellarocas.com/images/papers/jmis2010.pdf"&gt;Are Consumers More Likely to Contribute Online Reviews for Hit or Niche Products?&lt;/a&gt;" show that consumers tend to prefer posting reviews for obscure movies but also for hit movies that already have a large number of online reviews. The recommendation of the authors to owners of review websites is that the volume of previously posted reviews should be made less prominent in order to encourage posting of reviews for lesser-known products.&lt;/li&gt;
&lt;/ul&gt;&lt;b&gt;Online product reviews and product sales&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Judy Chevalier and Dina Mayzlin, in their 2006 paper "&lt;a href="http://www.journals.marketingpower.com/doi/abs/10.1509/jmkr.43.3.345?journalCode=jmkr"&gt;The Effect of Word of Mouth on Sales: Online Book Reviews&lt;/a&gt;" have first demonstrated that online ratings have significant impact on book sales. The key trick was to monitor the sales of the same book in parallel on Amazon.com and on Barnes &amp;amp; Noble. Since the two sites were selling the same book, any external effect would be similar to both websites. However, reviews posted on Amazon or on BN.com would influence sales only on the respective websites. Through this "differences in differences" method, Chevalier and Mayzlin could isolate and measure the effect of product reviews, without worrying about other confounding factors.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Yong Liu, in the 2006 paper "&lt;a href="http://www.journals.marketingpower.com/doi/abs/10.1509/jmkg.70.3.74?journalCode=jmkg"&gt;Word of Mouth for Movies: Its Dynamics and Impact on Box Office Revenue&lt;/a&gt;" has looked at the same topic, but focused on the movie box office. In contrast to Chevalier and Mayzlin, his findings suggested that the valence of reviews does not matter for box office sales; however, the review volume does.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Pradeep K. Chintagunta, Shyam Gopinath and Sriram Venkataraman, in their 2010 paper "&lt;a href="http://mktsci.journal.informs.org/content/early/2010/05/27/mksc.1100.0572.abstract"&gt;The Effects of Online User Reviews on Movie Box Office Performance: Accounting for Sequential Rollout and Aggregation Across Local Markets&lt;/a&gt;" have further studied the impact (valence, volume, and variance) of online reviews by looking at the local geographic movie box office, rather than the national-level aggregate box office performance. After accounting for various potential complications in the analysis, they suggested that it is the valence that seems to matter and not the volume.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Jonah Berger, Alan T. Sorensen and Scott J. Rasmussen, in their 2010 paper "&lt;a href="http://marketing.wharton.upenn.edu/documents/research/Negative_Publicity.pdf"&gt;Positive Effects of Negative Publicity: When Negative Reviews Increase Sales&lt;/a&gt;" found that negative reviews can boost sales for unknown books, but hurt sales for books with established authors. This happens because negative reviews bring visibility to unknown books. Whereas for authors who are already well known, publicity does not boost the awareness of their books, instead, the valence of the publicity becomes more important.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Chris Forman, Anindya Ghose and Batia Wiesenfeld, in their 2008 paper "&lt;a href="http://isr.journal.informs.org/content/19/3/291.abstract"&gt;Examining the Relationship Between Reviews and Sales: The Role of Reviewer Identity Disclosure in Electronic Markets&lt;/a&gt;" have looked at the role of reviewer identity disclosure (e.g., real name and location of the reviewer) in examining the relationship between Amazon book reviews and sales. They found that the prevalence of reviewer disclosure of identity information is associated with increases in helpfulness rating of the review and the subsequent online product sales. This is because community members more positively assess reviewers who disclose identity-descriptive information, and then use their assessment of reviewers as a heuristic shaping their evaluation of the product reviewed.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Nikolay Archak, Anindya Ghose and Panagiotis G. Ipeirotis (yours truly), in the 2011 paper "&lt;a href="http://mansci.journal.informs.org/content/57/8/1485.short"&gt;Deriving the Pricing Power of Product Features by Mining Consumer Reviews&lt;/a&gt;", examine the idea that the textual content of the product reviews is an important determinant of consumers' choices, over and above the valence and volume of reviews. Using text mining tools, they incorporated review text by decomposing textual reviews into segments describing different product features. This work demonstrates how textual data can be used to learn consumers' relative preferences for different product features and also how text can be used for predictive modeling of future changes in sales.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Anindya Ghose and Panagiotis G. Ipeirotis (yours truly, again), in the 2011 paper "&lt;a href="http://pages.stern.nyu.edu/~panos/publications/tkde2010-usefulness.pdf"&gt;Estimating the Helpfulness and Economic Impact of Product Reviews: Mining Text and Reviewer Characteristics&lt;/a&gt;", explored online review's impact on helpfulness and product sales, using multiple aspects of review text, such as subjectivity levels, various measures of readability and extent of spelling errors. The analysis has revealed that the extent of subjectivity, informativeness, readability, and linguistic correctness in reviews matters in influencing sales and perceived usefulness. See also the &lt;a href="http://behind-the-enemy-lines.blogspot.com/2010/01/did-you-find-this-helpful.html"&gt;related blog post&lt;/a&gt; that I wrote in January 2010 (yes, even after acceptance, it took 1.5 years for the paper to appear in print).&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Yubo Chen, Qi Wang and Jinhong Xie, in their paper "&lt;a href="http://www.journals.marketingpower.com/doi/abs/10.1509/jmkr.48.2.238?journalCode=jmkr"&gt;Online Social Interactions: A Natural Experiment on Word of Mouth Versus Observational Learning&lt;/a&gt;" studied how word-of-mouth (WOM, i.e., others’ opinions) differs from observational learning (i.e., others’ purchase actions) in influencing sales. They have found that:&lt;/li&gt;
&lt;ul&gt;&lt;li&gt;negative WOM is more influential than positive WOM;&lt;/li&gt;
&lt;li&gt;positive observational learning information significantly increases sales but negative information has no effect (e.g., reporting purchase statistics helps popular products, without hurting niche ones);&lt;/li&gt;
&lt;li&gt;the sales impact of observational learning increases with WOM volume&lt;/li&gt;
&lt;/ul&gt;&lt;li&gt;Michael Luca, in his "job market paper" "&lt;a href="http://people.bu.edu/mluca/JMP.pdf"&gt;Reviews, Reputation, and Revenue: The Case of Yelp.com&lt;/a&gt;" used a nice trick for estimating the causal effect of consumer reviews from Yelp.com on restaurant demand. Using revenue data from the state of Washington, he examined the effect of having an extra "half star" in Yelp. The key trick is to exploit the discontinuity in the way that Yelp assigns aggregate scores: A restaurant with 3.76 average review rating gets a 4-star review, while a restaurant with 3.74 average review rating gets a 3.5-star review. So, if there is a big gap in the revenues between restaurants with scores of 3.76 and 3.74, then this revenue gap (which actually exists) can be attributed to Yelp, and to its summary rating. (&lt;a href="http://afinetheorem.wordpress.com/2011/06/30/reviews-reputation-and-revenue-the-case-of-yelp-com-m-luca-2010/"&gt;This blog post presents further analysis of the paper&lt;/a&gt;, and also mentions a similar use of this discontinuity trick to study the effect of sanitary scores in NYC: a restaurant may get an "A" score with $x$ penalty points, and another get a "B" with $x+1$ penalty points). Luca found discontinuous jumps in restaurant sales that follow the discontinuous jumps in the ratings around the rounding thresholds. This finding strongly suggested that changes in ratings (e.g., from just below a rounding threshold to just above a rounding threshold) can have a significant causal impact on restaurant demand.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&amp;nbsp;&lt;b&gt;Online word of mouth and firms&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Michael Trusov, Randolph E. Bucklin, and Koen Pauwels in their 2009 paper "&lt;a href="http://bear.warrington.ufl.edu/weitz/mar7786/Articles/Trusov%20et%20al%202009%20social%20network.pdf"&gt;Effects of Word-of-Mouth Versus Traditional Marketing: Findings from an Internet Social Networking Site&lt;/a&gt;" compared the effects of word-of-mouth marketing versus traditional marketing, as judged from the member growth at an Internet social networking site. They found that WOM referrals (i.e., invitations) not only produce a substantially higher short-term response, but also have substantially longer carryover effects in the long run than traditional marketing actions (e.g., promotion events, media appearances).&lt;/li&gt;
&lt;li&gt;&amp;nbsp;David Godes and Dina Mayzlin, in their 2009 paper "&lt;a href="http://dx.doi.org/10.1287/mksc.1080.0444"&gt;Firm-Created Word-of-Mouth Communication: Evidence from a Field Test&lt;/a&gt;" examined how a firm should try to create useful word-of-mouth. They looked at who creates WOM and what kind of WOM matters. They found that for a product with a low initial awareness level, WOM that is most effective at driving sales is created by less loyal (not highly loyal) customers and occurs between acquaintances (not friends). They also found that although "opinion leadership" is useful in identifying potentially effective spreaders of WOM among very loyal customers, it is less useful for the sample of less loyal customers.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Jackie Y. Luan and Scott Neslin, in their paper "&lt;a href="http://papers.ssrn.com/sol3/papers.cfm?abstract_id=1462336"&gt;The Development and Impact of Consumer Word of Mouth in New Product Diffusion&lt;/a&gt;" focused on how word-of-mouth (WOM) influences new product adoption in the video game market. Specifically, they were able to measure how effectively firms' marketing efforts generate WOM (buzz) and to determine whether WOM influences product adoption primarily through an informative role (i.e., helping the consumer learn product quality) or a persuasive role (i.e., exerting a direct impact on sales, for example, by increasing awareness).&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
If you have any other papers that you think that should be included in the list, please add your recommendation in the comments, together with a brief description of the conceptual and methodological contribution of the paper.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-1453769556608044343?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=ml840wQUkDk:-iSnrldUYzs:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=ml840wQUkDk:-iSnrldUYzs:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/ml840wQUkDk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/1453769556608044343/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/08/impact-of-online-reviews-annotated.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1453769556608044343?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1453769556608044343?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/ml840wQUkDk/impact-of-online-reviews-annotated.html" title="The impact of online reviews: An annotated bibliography" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/08/impact-of-online-reviews-annotated.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0YDRHs9eCp7ImA9WhdSFkk.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-7314073683116632672</id><published>2011-07-25T18:28:00.002-04:00</published><updated>2011-07-25T22:59:35.560-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-25T22:59:35.560-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="reputation" /><title>Native vs Grapevine Reputation on MTurk</title><content type="html">The Mechanical Turk blog has a new entry today, by Sharon (Chiarella), titled &lt;a href="http://mechanicalturk.typepad.com/blog/2011/07/cooking-with-sharon-tip-3-manage-your-reputation.html"&gt;"Cooking with Sharon" &amp;amp; Tip #3 Manage Your Reputation&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
In the article, Sharon encourages requesters to do the following:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;i&gt;&lt;b&gt;Pay well&lt;/b&gt; - Don’t be fooled into underpaying Workers by comparing your HITs to low priced HITs that aren’t being completed.&lt;/i&gt;&lt;/li&gt;
&lt;li&gt;&lt;i&gt;&lt;b&gt;Pay fairly&lt;/b&gt; – Don’t reject an Assignment unless you’re SURE it’s the Worker who is wrong.&lt;/i&gt;&lt;/li&gt;
&lt;li&gt;&lt;i&gt;&lt;b&gt;Pay quickly&lt;/b&gt; – If you approve or reject Assignments once a week, Workers may do a few HITs and then wait to see if they are paid before doing more.  This is especially true if you’re a new Requester and haven’t established your reputation yet.&lt;/i&gt;&lt;/li&gt;
&lt;/ul&gt;Sharon then explains that workers do talk with each other in the forums, on Turkopticon, and so on, and collectively establish the reputation of the requester based on these factors.&amp;nbsp;While there is nothing wrong with this "grapevine"-based reputation, it also illustrates some obvious features that the Mechanical Turk platform is missing.&lt;br /&gt;
&lt;br /&gt;
Instead of outsourcing the task to third-party forums, Amazon should provide features that make the reputation of the requester more transparent, visible, and objective.&lt;br /&gt;
&lt;br /&gt;
For example, each requester could have a profile, in which the workers can see:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;The total number of HITs, and rewards posted by the requester&lt;/li&gt;
&lt;li&gt;The rejection rate for the requester&lt;/li&gt;
&lt;li&gt;The distribution of working time for the HITs of the requester&lt;/li&gt;
&lt;li&gt;The effective hourly wage for the tasks completed for the requester&lt;/li&gt;
&lt;li&gt;The payment lag from completion of the task until payment&lt;/li&gt;
&lt;/ul&gt;These are all elements that workers would find useful. They are statistics that contribute to the transparency of the market, and their objective nature&amp;nbsp;makes the establishment of reputation much faster. Such objective characteristics complement the more subjective features used in the grapevine-based reputation systems (Turker Nation, Turkopticon, etc), where only a subset of workers contribute and measure personal perceptions (e.g., was this task "well-paid" or not?). Of course, subjective reputation systems will continue to play their role, providing information that cannot be easily quantified. But they should not be the only reputation signal for the market.&lt;br /&gt;
&lt;br /&gt;
Could there be side-effects if such a system is deployed? Yes. I can see some cases where this profile can introduce strange incentives in the market. (For example, it may be good to have a few of my tasks spammed and still pay immediately for the results, so that I can have high acceptance rate, HITs that require only a little bit of time to be completed, and show a high hourly wage.) But these are just details that can be addressed. There is no way that overall the market could suffer when such statistics become publicly available. &lt;span class="Apple-style-span" style="font-size: x-small;"&gt;(Sorry Mr \$0.23/hr-requester, you are not &lt;i&gt;that &lt;/i&gt;valuable.)&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
Markets operate based on trust and are better with increased information efficiency. Any step towards this direction is a good step for the market participants and, by extension, for the market owner.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-7314073683116632672?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=IHN26gtHr3E:7Z9-uy4R95A:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=IHN26gtHr3E:7Z9-uy4R95A:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/IHN26gtHr3E" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/7314073683116632672/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/07/native-vs-grapevine-requester.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/7314073683116632672?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/7314073683116632672?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/IHN26gtHr3E/native-vs-grapevine-requester.html" title="Native vs Grapevine Reputation on MTurk" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/07/native-vs-grapevine-requester.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE8MRHs5eCp7ImA9WhdSFUw.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-3925690082343510518</id><published>2011-07-22T18:57:00.002-04:00</published><updated>2011-07-24T09:08:05.520-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-24T09:08:05.520-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="evaluation" /><category scheme="http://www.blogger.com/atom/ns#" term="education" /><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category 
scheme="http://www.blogger.com/atom/ns#" term="teaching" /><category scheme="http://www.blogger.com/atom/ns#" term="cheating" /><category scheme="http://www.blogger.com/atom/ns#" term="incentives" /><title>A tale about parking</title><content type="html">The media attention to my &lt;a href="http://behind-the-enemy-lines.blogspot.com/2011/07/why-i-will-never-pursue-cheating-again.html"&gt;prior blog post &lt;/a&gt;was really not something that I enjoyed. Not so much for the attention itself but for focusing on exactly the wrong issues. That post was NOT about me and my evaluation. This is not the main point. I thought that the salary issue was worth mentioning (apparently, it was not) but it was, indeed, a MINOR part of the issue. &lt;br /&gt;
&lt;br /&gt;
In fact, after reflecting on this point, I realized the following: &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;Even if I had received a $1M bonus from NYU for my efforts, the basic problem would still be there: the teaching experience would degenerate into a witch hunt, focusing on cheating, instead of being about learning&lt;/span&gt;.&lt;/b&gt; And yes, I would still write the same blog post even if I were fully satisfied with my annual evaluation. &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In fact, the blog post was in my folder of draft posts for a few months now, long before receiving my annual evaluation.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
If you want a parallel, consider this hypothetical story:&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
&lt;b&gt;A tale about parking&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Suppose that you live in a city with a huge traffic problem, and a resulting huge parking problem. Too many cars on the street.&lt;br /&gt;
&lt;br /&gt;
People try to find parking and they drive around, drive around. A lot. Some drivers get frustrated and they double park. Some drivers are stupid enough to double park during rush hour, block the traffic, and leave the car unattended. As expected, the police arrive and assign a ticket to the offender, sometimes taking the car as well. However, during quiet hours, when there is no traffic, many drivers double park, but they do not block the traffic, and nobody gives them a ticket.&lt;br /&gt;
&lt;br /&gt;
Suddenly, in one neighborhood only, call it Redwich Village, a lone policeman starts assigning tickets for every parking violation. No matter if it is minor or major. No matter if the driver just stepped out, or if it is the first time that the driver double parked. Zero-tolerance policy.&lt;br /&gt;
&lt;br /&gt;
By doing that, and being more vigilant, our lone policeman assigns 10 times more tickets than before. By doing that, he also loses countless hours fighting with the offenders. This continuous fight also annoys some other residents of the neighborhood that want the policeman to focus on policing the neighborhood, and not spend all his time giving parking tickets.&lt;br /&gt;
&lt;br /&gt;
But even our lone policeman gets frustrated: he realizes that he did not become a policeman to give parking tickets. While it is part of his duties, he feels that it is just better not to be so aggressive. His boss also gets a report that many neighborhood residents are annoyed. His boss knows that the complaints are due to the zero-tolerance policy on parking tickets. So he says that he would like our lone policeman to both continue this&amp;nbsp;idiosyncratic&amp;nbsp;zero-tolerance policy enforced just by our lone policeman, and be as diligent with his other duties as before.&lt;br /&gt;
&lt;br /&gt;
Our lone policeman goes on and reflects on the overall experience. He realizes that he is fighting a losing battle. As the number of cars increases in the city, there will be more people parking illegally.&lt;br /&gt;
&lt;br /&gt;
So, our lone policeman suggests that we need to do something more fundamental about the parking problem: He suggests that people could carpool, use bicycles, mass transit, or simply walk. And he asks for people to think of more such alternatives. If there are fewer cars in the city, the problem will be resolved.&lt;br /&gt;
&lt;br /&gt;
He describes all his thoughts in his blog, in a long post, titled "Why I will never give parking tickets again." He describes the futility of parking tickets to fight the underlying problem, and vows never to be so vigilant about parking tickets. He will be as vigilant as all the other policemen, which is as vigilant as he was before. &lt;br /&gt;
&lt;br /&gt;
His blog post goes viral. Media pick up fragments, everyone reads whatever they want to read. Some headlines:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;"Parking tickets in Redwich Village increase by 1000%. Is it impossible to park your car in Redwich?"&lt;/li&gt;
&lt;li&gt;"Parking-related violations skyrocket in&amp;nbsp;Redwich&amp;nbsp;Village. Policeman punished for enforcing the rules."&lt;/li&gt;
&lt;li&gt;"RedWich Village sucks. Only scumbags live in RedWich Village, what did you expect? Any lawful behavior?"&lt;/li&gt;
&lt;li&gt;"Stupid city residents: We know that all people that live in cities are cheaters and park illegally"&lt;/li&gt;
&lt;li&gt;"Why the government does not reward this honest policeman?"&lt;/li&gt;
&lt;li&gt;"Why this policeman is vowing not to obey the law? Oh the society..."&lt;/li&gt;
&lt;/ul&gt;Now, some of the business owners of Redwich Village are annoyed because people may not drive to Redwich, if they think it is impossible to find parking. Some residents are also annoyed because real estate prices may go down if people believe that Redwich is a place where you cannot park your car. After all, it is all a matter of reputation.&lt;br /&gt;
&lt;br /&gt;
And in this brouhaha, nobody pays any attention to the underlying problem. Is increased vigilance the solution to the parking problem? Should we give more tickets? Should we install cameras? Or should we try to follow the suggestions of our lone policeman and think of other ways to reduce traffic, and therefore resolve the parking problem on a more fundamental level?&lt;br /&gt;
&lt;br /&gt;
The blog post of our lone policeman is neither about the policeman nor about Redwich. It is about the fact that there is too much traffic in the whole city. Which in turn causes the parking problem. Parking scarcity is the symptom, not the real problem. And while he wrote about the traffic problem and suggested solutions, 99% of the coverage was about Redwich and about his own evaluation.&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
This is exactly how the discussion about cheating evolved in the media. Instead of focusing on how to make student evaluation objective and cheating-proof, the discussion focused on whether my salary went sufficiently up or not. &lt;b&gt;This is not the main point.&lt;/b&gt; It is not even a minor point, in reflection. The real question is on how we can best evaluate our students and which evaluation strategies are robust to cheating, encourage creativity, and evaluate true learning.&lt;br /&gt;
&lt;br /&gt;
And this is not a discussion that can be done while screaming.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-3925690082343510518?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=JR2kmAeQIOc:wrg_chuh7TI:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=JR2kmAeQIOc:wrg_chuh7TI:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/JR2kmAeQIOc" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/3925690082343510518/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/07/tale-about-parking.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3925690082343510518?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3925690082343510518?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/JR2kmAeQIOc/tale-about-parking.html" title="A tale about parking" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/07/tale-about-parking.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEMEQXk_fSp7ImA9WhdSE0U.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-6670828593372886603</id><published>2011-07-17T17:30:00.028-04:00</published><updated>2011-07-22T20:53:20.745-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-22T20:53:20.745-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="evaluation" /><category scheme="http://www.blogger.com/atom/ns#" term="education" /><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category scheme="http://www.blogger.com/atom/ns#" term="teaching" /><category 
scheme="http://www.blogger.com/atom/ns#" term="cheating" /><category scheme="http://www.blogger.com/atom/ns#" term="incentives" /><title>Why I will never pursue cheating again</title><content type="html">The post is temporarily removed. I will restore it after ensuring that there are no legal liabilities for myself or my employer.&lt;br /&gt;
&lt;br /&gt;
Until then, you can read my commentary in my new blog post: &lt;a href="http://behind-the-enemy-lines.blogspot.com/2011/07/tale-about-parking.html"&gt;A tale about parking.&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
The discussion on &lt;a href="http://hackerne.ws/item?id=2774254"&gt;Hacker News&lt;/a&gt; was good as well.&amp;nbsp;Also see the response that I posted at the &lt;a href="http://www.businessinsider.com/nyu-professor-class-cheating-2011-7#comment-4e273a85cadcbb434e020000"&gt;Business Insider&lt;/a&gt; website and the coverage at &lt;a href="http://www.insidehighered.com/news/2011/07/22/nyu_professor_s_blog_post_sets_off_debate_on_plagiarism"&gt;Inside Higher Education&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-6670828593372886603?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Y1HiH9ebOpU:bvNlYmY6qYc:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Y1HiH9ebOpU:bvNlYmY6qYc:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/Y1HiH9ebOpU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/6670828593372886603/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/07/why-i-will-never-pursue-cheating-again.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6670828593372886603?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6670828593372886603?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/Y1HiH9ebOpU/why-i-will-never-pursue-cheating-again.html" title="Why I will never pursue cheating again" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/07/why-i-will-never-pursue-cheating-again.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0MDRXo8cCp7ImA9WhZaEUU.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-2169164950293332365</id><published>2011-06-26T12:57:00.008-04:00</published><updated>2011-06-27T09:31:14.478-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-27T09:31:14.478-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="newsweek" /><category scheme="http://www.blogger.com/atom/ns#" term="minimum wage" /><category scheme="http://www.blogger.com/atom/ns#" term="tutorial" /><category scheme="http://www.blogger.com/atom/ns#" term="extreme value theory" /><title>Extreme value theory 101, or Newsweek researching minimum wage on Mechanical Turk</title><content type="html">Last week, Newsweek published an article titled &lt;a href="http://www.newsweek.com/2011/06/19/the-real-minimum-wage.html"&gt;The Real Minimum Wage&lt;/a&gt;. The authors report that "&lt;i&gt;in a weeks-long experiment, we posted simple, hourlong jobs (listening to audio recordings and counting instances of a specific keyword) and continually lowered our offer until we found the absolute bottom price that multiple people would accept, and then complete the task&lt;/i&gt;." &lt;br /&gt;
&lt;br /&gt;
The results "showed" that Americans are the ones willing to accept the lowest possible salary for working on a task, compared even to people in India, Romania, Philippines, etc. In fact, they found that there are Americans willing to work for 25 cents per hour, while they could not find anyone willing to work for less than \$1/hr in any other country. The conclusion of the article? Americans are more desperate than anyone else in the world.&lt;br /&gt;
&lt;br /&gt;
What is the key problem of this study? There are many more US-based workers on Mechanical Turk compared to other nationalities. So, if you have a handful of workers from other countries, and hundreds of workers from the US, you are guaranteed to find more extreme findings for the US. Why? To put it simply, you are searching harder within the US to find small values, compared to the effort placed on other countries. &lt;span class="Apple-style-span" style="font-size: x-small;"&gt; (There are other issues as well, e.g., workers that would work on this task are not necessarily representative of the overall population; the same workers are exposed to multiple, decreasing salaries, issues of anchoring, issues of workers falsely reporting to be from the US, whether the authors checked IP geo-location, etc. While all these are valid concerns, they are secondary to the very basic statistical problem.)&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Finding a Minimum Value: A Probabilistic Approach&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
On an abstract, statistical level, by testing workers from multiple countries, to determine their minimum wage, we sample multiple "minimum wage distributions" trying to find the smallest value within each one of them.&lt;br /&gt;
&lt;br /&gt;
Each probability distribution corresponds to the minimum wages that workers from different countries are willing to accept. Let's call the CDF's of distributions $F_i(x)$, with, say, $F_1(x)$ being the distribution for minimum wages for US, $F_2(x)$ for India, $F_3(x)$ for UK, etc etc.&lt;br /&gt;
&lt;br /&gt;
As a simplifying example, assume that $F(x)$ is a uniform distribution, with minimum value \$0 and a maximum value \$10, &lt;b&gt;for an average acceptable minimum wage of \$5&lt;/b&gt;. This means that:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;10% of the population will accept a minimum wage below \$1, (i.e., $F(\$1)=0.1$)&lt;/li&gt;
&lt;li&gt;20% of the population will accept a minimum wage below \$2, (i.e., $F(\$2)=0.2$)&lt;/li&gt;
&lt;li&gt;...&lt;/li&gt;
&lt;li&gt;90% of the population will accept a minimum wage below \$9, (i.e., $F(\$9)=0.9$)&lt;/li&gt;
&lt;li&gt;100% of the population will accept a minimum wage below \$10, (i.e., $F(\$10)=1.0$)&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
Now, let's assume that we sample $n$ workers from one of the country-specific distributions. After running the experiment, we get back measurements $x_1, \ldots, x_n$, each one corresponding to the minimum wage for each of the workers that participated in the study, who comes from the country that we are measuring.&lt;br /&gt;
&lt;br /&gt;
What is the probability of one of these wages being below, say, $z=\$0.25$? Here is the probability calculation:&lt;br /&gt;
&lt;br /&gt;
$\begin{eqnarray}&lt;br /&gt;
Pr(\mathit{min~wage} &amp;lt; z) &amp;amp;=&amp;amp; 1 - Pr(\mathit{all~wages} \geq z)\\&lt;br /&gt;
&amp;amp; =&amp;amp; 1 - Pr(x_1 \geq z, \ldots, x_n \geq z)&lt;br /&gt;
\end{eqnarray}$&lt;br /&gt;
&lt;br /&gt;
Assuming independence across the sampled values, we have:&lt;br /&gt;
&lt;br /&gt;
$\begin{eqnarray}&lt;br /&gt;
Pr(\mathit{min~wage} &amp;lt; z) &amp;amp;=&amp;amp; 1 - \prod_{i=1}^n Pr(x_i \geq z) \\&lt;br /&gt;
&amp;amp; =&amp;amp; 1 - \left(1 - F(z) \right)^n&lt;br /&gt;
\end{eqnarray}$&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
So, if we sample $n$ workers, set the minimum wage at $z=\$0.25$, and assume a uniform distribution for $F$, then $F(\$0.25)=0.025$ and the probability that we will find at least one worker willing to work for 25 cents is:&lt;br /&gt;
&lt;br /&gt;
$Pr(\mathit{min~wage} &amp;lt; z) = 1 - 0.975^n$&lt;br /&gt;
&lt;br /&gt;
Plotting this, as a function of $n$, we have the following:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-jhK21E6rq0g/Tgcy4q1sQlI/AAAAAAAAs80/QKYLVhzW8h4/s1600/extreme-value-theory.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="253" src="http://3.bp.blogspot.com/-jhK21E6rq0g/Tgcy4q1sQlI/AAAAAAAAs80/QKYLVhzW8h4/s400/extreme-value-theory.gif" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;b&gt;As we get more and more workers, the more likely it is to find a value that will be at or below 25 cents/hour. &lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
So, how does this approach explain the findings of Newsweek?&lt;br /&gt;
&lt;br /&gt;
We know that all countries are not equally represented on Mechanical Turk. Most workers are from the US (50% or so), followed by India (35% or so), and then by Canada (2%), UK (2%), Philippines (2%), and a variety of other countries with similarly small percentages. This means that in the study, we expect to have more Americans participating, followed by Indians, and then a variety of other countries. So, even if the distribution of minimum wages was identical across all countries, we expect to find lower wages in the country with the largest number of participants.&lt;br /&gt;
&lt;br /&gt;
Since the majority of the workers on Mechanical Turk are from US, followed by India, followed by Canada, and UK, etc, &lt;b&gt;the illustration by Newsweek simply gives us the country of origin of the workers, in reverse order of popularity!&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-lTDFUn167jw/Tgc0YC46ZCI/AAAAAAAAs84/-4IQ3IVRSwQ/s1600/Illustration-by-Newsweek.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="266" src="http://3.bp.blogspot.com/-lTDFUn167jw/Tgc0YC46ZCI/AAAAAAAAs84/-4IQ3IVRSwQ/s400/Illustration-by-Newsweek.jpg" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
At this point, someone may ask: what happens if the distribution is not uniform but, say, lognormal? (A much more plausible distribution for minimum acceptable wages.) For this specific question, as you can see from the analysis above, this does not make much of a difference: The only thing that we need to know is the value of $F(z)$ for the $z$ value of interest.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Going in depth: Extreme Value Theory&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
A more general question is: What is the expected maximum (or minimum) value that we expect to find when we sample from an arbitrary distribution? This is the topic of &lt;a href="http://en.wikipedia.org/wiki/Extreme_value_theory"&gt;extreme value theory&lt;/a&gt;, a field in statistics that tries to predict the probability of extreme events (e.g., what is the biggest possible drop in the stock market? what is the biggest rainfall in this region?) Given the events in the financial markets in 2008, this theory has received significant attention in the last few years.&lt;br /&gt;
&lt;br /&gt;
What is nice about this theory is that the fundamentals can be summarized very succinctly. The &lt;a href="http://en.wikipedia.org/wiki/Fisher%E2%80%93Tippett%E2%80%93Gnedenko_theorem"&gt;Fisher–Tippett–Gnedenko theorem&lt;/a&gt; states that, if we sample from a distribution, the maximum values that we expect to find will be a random variable, belonging to one of the three distributions:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;If the distribution from which we are sampling has a &lt;b&gt;tail that decreases exponentially&lt;/b&gt; (e.g., normal distribution, exponential, Gamma, etc), then the maximum value is described by the (reversed) &lt;a href="http://en.wikipedia.org/wiki/Gumbel_distribution"&gt;Gumbel distribution&lt;/a&gt; (aka "type I extreme value distribution")&lt;/li&gt;
&lt;li&gt;If the distribution from which we are sampling has a &lt;b&gt;tail that decreases as a polynomial (i.e., has a "long tail")&lt;/b&gt; (e.g., power-laws, Cauchy, Student-t, etc), then the maximum value is described by the &lt;a href="http://en.wikipedia.org/wiki/Fr%C3%A9chet_distribution"&gt;Frechet distribution&lt;/a&gt; (aka "type II extreme value distribution")&lt;/li&gt;
&lt;li&gt;If the distribution from which we are sampling has a &lt;b&gt;tail that is finite (i.e., has a "short tail")&lt;/b&gt; (e.g., uniform, Beta, etc), then the maximum follows the  (reversed) &lt;a href="http://en.wikipedia.org/wiki/Weibull_distribution"&gt;Weibull distribution&lt;/a&gt; (aka "type III extreme value distribution")&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
The three types of the distributions are all special cases of the &lt;a href="http://en.wikipedia.org/wiki/Generalized_extreme_value_distribution"&gt;generalized extreme value distribution&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
This theory has significant applications not only when modeling risk (stock market, weather, earthquakes, etc), but also when modeling decision-making for humans: Often, we model humans as utility maximizers, who are making decisions that maximize their own well-being. This maximum-seeking behavior results often in the distributions described above. I will give a more detailed description in a later blog post.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-2169164950293332365?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=SSLObarhYKo:TTWA9sVtdvw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=SSLObarhYKo:TTWA9sVtdvw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/SSLObarhYKo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/2169164950293332365/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/06/extreme-value-theory-101-or-newsweek.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2169164950293332365?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2169164950293332365?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/SSLObarhYKo/extreme-value-theory-101-or-newsweek.html" title="Extreme value theory 101, or Newsweek researching minimum wage on Mechanical Turk" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-jhK21E6rq0g/Tgcy4q1sQlI/AAAAAAAAs80/QKYLVhzW8h4/s72-c/extreme-value-theory.gif" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/06/extreme-value-theory-101-or-newsweek.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0ENQ30zcCp7ImA9WhdXFE8.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-5585467094888505184</id><published>2011-06-24T12:29:00.004-04:00</published><updated>2011-08-27T03:21:32.388-04:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-08-27T03:21:32.388-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="human computation" /><category scheme="http://www.blogger.com/atom/ns#" term="hcomp" /><title>Accepted papers for the 3rd Human Computation Workshop (HCOMP 2011)</title><content type="html">We have &lt;a href="http://www.humancomputation.com/Program.html"&gt;posted online the schedule&lt;/a&gt; for the &lt;a href="http://www.humancomputation.com/Welcome.html"&gt;3rd Human Computation Workshop (HCOMP 2011)&lt;/a&gt;, which will be organized as part of &lt;a href="http://www.aaai.org/Conferences/AAAI/aaai11.php"&gt;AAAI 2011&lt;/a&gt;, in San Francisco, on August 8th. The &lt;a href="https://www.aaai.org/Forms/aaai-registration-form.php"&gt;registration fee&lt;/a&gt; for participating in the workshop is a pretty modest \$125 for graduate students, and \$155 for other participants. Just make sure to register before July 1st to get these rates, as afterwards the rates jump to \$165 and \$185. I should also mention that, following the tradition established in Paris in HCOMP 2009, we will have a group dinner for all the participants after the workshop to continue the discussions from the day...&lt;br /&gt;
&lt;br /&gt;
We have a strong program, with 16 long papers accepted, and 16 papers being presented as demos and posters. Below you can find the titles of the papers and their abstracts. The PDF versions of the papers &lt;s&gt;will be posted online by AAAI, after the completion of the conference&lt;/s&gt; &lt;a href="http://www.aaai.org/Library/Workshops/ws11-11.php"&gt;are available through the AAAI Digital Library&lt;/a&gt;. Until then, you can search Google, or just ask the authors for a pre-print. So, if you are interested in crowdsourcing and human computation, we hope to see you there in San Francisco in August!&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
&lt;b&gt;Long Papers&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;Large-Scale Live Active Learning: Training Object Detectors with Crawled Data and Crowds&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Sudheendra Vijayanarasimhan, Kristen Grauman (UT Austin)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Active learning and crowdsourcing are promising ways to efficiently build up training sets for object recognition, but thus far techniques are tested in artificially controlled settings. Typically the vision researcher has already determined the dataset's scope, the labels ``actively" obtained are in fact already known, and/or the crowd-sourced collection process is iteratively fine-tuned. We present an approach for *live learning* of object detectors, in which the system autonomously refines its models by actively requesting crowd-sourced annotations on images crawled from the Web. To address the technical issues such a large-scale system entails, we introduce a novel part-based detector amenable to linear classifiers, and show how to identify its most uncertain instances in sub-linear time with a hashing-based solution. We demonstrate the approach with experiments of unprecedented scale and autonomy, and show it successfully improves the state-of-the-art for the most challenging objects in the PASCAL benchmark. In addition, we show our detector competes well with popular nonlinear classifiers that are much more expensive to train.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Robust Active Learning using Crowdsourced Annotations for Activity Recognition&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Liyue Zhao, Gita Sukthankar (UCF); Rahul Sukthankar (Google Research/CMU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Recognizing human activities from wearable sensor data is an important problem, particularly for health and eldercare applications. However, collecting sufficient labeled training data is challenging, especially since interpreting IMU traces is difficult for human annotators. Recently, crowdsourcing through services such as Amazon's Mechanical Turk has emerged as a promising alternative for annotating such data, with active learning serving as a natural method for affordably selecting an appropriate subset of instances to label. Unfortunately, since most active learning strategies are greedy methods that select the most uncertain sample, they are very sensitive to annotation errors (which corrupt a significant fraction of crowdsourced labels). This paper proposes methods for robust active learning under these conditions. Specifically, we make three contributions: 1) we obtain better initial labels by asking labelers to solve a related task; 2) we propose a new principled method for selecting instances in active learning that is more robust to annotation noise; 3) we estimate confidence scores for labels acquired from MTurk and ask workers to relabel samples that receive low scores under this metric. The proposed method is shown to significantly outperform existing techniques both under controlled noise conditions and in real active learning scenarios. The resulting method trains classifiers that are close in accuracy to those trained using ground-truth data.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Beat the Machine: Challenging workers to find the unknown unknowns&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Josh Attenberg, Panos Ipeirotis, Foster Provost (NYU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
This paper presents techniques for gathering data that expose errors of automatic classification models. Prior work has demonstrated the promise of having humans seek training data, as an alternative to active learning, in cases where there is extreme class imbalance. We now explore the direction where we ask humans to identify cases that will cause the classification system to fail. Such techniques are valuable in revealing problematic cases that do not reveal themselves during the normal operation of the system, and may include cases that are rare but catastrophic. We describe our approach for building a system to satisfy these requirements, trying to encourage humans to provide us with such data points. In particular, we reward a human when the provided example is difficult for the model to handle, and the reward is proportional to the magnitude of the error. In a sense, the humans are asked to “Beat the Machine” and find cases where the automatic model (“the machine”) is wrong. Our experimental data show that the density of the identified problems is an order of magnitude higher compared to alternative approaches, and that the proposed technique can identify quickly the “big flaws” that would typically remain uncovered.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Human Intelligence Needs Artificial Intelligence&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Daniel Weld, Mausam, Peng Dai (University of Washington)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing platforms, such as Amazon Mechanical Turk, have enabled the construction of scalable applications for tasks ranging from product categorization and photo tagging to audio transcription and translation. These vertical applications are typically realized with complex, self-managing workflows that guarantee quality results. But constructing such workflows is challenging, with a huge number of alternative decisions for the designer to consider. Artificial intelligence methods can greatly simplify the process of creating complex crowdsourced workflows. We argue this thesis by presenting the design of TurKontrol 2.0, which uses machine learning to continually refine models of worker performance and task difficulty. Using these models, TurKontrol 2.0 uses decision-theoretic optimization to 1) choose between alternative workflows, 2) optimize parameters for a workflow, 3) create personalized interfaces for individual workers, and 4) dynamically control the workflow. Preliminary experience suggests that these optimized workflows are significantly more economical than those generated by humans.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Worker Motivation in Crowdsourcing and Human Computation&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Nicolas Kaufmann; Thimo Schulze (University of Mannheim)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Many human computation systems use crowdsourcing markets like Amazon Mechanical Turk to recruit human workers. The payment in these markets is usually very low, and still collected demographic data shows that the participants are a very diverse group including highly skilled full time workers. Many existing studies on their motivation are rudimental and not grounded on established motivation theory. Therefore, we adapt different models from classic motivation theory, work motivation theory and Open Source Software Development to crowdsourcing markets. The model is tested with a survey of 431 workers on Mechanical Turk. We find that the extrinsic motivational categories (immediate payoffs, delayed payoffs, social motivation) have a strong effect on the time spent on the platform. For many workers, however, intrinsic motivation aspects are more important, especially the different facets of enjoyment based motivation like “task autonomy” and “skill variety”. Our contribution is a preliminary model based on established theory intended for the comparison of different crowdsourcing platforms.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Honesty in an Online Labor Market&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Winter Mason, Siddharth Suri, Daniel Goldstein (Yahoo! Research)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
The efficient functioning of markets and institutions assumes a certain degree of honesty from participants. In labor markets, for instance, employers benefit from employees who will render meaningful work, and employees benefit from employers who will pay the promised amount for services rendered. We use an established method for detecting dishonest behavior in a series of experiments conducted on Amazon Mechanical Turk, a popular online labor market. Our first experiment estimates a baseline amount of dishonesty for this task in the population sample. The second experiment tests the hypothesis that the level of dishonesty in the population will be sensitive to the relative amount that can be gained by dishonest reporting, and the third experiment manipulates the degree to which dishonest reporting can be detected at the individual level. We conclude with a demographic and cross-cultural analysis of the predictors of dishonest reporting in this market.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Building a Persistent Workforce on Mechanical Turk for Multilingual Data Collection&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;David Chen (UT Austin); William Dolan (Microsoft Research)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Traditional methods of collecting translation and paraphrase data are prohibitively expensive, making constructions of large, new corpora difficult. While crowdsourcing offers a cheap alternative, quality control and scalability can become problematic. We discuss a novel annotation task that uses videos as the stimulus which discourages cheating. It also only requires monolingual speakers, thus making it easier to scale since more workers are qualified to contribute. Finally, we employed a multi-tiered payment system that helps retain good workers over the long-term, resulting in a persistent, high-quality workforce. We present the results of one of the largest linguistic data collection efforts using Mechanical Turk, yielding 85K English sentences and more than 1k sentences for each of a dozen more languages. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;CrowdSight: Rapidly Prototyping Intelligent Visual Processing Apps&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Mario Rodriguez (UCSC); James Davis&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
We describe a framework for rapidly prototyping applications which require intelligent visual processing, but for which there does not yet exist reliable algorithms, or for which engineering those algorithms is too costly. The framework, CrowdSight, leverages the power of crowdsourcing to offload intelligent processing to humans, and enables new applications to be built quickly and cheaply, affording system builders the opportunity to validate a concept before committing significant time or capital. Our service accepts requests from users either via email or simple mobile applications, and handles all the communication with a backend human computation platform. We build redundant requests and data aggregation into the system freeing the user from managing these requirements. We validate our framework by building several test applications and verifying that prototypes can be built more easily and quickly than would be the case without the framework. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Digitalkoot: Making Old Archives Accessible Using Crowdsourcing&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Otto Chrons, Sami Sundell (Microtask)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In this paper, we present Digitalkoot, a system for fixing errors in the Optical Character Recognition (OCR) process of old texts through the use of human computation. By turning the work into simple games, we are able to attract a great number of volunteers to donate their time and cognitive capacity for the cause. Our analysis shows how untrained people can reach very high accuracy through the use of crowdsourcing. Furthermore we analyze the effect of social media and gender on participation levels and the amount of work accomplished. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Error Detection and Correction in Human Computation: Lessons from the WPA&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;David Alan Grier (GWU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Human Computation is, of course, a very old field with a forgotten literature that treats many of the key problems, especially error detection and correction. The obvious methods of error detection, duplicate calculation, have proven to be subject to Babbage's Rule: Different workers using the same methods on the same data will tend to make the same errors. To avoid the consequences of this rule, early human computers developed a disciplined regimen to identify and correct mistakes. This paper reconstructs those methods, puts them in a modern context and identifies their implications for the modern version of human computation. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Programmatic gold: targeted and scalable quality assurance in crowdsourcing&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Dave Oleson, Vaughn Hester, Alex Sorokin, Greg Laughlin, John Le, Lukas Biewald (CrowdFlower)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing is an effective tool for scalable data annotation in both research and enterprise contexts. Due to crowdsourcing's open participation model, quality assurance is critical to the success of any project. Present methods rely on EM-style post-processing or manual annotation of large gold standard sets. In this paper we present an automated quality assurance process that is inexpensive and scalable. Our novel process relies on programmatic gold creation to provide targeted training feedback to workers and to prevent common scamming scenarios. We find that it decreases the amount of manual work required to manage crowdsourced labor while improving the overall quality of the results. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;An Iterative Dual Pathway Structure for Speech-to-Text Transcription&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Beatrice Liem, Haoqi Zhang, Yiling Chen (Harvard University)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In this paper, we develop a new human computation algorithm for speech-to-text transcription that can potentially achieve the high accuracy of professional transcription using only microtasks deployed via an online task market or a game. The algorithm partitions audio clips into short 10-second segments for independent processing and joins adjacent outputs to produce the full transcription. Each segment is sent through an iterative dual pathway structure that allows participants in either path to iteratively refine the transcriptions of others in their path while being rewarded based on transcriptions in the other path, eliminating the need to check transcripts in a separate process. Initial experiments with local subjects show that produced transcripts are on average 96.6% accurate. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;An Extendable Toolkit for Managing Quality of Human-based Electronic Services&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;David Bermbach, Robert Kern, Pascal Wichmann, Sandra Rath, Christian Zirpins (KIT)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Micro-task markets like Amazon MTurk enable online workers to provide human intelligence as Web-based on demand services (so called people services). Businesses facing large amounts of knowledge work can benefit from increased flexibility and scalability of their workforce but need to cope with reduced control of result quality. While this problem is well recognized, it is so far only rudimentarily addressed by existing platforms and tools. In this paper, we present a flexible research toolkit which enables experiments with advanced quality management mechanisms for generic micro-task markets. The toolkit enables control of correctness and performance of task fulfillment by means of dynamic sampling, weighted majority voting and worker pooling. We demonstrate its application and performance for an OCR scenario building on Amazon MTurk. The toolkit however enables the development of advanced quality management mechanisms for a large variety of people service scenarios and platforms. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;What’s the Right Price? Pricing Tasks for Finishing on Time&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Siamak Faridani, Bjoern Hartmann (UC Berkeley); Panos Ipeirotis (NYU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Many practitioners currently use rules of thumb to price tasks on online labor markets. Incorrect pricing leads to task starvation or inefficient use of capital. Formal optimal pricing policies can address these challenges. In this paper we argue that an optimal pricing policy must be based on the tradeoff between price and desired completion time. We show how this duality can lead to a better pricing policy for tasks in online labor markets. This paper makes three contributions. First, we devise an algorithm for optimal job pricing using a survival analysis model. We then show that worker arrivals can be modeled as a non-homogenous Poisson Process (NHPP). Finally using NHPP for worker arrivals and discrete choice models we present an abstract mathematical model that captures the dynamics of the market when full market information is presented to the task requester. This model can be used to predict completion times and optimal pricing policies for both public and private crowds. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Pricing Mechanisms for Online Labor Market&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Yaron Singer, Manas Mittal (UC Berkeley EECS)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In online labor markets, determining the appropriate incentives is a difficult problem. In this paper, we present dynamic pricing mechanisms for determining the optimal prices for such tasks. In particular, the mechanisms are designed to handle the intricacies of markets like Mechanical Turk (workers are coming online, requesters have budgets, etc.). The mechanisms have desirable theoretical guarantees (incentive compatibility, budget feasibility, and competitive ratio performance) and perform well in practice. Experiments demonstrate the effectiveness and feasibility of using such mechanisms in practice. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Labor Allocation in Paid Crowdsourcing: Experimental Evidence on Positioning, Nudges and Prices&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;John Horton (ODesk); Dana Chandler (MIT)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
This paper reports the results of a natural field experiment where workers from a paid crowdsourcing environment self-select into tasks and are presumed to have limited attention. In our experiment, workers labeled any of six pictures from a 2 x 3 grid of thumbnail images. In the absence of any incentives, workers exhibit a strong default bias and tend to select images from the top-left (“focal”) position; the bottom-right (“non-focal”) position was the least preferred. We attempted to overcome this bias and increase the rate at which workers selected the least preferred task, by using a combination of monetary and non-monetary incentives. We also varied the saliency of these incentives by placing them in either the focal or non-focal position. Although both incentive types caused workers to re-allocate their labor, monetary incentives were more effective. Most interestingly, both incentive types worked better when they were placed in the focal position and made more salient. In fact, salient non-monetary incentives worked about as well as non-salient monetary ones. Our evidence suggests that user interface and cognitive biases play an important role in online labor markets and that salience can be used by employers as a kind of “incentive multiplier”. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
&lt;b&gt;Posters&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;Developing Scripts to Teach Social Skills: Can the Crowd Assist the Author?&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Fatima Boujarwah, Jennifer Kim, Gregory Abowd, Rosa Arriaga (Georgia Tech)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
The social world that most of us navigate effortlessly can prove to be a perplexing and disconcerting place for individuals with autism. Currently there are no models to assist non-expert authors as they create customized social script-based instructional modules for a particular child. We describe an approach to using human computation to develop complex models of social scripts for a plethora of complex and interesting social scenarios, possible obstacles that may arise in those scenarios, and potential solutions to those obstacles. Human input is the natural way to build these models, and in so doing create valuable assistance for those trying to navigate the intricacies of a social life.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;CrowdLang - First Steps Towards Programmable Human Computers for General Computation&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Patrick Minder, Abraham Bernstein (University of Zurich)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing markets such as Amazon’s Mechanical Turk provide an enormous potential for accomplishing work by combining human and machine computation. Today crowdsourcing is mostly used for massive parallel information processing for a variety of tasks such as image labeling. However, as we move to more sophisticated problem-solving there is little knowledge about managing dependencies between steps and a lack of tools for doing so. As the contribution of this paper, we present a concept of an executable, model-based programming language and a general purpose framework for accomplishing more sophisticated problems. Our approach is inspired by coordination theory and an analysis of emergent collective intelligence. We illustrate the applicability of our proposed language by combining machine and human computation based on existing interaction patterns for several general computation problems.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Ranking Images on Semantic Attributes using CollaboRank&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Jeroen Janssens, Eric Postma, Jaap Van den Herik (Tilburg University)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In this paper, we investigate to what extent a large group of human workers is able to produce collaboratively a global ranking of images, based on a single semantic attribute. To this end, we developed CollaboRank, which is a method that formulates and distributes tasks to human workers, and aggregates their personal rankings into a global ranking. Our results show that a relatively high consensus can be achieved, depending on the type of the semantic attribute.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Artificial Intelligence for Artificial Artificial Intelligence&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Peng Dai, Mausam, Daniel Weld (University of Washington)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing platforms such as Amazon Mechanical Turk have become popular for a wide variety of human intelligence tasks; however, quality control continues to be a significant challenge. Recently, Dai et al (2010) propose TurKontrol, a theoretical model based on POMDPs to optimize iterative, crowd-sourced workflows. However, they neither describe how to learn the model parameters, nor show its effectiveness in a real crowd-sourced setting. Learning is challenging due to the scale of the model and noisy data: there are hundreds of thousands of workers with high-variance abilities. This paper presents an end-to-end system that first learns TurKontrol's POMDP parameters from real Mechanical Turk data, and then applies the model to dynamically optimize live tasks. We validate the model and use it to control a successive-improvement process on Mechanical Turk. By modeling worker accuracy and voting patterns, our system produces significantly superior artifacts compared to those generated through static workflows using the same amount of money.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;One Step beyond Independent Agreement: A Tournament Selection Approach for Quality Assurance of Human Computation Tasks&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Yu-An Sun, Shourya Roy (Xerox); Greg Little (MIT CSAIL)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Quality assurance remains a key topic in the human computation research field. Prior work indicates that independent agreement is effective for low difficulty tasks, but has limitations. This paper addresses this problem by proposing a tournament selection based quality control process. The experimental results from this paper show that humans are better at identifying the correct answers than generating them.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Turkomatic: Automatic, Recursive Task and Workflow Design for Mechanical Turk&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Anand Kulkarni, Matthew Can, Bjoern Hartmann (UC Berkeley)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
On today’s human computation systems, designing tasks and workflows is a difficult and labor-intensive process. Can workers from the crowd be used to help plan workflows? We explore this question with Turkomatic, a new interface to microwork platforms that uses crowd workers to help plan workflows for complex tasks. Turkomatic uses a general-purpose divide-and-conquer algorithm to solve arbitrary natural-language requests posed by end users. The interface includes a novel real-time visual workflow editor that enables requesters to observe and edit workflows while the tasks are being completed. Crowd verification of work and the division of labor among members of the crowd can be handled automatically by Turkomatic, which substantially simplifies the process of using human computation systems. These features enable a novel means of interaction with crowds of online workers to support successful execution of complex work.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;MuSweeper: Collect Mutual Exclusions with Extensive Game&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Tao-Hsuan Chang, Cheng-wei Chan, Jane Yung-jen Hsu (National Taiwan University)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Mutual exclusions are important information for machine learning. Games With A Purpose (or GWAP) provide an effective way to get a large amount of data from web users. This research proposes MuSweeper, a minesweeper-like game, to collect mutual exclusions. By embedding game theory into the game mechanism, the precision is guaranteed. Experiments showed MuSweeper can efficiently collect mutual exclusions with high precision.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;MobileWorks: A Mobile Crowdsourcing Platform for Workers at the Bottom of the Pyramid&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Prayag Narula, Philipp Gutheim, David Rolnitzky, Anand Kulkarni, Bjoern Hartmann (UC Berkeley)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
We present MobileWorks, a mobile phone-based crowdsourcing platform. MobileWorks targets workers in developing countries who live at the bottom of the economic pyramid. This population does not have access to desktop computers, so existing microtask labor markets are inaccessible to them. MobileWorks offers human OCR tasks that can be accomplished on low-end mobile phones; workers access it through their mobile web browser. To address the limited screen resolution available on low-end phones, MobileWorks segments documents into many small pieces, and sends each piece to a different worker. A first pilot study with 10 users over a period of 2 months revealed that it is feasible to do simple OCR tasks using a simple Mobile Web based application. We found that on average the workers do the tasks at 120 tasks per hour. Using single entry, the accuracy of workers across the different documents is 89%. We propose a multiple entry solution which increases the theoretical accuracy of the OCR to more than 99%.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Towards Task Recommendation in Micro-Task Markets&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Vamsi Ambati, Stephan Vogel, Jaime Carbonell (CMU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
As researchers embrace micro-task markets for eliciting human input, the nature of the posted tasks moves from those requiring simple mechanical labor to requiring specific cognitive skills. On the other hand, increase is seen in the number of such tasks and the user population in micro-task market places requiring better search interfaces for productive user participation. In this paper we posit that understanding user skill sets and presenting them with suitable tasks not only maximizes the overall quality of the output, but also attempts to maximize the benefit to the user in terms of more successfully completed tasks. We also implement a recommendation engine for suggesting tasks to users based on implicit modeling of skills and interests. We present results from a preliminary evaluation of our system using publicly available data gathered from a variety of human computation experiments recently conducted on Amazon's Mechanical Turk.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;On Quality Control and Machine Learning in Crowdsourcing&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Matthew Lease (UT Austin)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
The advent of crowdsourcing has created a variety of new opportunities for improving upon traditional methods of data collection and annotation. This in turn has created intriguing new opportunities for data-driven machine learning (ML). Convenient access to crowd workers for simple data collection has further generalized to leveraging more arbitrary crowd-based human computation to supplement ML. While new potential applications of crowdsourcing continue to emerge, a variety of practical and sometimes unexpected obstacles have already limited the degree to which its promised potential can be actually realized in practice. This paper considers two particular aspects of crowdsourcing and their interplay, data quality control (QC) and ML, reflecting on where we have been, where we are, and where we might go from here.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;CollabMap: Augmenting Maps using the Wisdom of Crowds&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Ruben Stranders, Sarvapali Ramchurn, Bing Shi, Nicholas Jennings (University of Southampton)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In this paper we develop a novel model of geospatial data creation, called CollabMap, that relies on human computation. CollabMap is a crowdsourcing tool to get users contracted via Amazon Mechanical Turk or a similar service to perform micro-tasks that involve augmenting existing maps (e.g. GoogleMaps or Ordnance Survey) by drawing evacuation routes, using satellite imagery from GoogleMaps and panoramic views from Google Street View. We use human computation to complete tasks that are hard for a computer vision algorithm to perform or to generate training data that could be used by a computer vision algorithm to automatically define evacuation routes.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Improving Consensus Accuracy via Z-score and Weighted Voting &lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Hyun Joon Jung, Matthew Lease (UT Austin)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
We describe a Z-score based outlier detection method for detection and filtering of inaccurate crowd workers. After filtering, we aggregate labels from remaining workers via simple majority voting or feature-weighted voting. Both supervised and unsupervised features are used, individually and in combination, for both outlier detection and weighted voting. We evaluate on noisy judgments collected from Amazon Mechanical Turk which assess Web search relevance of query/document pairs. We find that filtering in combination with multi-feature weighted voting achieves 8.94% relative error reduction for graded accuracy (4.25% absolute) and 5.32% for binary accuracy (3.45% absolute).&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Making Searchable Melodies: Human vs. Machine&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Mark Cartwright, Zafar Rafii, Jinyu Han, Bryan Pardo (Northwestern University)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Systems that find music recordings based on hummed or sung melodic input are called Query-By-Humming (QBH) systems. Such systems employ search keys that are more similar to a cappella singing than the original recordings. Successful deployed systems use human computation to create these search keys: hand-entered midi melodies or recordings of a cappella singing. Tunebot is one such system. In this paper, we compare search results using keys built from two automated melody extraction systems to those gathered using two populations of humans: local paid singers and Amazon Turk workers.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;PulaCloud: Using Human Computation to Enable Development at the Bottom of the Economic Ladder&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Andrew Schriner (University of Cincinnati); Daniel Oerther (Missouri University of Science and Technology); James Uber (University of Cincinnati)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
This research aims to explore how Human Computation can be used to aid economic development in communities experiencing extreme poverty throughout the world. Work is ongoing with a community in rural Kenya to connect them to employment opportunities through a Human Computation system. A feasibility study has been conducted in the community using the 3D protein folding game Foldit and Amazon’s Mechanical Turk. Feasibility has been confirmed and obstacles identified. Current work includes a pilot study doing image analysis for two research projects and developing a GUI that is usable by workers with little computer literacy. Future work includes developing effective incentive systems that operate both at the individual level and the group level and integrating worker accuracy evaluation, worker compensation, and result-credibility evaluation.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Towards Large-Scale Processing of Simple Tasks with Mechanical Turk&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Paul Wais, Shivaram Lingamneni, Duncan Cook, Jason Fennell, Benjamin Goldenberg, Daniel Lubarov, David Marin, Hari Simons (Yelp, inc.)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing platforms such as Amazon's Mechanical Turk (AMT) provide inexpensive and scalable workforces for processing simple online tasks. Unfortunately, workers participating in crowdsourcing tend to supply work of inconsistent or low quality. We report on our experiences using AMT to verify hundreds of thousands of local business listings for the online directory Yelp.com. Using expert-verified changes, we evaluate the accuracy of our workforce and present the results of preliminary experiments that work towards filtering low-quality workers and correcting for worker bias. Our report seeks to inform the community of practical and financial constraints that are critical to understanding the problem of quality control in crowdsourcing systems.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Learning to Rank From a Noisy Crowd&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Abhimanu Kumar, Matthew Lease (UT Austin)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
We consider how to most effectively use crowd-based relevance assessors to produce training data for learning to rank. This integrates two lines of prior work: studies of unreliable crowd-based binary annotation for binary classification, and studies for aggregating graded relevance judgments from reliable experts for ranking. To model varying performance of the crowd, we simulate annotation noise with varying magnitude and distributional properties. Evaluation on three LETOR test collections reveals a striking trend contrary to prior studies: single labeling outperforms consensus methods in maximizing learner rate (relative to annotator effort). We also see surprising consistency of learning rate across noise distributions, as well as greater challenge with the adversarial case for multi-class labeling.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;/ul&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-5585467094888505184?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=2tGfg2jdcbU:a91ns58j9nk:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=2tGfg2jdcbU:a91ns58j9nk:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/2tGfg2jdcbU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/5585467094888505184/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/06/accepted-papers-for-hcomp-2011.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5585467094888505184?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5585467094888505184?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/2tGfg2jdcbU/accepted-papers-for-hcomp-2011.html" title="Accepted papers for the 3rd Human Computation Workshop (HCOMP 2011)" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/06/accepted-papers-for-hcomp-2011.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkUNRnY8fip7ImA9WhdTEEs.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-5632533162427331699</id><published>2011-06-20T11:37:00.006-04:00</published><updated>2011-07-07T13:38:17.876-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-07T13:38:17.876-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="tagasauris" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="machine learning" /><title>Crowdsourcing and the discovery of a hidden treasure</title><content type="html">&lt;div class="separator" style="clear: both; text-align: left;"&gt;A few months back, I started advising &lt;a href="http://www.tagasauris.com/"&gt;Tagasauris&lt;/a&gt;, a company that provides media annotation services, using crowdsourcing.&amp;nbsp;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;This month, Tagasauris is featured in a &lt;a href="http://www.wired.com/"&gt;Wired&lt;/a&gt; article, titled "&lt;a href="http://www.wired.com/magazine/2011/06/pl_americangraffiti/?pid=5850"&gt;Hidden Treasure&lt;/a&gt;". It is a story of rediscovering a "lost" set of photos, from the shooting of the movie "American&amp;nbsp;Graffiti". You can see the article by clicking the image:&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;a alt="Hidden Treasure

Rediscovered: Never before seen American Graffiti photos in the Magnum archive.

IN MARCH, the Magnum photo agency stumbled onto a remarkable find: Nearly two dozen lost photos from the set of American Graffiti. The images feature pre-Star Wars George Lucas as well as cast members like Richard Dreyfuss, Mackenzie Phillips, and Ron Howard, and they offer an unparalleled look at the making of the 1973 film. So where did Magnum discover these gems? In its own archive. Magnum had hired Tagasauris, a company that tags photos using Amazon Mechanical Turk workers, to add keywords to hundreds of thousands of untagged images. When those workers came across the Graffiti photos, they quickly identified the actors, scenes, and other image details. Magnum originally hoped the phototagging would improve its archive's searchability, which it has, but the agency was also thrilled that the initiative unearthed such an incredible trove - images that visually resurrect an American classic." href="http://2.bp.blogspot.com/-6HiUV_rSzig/Tfzd3Q5nRNI/AAAAAAAAs6A/BO6WhPfEn-o/s1600/wired-tagasauris-magnum-american_graffitti_july_2011.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img alt="Hidden Treasure

Rediscovered: Never before seen American Graffiti photos in the Magnum archive.

IN MARCH, the Magnum photo agency stumbled onto a remarkable find: Nearly two dozen lost photos from the set of American Graffiti. The images feature pre-Star Wars George Lucas as well as cast members like Richard Dreyfuss, Mackenzie Phillips, and Ron Howard, and they offer an unparalleled look at the making of the 1973 film. So where did Magnum discover these gems? In its own archive. Magnum had hired Tagasauris, a company that tags photos using Amazon Mechanical Turk workers, to add keywords to hundreds of thousands of untagged images. When those workers came across the Graffiti photos, they quickly identified the actors, scenes, and other image details. Magnum originally hoped the phototagging would improve its archive's searchability, which it has, but the agency was also thrilled that the initiative unearthed such an incredible trove - images that visually resurrect an American classic." border="0" height="400" src="http://2.bp.blogspot.com/-6HiUV_rSzig/Tfzd3Q5nRNI/AAAAAAAAs6A/BO6WhPfEn-o/s400/wired-tagasauris-magnum-american_graffitti_july_2011.jpg" width="298" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
Since there are some interesting aspects of the story, which go beyond the simple "tag using MTurk" story, I would like to give a few more details that I consider interesting.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Magnum Photos&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
One of the clients of Tagasauris is &lt;a href="http://www.magnumphotos.com/"&gt;Magnum Photos&lt;/a&gt;, a cooperative &lt;a href="http://www.magnumphotos.com/C.aspx?VP=XSpecific_MAG.StaticPage_VPage&amp;amp;SP=photographers_list&amp;amp;l1=0&amp;amp;XXAPXX=SubPanel10"&gt;owned by its own photographer members&lt;/a&gt;, designated to handle the commercial aspect of their own work. The list of members of Magnum Photos include photographers such as&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Robert_Capa"&gt;Robert Capa&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/Henri_Cartier-Bresson"&gt;Henri Cartier-Bresson&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/David_Seymour"&gt;David Seymour&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/George_Rodger"&gt;George Rodger&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/Steve_McCurry"&gt;Steve McCurry&lt;/a&gt;, and&lt;a href="http://www.magnumphotos.com/C.aspx?VP=XSpecific_MAG.StaticPage_VPage&amp;amp;SP=photographers_list&amp;amp;l1=0&amp;amp;XXAPXX=SubPanel10"&gt; many others&lt;/a&gt;.&amp;nbsp;(See their &lt;a href="http://en.wikipedia.org/wiki/Magnum_Photos"&gt;Wikipedia entry&lt;/a&gt;&amp;nbsp;for further details.) A few photos in the Magnum Photos archive that you may recognize:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXLU3Q07&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2S5RYDYF53IF&amp;amp;PN=3&amp;amp;CT=Search"&gt;&lt;img border="0" height="200" src="http://2.bp.blogspot.com/-UKRmgmVg5CA/TfzDd99r67I/AAAAAAAAs54/m5yU81Q3fHA/s200/SteveMcCurry-AfghanGirl.jpg" width="132" /&gt;&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXLU3CWQ&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2K7O3R3AX6QL&amp;amp;PN=5&amp;amp;CT=Search"&gt;&lt;img border="0" height="200" src="http://1.bp.blogspot.com/-oYowUgGAAOM/TfzDcTzmbYI/AAAAAAAAs5s/JsvOtWa7dXI/s200/ElliottErwitt-MarilynMonroe.jpg" width="130" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXLOB3F9&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2S5RYD1PEVR4&amp;amp;PN=1&amp;amp;CT=Search"&gt;&lt;img border="0" height="133" src="http://1.bp.blogspot.com/-uligbQqXNJY/Tf9iwyD2xMI/AAAAAAAAs6s/Tpro3iNask0/s200/ReneBurri-CheGuevara.jpg" width="200" /&gt;&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXLU3B6X&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2S5RYDO52FJR&amp;amp;PN=5&amp;amp;CT=Search"&gt;&lt;img border="0" height="133" src="http://2.bp.blogspot.com/-fG1er5NJOp8/TfzDdWjHspI/AAAAAAAAs50/8pF7vqVK-iE/s200/RobertCapa-SpanishCivilWar.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
One of my favorite parts of the Magnum website is the &lt;a href="http://www.magnumphotos.com/C.aspx?VP=XSpecific_MAG.StaticPage_VPage&amp;amp;SP=agency_yearahead"&gt;Archival Calendar&lt;/a&gt;, where they have a set of photos showcasing various historic events. Beats Facebook browsing by a wide margin. But let's get back to the story.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The problem&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
So, what is the problem of Magnum Photos?&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; The same problem that almost &lt;u&gt;every single big media company faces&lt;/u&gt;:&amp;nbsp;a very large number of media objects without useful, descriptive metadata&lt;/span&gt;&lt;/b&gt;. No keywords, no description, nothing to aid the discovery process. Just the image file and mechanical data about film number etc. (Well, my own photo archive looks very similar...)&lt;br /&gt;
&lt;br /&gt;
This lack of metadata is the case not only for the archive but also for the new, incoming photos that arrive every day from its members. (To put it mildly, photographers are not exactly eager to sit, tag, and describe the hundreds of photos they shoot every day.)&amp;nbsp;This means that a large fraction of the Magnum Photos archive, which contains millions of photos,&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; is virtually unsearchable&lt;/span&gt;&lt;/b&gt;. The photos are effectively lost in the digital world, even though they are digitized and available on the Internet.&lt;br /&gt;
&lt;br /&gt;
An example of such a case of "lost" photos is a set of photos from the shooting of the movie "&lt;a href="http://en.wikipedia.org/wiki/American_Graffiti"&gt;American Graffiti&lt;/a&gt;".&amp;nbsp;People at Magnum Photos &lt;i&gt;knew &lt;/i&gt;that one of their photographers, &lt;a href="http://en.wikipedia.org/wiki/Dennis_Stock"&gt;Dennis Stock&lt;/a&gt;&amp;nbsp;who died in 2009, was on set during the production of the movie, and he had taken photos of the, then young and unknown, members of the team.&amp;nbsp;Magnum Photos had no idea &lt;i&gt;where &lt;/i&gt;these photos were. &lt;b&gt;They &lt;i&gt;knew &lt;/i&gt;they digitized the archive of Dennis Stock, they &lt;i&gt;knew &lt;/i&gt;that the photos are in the archive, but nobody could locate the photos &lt;/b&gt;within the millions of other, untagged photos.&lt;br /&gt;
&lt;br /&gt;
For those unfamiliar with the movie,&amp;nbsp;American Graffiti is a 1973 film, by&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/George_Lucas"&gt;George Lucas&lt;/a&gt;&amp;nbsp;(pre-Star Wars), starring the then-unknown&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Richard_Dreyfuss"&gt;Richard Dreyfuss&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Ron_Howard"&gt;Ron Howard&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Paul_Le_Mat"&gt;Paul Le Mat&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Charles_Martin_Smith"&gt;Charles Martin Smith&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Cindy_Williams"&gt;Cindy Williams&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Candy_Clark"&gt;Candy Clark&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Mackenzie_Phillips"&gt;Mackenzie Phillips&lt;/a&gt;&amp;nbsp;and&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Harrison_Ford"&gt;Harrison Ford&lt;/a&gt;.&amp;nbsp;The later rise to stardom of all these actors makes the movie almost a cult classic.&lt;br /&gt;
&lt;br /&gt;
The Magnum Photos archive is a trove of similar "hidden treasures". Sitting there, waiting for some accidental, serendipitous discovery.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The tagging solution and the machine support&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Magnum Photos had its own set of annotators. However, the annotators could not keep up even with the volume of incoming photos. Going back and annotating the archive was an even more daunting task. This meant lost revenue for&amp;nbsp;Magnum Photos: if you cannot find a photo, you cannot license it, and you cannot sell it.&lt;br /&gt;
&lt;br /&gt;
Tagasauris proposed to solve the problem using crowdsourcing. With hundreds of workers working in parallel, it became possible to tame the influx of untagged incoming photos, and start going backwards and tagging the archive.&lt;br /&gt;
&lt;br /&gt;
Of course, vanilla photo tagging is not a solution. Workers type&amp;nbsp;misspelled&amp;nbsp;words (named entities are systematic offenders), try to get away with generic tags, etc. Following the lessons learned from the ESP Game, and all the subsequent studies, Tagasauris built solutions for cleaning the tags, rewarding specificity, and, in general, cleaning up and ensuring high quality in the noisy tagging process.&lt;br /&gt;
&lt;br /&gt;
A key component was the ability to match the tags entered by the workers with named entities, which themselves were then connected to Freebase entities.&lt;br /&gt;
&lt;br /&gt;
The result? When workers were tagging the photos from Magnum Photos, they identified the actors in the shots, and the machine process in the background assigned "semantic tags" to the photos, such as [&lt;a href="http://www.freebase.com/view/en/george_lucas"&gt;George Lucas&lt;/a&gt;],&amp;nbsp;[&lt;a href="http://www.freebase.com/view/en/richard_dreyfuss"&gt;Richard Dreyfuss&lt;/a&gt;], [&lt;a href="http://www.freebase.com/view/en/ron_howard"&gt;Ron Howard&lt;/a&gt;], [&lt;a href="http://www.freebase.com/view/en/mackenzie_phillips"&gt;Mackenzie Phillips&lt;/a&gt;],&amp;nbsp;[&lt;a href="http://www.freebase.com/view/en/harrison_ford"&gt;Harrison Ford&lt;/a&gt;] and others.&lt;br /&gt;
&lt;br /&gt;
Yes, humans + machines generate things that are better than the sum of the parts.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The machine support, cont.&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
So, how did the workers discover the photos from American Graffiti?&amp;nbsp;As you may imagine, the workers had no idea that the photos that they were tagging were from the shooting of the film. They could identify the actors, but that was it.&lt;br /&gt;
&lt;br /&gt;
Going from actor tagging to understanding the context of the photo shoot is a task that cannot be expected of lay, non-expert taggers.&amp;nbsp;You need experts that can "connect the dots". Unfortunately, subject experts are expensive. And they tend not to be interested in tedious tasks, such as assigning tags to photos.&lt;br /&gt;
&lt;br /&gt;
However, this "connecting the dots" is a task where machines are better than humans. We have recently seen how Watson, by having access to semantically connected ontologies (often generated by humans), could identify the correct answers to a wide variety of questions.&lt;br /&gt;
&lt;br /&gt;
Tagasauris employed a similar strategy. Knowing the entities that appear in a set of photos, it is then possible to identify additional metadata. For example, look at the five actors that were identified in the photos (red boxes, with white background), and the associated semantic graph that links the different entities together:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-Foyfqn00l90/Tfzd2-lm-nI/AAAAAAAAs58/_7nlnEs1CnM/s1600/semantic-annotation-discovery.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="385" src="http://4.bp.blogspot.com/-Foyfqn00l90/Tfzd2-lm-nI/AAAAAAAAs58/_7nlnEs1CnM/s400/semantic-annotation-discovery.jpg" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
Bingo! The entity that connects together the different entities is the entity "&lt;a href="http://www.freebase.com/view/en/american_graffiti"&gt;American Graffiti&lt;/a&gt;", which was not used by &lt;i&gt;any &lt;/i&gt;worker.&lt;br /&gt;
&lt;br /&gt;
At this point, you can understand how the story evolved. A graph activation/spreading algorithm suggests the tag, experts can verify it, and the rest is history.&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://blog.magnumphotos.com/meagan_young.html"&gt;Meagan Young&lt;/a&gt;&amp;nbsp;looked at the stream of incoming photos, noticed the American Graffiti tag, realized that the&amp;nbsp;"lost" photos were found, and she notified the others at Magnum Photos and Todd Carter, the CEO of Tagasauris.&amp;nbsp;The "hidden treasure" was identified, and the Wired story was underway...&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Crowdsourcing: It is not just about the humans&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
This is not a story to show how cool discovery based on linked entities is. This is old news for many people that work with such data. However, this &lt;i&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;is &lt;/span&gt;&lt;/b&gt;&lt;/i&gt;a &lt;i&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;simple &lt;/span&gt;&lt;/b&gt;&lt;/i&gt;example of using crowdsourcing in a more intelligent way than it is currently being used. Machines cannot do everything (in fact, they are especially bad in tasks that are "trivial" for humans) but when humans provide enough input, the machines can take it from there, and significantly improve the overall process.&lt;br /&gt;
&lt;br /&gt;
Someone can even see the obvious next step: Use face recognition and allow tagging to be done collaboratively with humans and machines. Google and Facebook have very advanced algorithms for face recognition. Match them intelligently with humans, and you are way ahead of solutions that rely simply on humans to tag faces.&lt;br /&gt;
&lt;br /&gt;
I think the lesson is clear: &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;Let humans do what they do best, and let machines do what they do best&lt;/span&gt;&lt;/b&gt;. (And expect the balance to change as we move forward and machines can do more.) Undoing and ignoring decades of research in computer science, just because it is easier to use cheap labor, is a disservice not only to computer science. It is a disservice to the potential of crowdsourcing as well.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-5632533162427331699?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=A5VftwLf91Y:Hg8LoLU13cg:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=A5VftwLf91Y:Hg8LoLU13cg:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/A5VftwLf91Y" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/5632533162427331699/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/06/crowdsourcing-and-discovery-of-hidden.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5632533162427331699?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5632533162427331699?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/A5VftwLf91Y/crowdsourcing-and-discovery-of-hidden.html" title="Crowdsourcing and the discovery of a hidden treasure" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-6HiUV_rSzig/Tfzd3Q5nRNI/AAAAAAAAs6A/BO6WhPfEn-o/s72-c/wired-tagasauris-magnum-american_graffitti_july_2011.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/06/crowdsourcing-and-discovery-of-hidden.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE8ASXY9eyp7ImA9WhZVFk0.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-3256121701426974704</id><published>2011-05-28T13:14:00.000-04:00</published><updated>2011-05-28T13:14:08.863-04:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-05-28T13:14:08.863-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="education" /><title>Crowdsourcing Education</title><content type="html">&lt;i&gt;This is a guest blog post by Lindsey Wright.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;hr /&gt;&lt;br /&gt;
&lt;b&gt;Crowdsourcing Education  &lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Technology and the popularity of online classes are radically transforming the educational process on both national and international scales. Society sees an increased failure of traditional schools in meeting goals to fully educate student populations to produce future generations of innovative, intellectual community leaders and workers. It is time to look at online learning and other alternative choices in education as possible solutions to complex obstacles and challenges of traditional learning routes.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Home Schooling Expands&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
As elementary and secondary schools continue to lag behind academically, home schooling movements continue to expand. &lt;a href="http://nces.ed.gov/fastfacts/display.asp?id=91"&gt; The National Household Education Survey Program &lt;/a&gt; found a 36 percent increase in home-schooled populations for reasons other than health from 2003 to 2007 and a 74 percent increase from 1999 to 2007. Parents cite a lack of moral education and safety fears as primary reasons to abandon the traditional school model. However, if parents enrolled their children in an &lt;a href="http://www.onlineschools.org/"&gt;online school&lt;/a&gt; these problems could be eliminated. For instance, overworked, tired parents would not have the additional responsibility of planning lessons and students would be held accountable for their learning. Students would also have fewer distractions to concentrate on their studies, thus eliminating discipline problems.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Who's Watching the Kids?&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Additionally, in 2002, only 7 percent of U.S. families survived on one spouse’s income, according to the Population Reference Bureau. As such, parents are spending a more substantial amount of time at work, leaving little time for parental school involvement. Overwhelmed teachers routinely contact parents primarily for discipline problems and not for building academic progress. As the primary years lay the foundation for future academic success, the educational and business community must look for new ways to transform education methods to ensure students will have the keys to higher learning opportunities and a lifetime of success. With the wealth of information found on the Internet, online classes could utilize educational Web sites and software that engage students in ways beyond the traditional classroom. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;High School Dropouts Increasing&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
According to &lt;a href="http://www.all4ed.org/files/GraduationRates_FactSheet.pdf"&gt; Alliance for Excellent Education &lt;/a&gt;, 1.2 million students annually fail to graduate from school. Yet due to the rapid growth of knowledge, businesses and industry are demanding innovative workers equipped with advanced skills in communication, mathematics and technology. Students who fail to gain these skills are less likely to earn decent salaries, forcing the government to spend more on poverty fighting programs such as Medicaid. Online classes would aid in dropout prevention by providing students with individualized instruction, allowing them to focus on gaining the practical, innovative working skills they need. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Teacher Attainment Issues&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
A study by the &lt;a href="http://www.nova.edu/ssss/QR/QR13-1/gonzalez.pdf"&gt; National Education Association &lt;/a&gt; found that 50 percent of new teachers quit teaching within a five year period, due to low salaries, heavy pressures of administrators and poor working conditions. Though teaching can be a fulfilling career, new teachers often lack the support of veteran co-workers and supervisors, leaving the new teacher lost in a maze of bureaucracy and hopelessness. This task can become overwhelming for teachers lacking support from parents, administration and the community at large. By streamlining the curriculum through online classes, teachers would be freed to devote more time to the art of teaching and have more time to spend learning from peers and gaining the support they need, reducing teacher attrition rates.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The New Economy&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
The global economy is radically different from what it was even fifty years ago, when America was a leading global power of industry and commerce. Today, international consumer and industry markets are the norm, as seen with technological leaders of Google and Microsoft, whose headquarters are not located in the United States, but in Ireland. American industries, forced by heavy government regulations, are relocating facilities to countries with friendlier business climates, such as India. A young person without a solid education will have severe difficulties in this new highly competitive job market. However the use of Web chats, videos and other learning projects will transition students into this global economy, and provide engaging activities to keep students interested. For example, instead of learning foreign languages through the traditional methods of worksheets and repetitive instruction, students could learn through interactive distance learning, eventually conversing with foreign students. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Popularity of Online classes&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Colleges are already embracing online classes, with the use of popular programs such as Blackboard. Flexibility, availability of classes anywhere in the country and streamlined instruction are benefits of this new technology. Instead of being lazily spoon-fed direct instruction by teachers and faculty members, students can take control of their education through individualized instruction with unlimited access to the Internet from the convenience of their own home. This also allows students to save time, money, and gas while pursuing a bachelor’s or master’s degree. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Online Classes - The Solution?&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
The future of education depends on the radical transformation of attitudes and traditional methods associated with it. If these are not changed, it is likely that education in the U.S. will be doomed to miserable failure. With online and distance learning taking a strong hold in the education community, expansion of these services could decrease dropout rates, eliminate disciplinary problems and reduce new teacher attrition and instability. Likewise, schools would no longer have funding problems, as much could be saved by the elimination of transportation and utility costs. Parents would also be more aware of their child's academic progress and would have the added peace of mind of knowing that their child was in a safe environment. School uniforms and other required classroom supplies would also be eliminated from the family budget. &lt;br /&gt;
&lt;br /&gt;
In order for the United States to again become recognized as a global leader, we must look to online classes and distance learning as viable options to solving the issues that plague our education system. The traditional learning model is no longer functional in achieving educational goals for this country. Technology is making this revolution possible, if we believe in its potential. We cannot continue to use traditions and methods of the past if we are to educate the students of tomorrow.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-3256121701426974704?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Uq4Oh9kAhb0:dKRncoCO6CU:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Uq4Oh9kAhb0:dKRncoCO6CU:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/Uq4Oh9kAhb0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/3256121701426974704/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/05/crowdsourcing-education.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3256121701426974704?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3256121701426974704?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/Uq4Oh9kAhb0/crowdsourcing-education.html" title="Crowdsourcing Education" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/05/crowdsourcing-education.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08CRHk4cCp7ImA9WhZWFEw.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-5502252624067659260</id><published>2011-05-13T18:02:00.020-04:00</published><updated>2011-05-14T20:37:45.738-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-05-14T20:37:45.738-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category 
scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="incentives" /><category scheme="http://www.blogger.com/atom/ns#" term="payment" /><title>Pay Enough or Don't Pay at All</title><content type="html">&lt;div style="text-align: justify;"&gt;&lt;b&gt;No good deed goes unpunished&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;A while back, we were working with Dahn Tamir on identifying spam tasks and requesters on the Mechanical Turk platform. Dahn took the lead and built a&amp;nbsp;task on MTurk&amp;nbsp;in which Turkers could see the (other) newly posted tasks on MTurk, and flag the obvious spam ones. Since this was not a task from which he could benefit, he asked workers to rate as many tasks as possible without submitting the task as "completed", to keep the costs down. Workers were happy to collaborate, and effectively work for free, in order to clear the market. We were collecting data nicely. &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;And then, I received some minimal funding for the project ($1,000 to be exact). At that point, I thought that it would be a nice gesture to actually start paying the workers. So, we created a new task, we calibrated for time to pay around 7 dollars an hour, and we posted the task. We were expecting workers to be happy. They were doing the work for free before; now they would not only help clean the market, but they would also get paid for this!&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;The result? A few positive messages with a thank-you note. But also a big backlash: "You, fat cat academic, with all the grants, you want us to work for peanuts?". "Hey, big prof, would you like to be paid minimum wage for your work?". "Yeah, we should be the slaves doing all the grunge work for your research, so that you can get the fame." &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;I was shocked. What happened? I tried to remind the workers that they were doing the same task for free before, but it did not really make a difference. Actually, it made things worse.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Market norms vs. social norms&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Then, I remembered. Dan Ariely, in his book "Predictably Irrational", has warned about this. There are the &lt;b&gt;social norms&lt;/b&gt; and the &lt;b&gt;market norms&lt;/b&gt;. When no money is involved, the exchanges operate using social norms.  Once you put a price on a task, it becomes part of a market norm. It can be measured and compared. &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;When the workers were not getting paid, they were working towards a noble goal: Clean the market from the spammers. By putting a price on the task of classifying spam tasks, we essentially told the workers how much we value their work: minimum wage. Instead of offering their priceless help, they were being valued as unskilled workers, like every other worker in the market. &lt;b&gt;Money and altruism do not mix.&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Somebody must have studied that before&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Needless to say, examining the influence of money on performance and motivation is not a new topic. A wonderful paper that deals with it is "&lt;a href="http://www.jstor.org/stable/2586896"&gt;Pay Enough or Don't Pay at All&lt;/a&gt;" by Gneezy and Rustichini, published back in 2000 (625 citations so far, according to Google Scholar). Instead of trying to describe the paper myself, I will just list here the succinct abstract:&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;blockquote&gt;&lt;div style="text-align: justify;"&gt;Economists usually assume that monetary incentives improve performance, and psychologists claim that the opposite may happen. We present and discuss a set of experiments designed to test these contrasting claims. We found that the effect of monetary compensation on performance was not monotonic. In the treatments in which money was offered, a larger amount yielded a higher performance. However, offering money did not always produce an improvement: subjects who were offered monetary incentives performed more poorly than those who were offered no compensation. Several possible interpretations of the results are discussed.&lt;/div&gt;&lt;/blockquote&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;I would encourage anyone to read the paper, as it contains extensive discussion of various models and explanations. I would definitely do it no justice if I claimed that I fully covered the content of the paper here. However, I would like to highlight some parts below.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Gneezy and Rustichini extended research in psychology from the 1970's, which examined the difference between intrinsic and extrinsic motivation.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;blockquote&gt;&lt;div style="text-align: justify;"&gt;Psychologists study behavior modification through conditioning (in the  case  of the behaviorist school)  or learning (for the cognitive school). We do not. To illustrate the difference, we may consider the classic experiment reported in &lt;a href="http://www.psych.wdev.rochester.edu/SDT/documents/1971_Deci.pdf"&gt;Deci [1971]&lt;/a&gt;. He  had college  students play with a puzzle in three successive sessions. In the  first session participants were left to play freely. In the second  session subjects in one group received payment if they solved the  puzzle, while the control group did not. In a  third session the  subjects were again left to play freely. The amount of time spent on free activity in the first and  third session was  taken as  a measure  of intrinsic motivation.&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt; &lt;/span&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;Deci  found that in the third  session the experimental group spent less time than the control  group playing with the puzzle, and he concluded that the reward offered had decreased the intrinsic motivation of subjects in the  first group over the three sessions.&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;/blockquote&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;That was a result from research in the 1970's. Gneezy and Rustichini wanted to also examine the effect of money in non-sequential environments. So, they conducted the following experiments:&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Effect of &lt;i&gt;additional&lt;/i&gt; payment on a paid task&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;blockquote&gt;&lt;div style="text-align: justify;"&gt;At the beginning of the experiment, &lt;b&gt;each student was promised a fixed payment of NIS  60 for participation&lt;/b&gt;. (NIS = New Israeli Shekel, at the time of the experiment, 3.5 NIS = $1.) They were then told that the experiment would take 45 minutes, and they would be asked to answer a quiz consisting of 50 problems taken out of a psychometric test used to scan applicants to the university. [...] In  the four different treatments subjects were  promised different additional payments for each correct answer.&lt;b&gt; &lt;br /&gt;
&lt;/b&gt;&lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the first  group no mention was made of any additional payment. &lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the second group subjects were promised an additional payment of 0.1 NIS per question answered correctly.&lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the third group subjects were promised an additional payment of 1 NIS per question answered correctly and&lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the fourth group subjects were promised an additional payment of 3 NIS per question answered correctly&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt; [...]  The average number of correct answers was: &lt;/span&gt;&lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;  28.4 in the first group (no additional payment)&lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; 23.1 in the second group (additional&amp;nbsp;0.1 NIS per  correct answer).&lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; 34.7 in the third group,&amp;nbsp;(additional 1 NIS per correct answer).&amp;nbsp; &lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; 34.1 in the fourth group,&amp;nbsp;(additional&amp;nbsp;3 NIS per correct answer).&amp;nbsp;&lt;/span&gt; &lt;/li&gt;
&lt;/ul&gt;&lt;/blockquote&gt;&lt;div style="text-align: justify;"&gt;In other words, performance-based payment improved performance. But offering just a small additional financial incentive actually decreased performance compared to the case of providing no financial incentives.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Effect of payment on &lt;i&gt;unpaid&lt;/i&gt; tasks&lt;/b&gt;  &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;/div&gt;&lt;blockquote&gt;&lt;div style="text-align: justify;"&gt;We  had  180  high-school students around  the  age  of 16 participating with three treatment levels [collecting money for a charitable purpose]....&lt;/div&gt;&lt;ul&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the first treatment, the students were told about the importance of collecting money for the society, that the results of the collection would be published, so that the amount collected by each pair would become public knowledge.&amp;nbsp;&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the second treatment, after the same speech, each pair was promised 1 percent of the amount that the two of them collected.&amp;nbsp;&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the third treatment, each pair was promised 10 percent of the amount they collected.&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;div style="text-align: justify;"&gt;In the second and third treatments it was made clear that the  payment was made from funds additional to the donation, provided by the researchers. The average amount collected was: &lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;238.67 for groups in the first treatment (with no payment). &lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;153.67 in the second group (1 percent of the collected amount). &lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;219.33 in the third group (10 percent of the collected amount). &lt;/span&gt;&lt;br /&gt;
&lt;/li&gt;
&lt;/ul&gt;&lt;/blockquote&gt;&lt;div style="text-align: justify;"&gt;In this case, where there was no initial payment and the task had an altruistic purpose, &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;providing financial incentives actually reduced performance.  &lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;b&gt;Additional literature&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;div&gt;&lt;div style="text-align: justify;"&gt;There is significant literature for anyone interested (thanks &lt;a href="http://pages.stern.nyu.edu/~ptambe/"&gt;Sonny&lt;/a&gt;!). A few pointers to start:&lt;/div&gt;&lt;/div&gt;&lt;div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;a href="http://www.amazon.com/Gift-Relationship-Human-Social-Policy/dp/1565844033"&gt;The Gift Relationship: From Human Blood to Social Policy&lt;/a&gt;&amp;nbsp;(or why paying for blood donations decreases&amp;nbsp;willingness&amp;nbsp;to donate)&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;a href="http://www.jstor.org/stable/10.1086/468061"&gt;A Fine is a Price&lt;/a&gt;&amp;nbsp;(or how a day-care, by penalizing late-coming parents, increased significantly the number of late-coming parents)&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;a href="http://www.jstor.org/stable/2951373"&gt;The Cost of Price Incentives: An Empirical Analysis of Motivation Crowding-Out&lt;/a&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;a href="http://papers.ssrn.com/sol3/papers.cfm?abstract_id=229047"&gt;Do Incentive Contracts Crowd Out Voluntary Cooperation?&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;/div&gt;&lt;div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Conclusions&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Essentially, Gneezy and Rustichini  found that:  &lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;Paying more indeed increases performance, compared to paying less.&amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;However, paying nothing may actually be better than paying!&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;div style="text-align: justify;"&gt;Section IV of the paper has a very nice discussion on how to interpret and model the process. Here are a few explanations in increasing power of explaining the observed phenomenon:  &lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;Paying something removes the intrinsic motivation for a task, and&amp;nbsp;&lt;u&gt;replaces&lt;/u&gt;&amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;it with the external motivation for money.&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;Incomplete contract: the piece-wise or performance-based payment &lt;u&gt;changes the original meaning of the contract&lt;/u&gt;, which implied  that high-performance is part of the task.&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;Paying small amounts compared to the originally implied value of the task &lt;u&gt;devalues the task&lt;/u&gt; (e.g., take back a glass bottle to help recycling vs. for getting 5 cents back)&lt;br /&gt;
&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Relevance to crowdsourcing&lt;/b&gt;&amp;nbsp; &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;I found the results pretty interesting, with significant implications for micro-crowdsourcing. While volunteers may be great for various tasks (e.g., in &lt;a href="http://en.wikipedia.org/wiki/Citizen_science"&gt;citizen science&lt;/a&gt; applications, such as the &lt;a href="http://en.wikipedia.org/wiki/Galaxy_Zoo"&gt;Galaxy Zoo&lt;/a&gt;), migrating such applications to a paid crowdsourcing application may have a significant downside. Paying small rewards to workers will be counterproductive. The work of volunteers is, indeed, priceless.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Furthermore, with the low level of payments on Mechanical Turk, we are stuck at the worst possible status. &lt;b&gt;We pay, and we do not pay enough.&amp;nbsp;&lt;/b&gt;But how can we pay more, when every attempt to increase the price to reasonable levels is followed by attempts of scammers to game the system and get paid for doing nothing?&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-5502252624067659260?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=fyN4i8fXNIg:BuFC0Y-ubyw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=fyN4i8fXNIg:BuFC0Y-ubyw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/fyN4i8fXNIg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/5502252624067659260/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/05/pay-enough-or-dont-pay-at-all.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5502252624067659260?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5502252624067659260?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/fyN4i8fXNIg/pay-enough-or-dont-pay-at-all.html" title="Pay Enough or Don't Pay at All" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/05/pay-enough-or-dont-pay-at-all.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ak4MRXg7fSp7ImA9WhZWEEs.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-1847137548481255471</id><published>2011-05-10T19:08:00.001-04:00</published><updated>2011-05-10T19:09:44.605-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-05-10T19:09:44.605-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="www2011" /><category 
scheme="http://www.blogger.com/atom/ns#" term="slides" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="reputation" /><category scheme="http://www.blogger.com/atom/ns#" term="online labor" /><category scheme="http://www.blogger.com/atom/ns#" term="assembly line" /><title>Crowdsourcing: Lessons from Henry Ford</title><content type="html">Last month, during the WWW2011 conference, I participated in the panel "&lt;a href="http://portal.acm.org/citation.cfm?id=1963335"&gt;The computer is the new sewing machine: benefits and perils of crowdsourcing&lt;/a&gt;"&lt;br /&gt;
&lt;br /&gt;
Below you can find my slides:&lt;br /&gt;
&lt;br /&gt;
&lt;center&gt;&lt;div style="width:425px" id="__ss_7884697"&gt;&lt;strong style="display:block;margin:12px 0 4px"&gt;&lt;a href="http://www.slideshare.net/ipeirotis/crowdsourcing-lessons-from-henry-ford" title="Crowdsourcing: Lessons from Henry Ford"&gt;Crowdsourcing: Lessons from Henry Ford&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/7884697" width="425" height="355" frameborder="0" marginwidth="0" marginheight="0" scrolling="no"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;/center&gt;&lt;br /&gt;
&lt;br /&gt;
The main points that I wanted to make:&lt;br /&gt;
&lt;div&gt;&lt;ul&gt;&lt;li&gt;It is common to consider crowdsourcing as the "assembly line for knowledge work" and think of the workers as simple cogs in a big machine.&amp;nbsp;It is almost a knee-jerk reaction to think negatively about the concept. However, it was the proper use of the assembly line (together with the proper automation) by Henry Ford that&lt;b&gt; led to the first significant improvement in the standard of living for the masses&lt;/b&gt;.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Crowdsourcing suffers a lot due to significant worker turnover: Everyone who experimented with large tasks on MTurk knows that the participation distribution is very skewed. A few workers contribute the majority of the work, while a large number of workers contribute only minimally. &lt;b&gt;Dealing with these hit-and-run workers is a pain, as we cannot apply any statistically meaningful mechanism for quality control.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;We ignore the fact that workers give back what they are given. Pay peanuts, get monkeys. &lt;b&gt;Pay well, and get good workers&lt;/b&gt;. Needless to say, reputation and other quality signaling mechanisms are of fundamental importance for this task.&lt;/li&gt;
&lt;li&gt;Keeping the same workers around can give significant improvements in quality. Today on MTurk we have a tremendous turnover of workers, wasting significant effort and efficiencies. &lt;b&gt;Whoever builds a strong base of a few good workers can pay the workers much better and, at the same time, generate a better product for lower cost&lt;/b&gt; than relying on an army of inexperienced, noisy workers.&lt;/li&gt;
&lt;/ul&gt;&lt;div&gt;Yes, in the end, crowdsourcing is not about the crowd. It is about the individuals in the crowd. And we can now search for these valuable individuals very effectively. Crowd&lt;span class="Apple-style-span" style="color: #660000;"&gt;&lt;b&gt;sourcing&lt;/b&gt;&lt;/span&gt; is crowd&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;searching&lt;/span&gt;&lt;/b&gt;.&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-1847137548481255471?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=jaSVlG-j8fI:PzSy2eBbpDw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=jaSVlG-j8fI:PzSy2eBbpDw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/jaSVlG-j8fI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/1847137548481255471/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/05/crowdsourcing-lessons-from-henry-ford.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1847137548481255471?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1847137548481255471?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/jaSVlG-j8fI/crowdsourcing-lessons-from-henry-ford.html" title="Crowdsourcing: Lessons from Henry Ford" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/05/crowdsourcing-lessons-from-henry-ford.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEUNRHc9fCp7ImA9WhZRGUw.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-3771063621543204461</id><published>2011-04-15T20:04:00.000-04:00</published><updated>2011-04-15T20:04:55.964-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-15T20:04:55.964-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="human 
computation" /><category scheme="http://www.blogger.com/atom/ns#" term="hcomp" /><title>Deadline for HCOMP 2011 extended: Submission due on April 29th</title><content type="html">Due to a significant number of requests, and a number of conflicts with other conferences and workshops, we decided to extend the submission deadline for &lt;a href="http://www.humancomputation.com/"&gt;HCOMP 2011&lt;/a&gt;. The &lt;b&gt;new deadline is April 29th&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
If you want to know more, you can see the &lt;a href="http://behind-the-enemy-lines.blogspot.com/2011/02/3rd-human-computation-workshop-hcomp.html"&gt;call for papers and workshop announcement&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-3771063621543204461?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=kB_fYsNcSbI:HWayrRtrU4g:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=kB_fYsNcSbI:HWayrRtrU4g:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/kB_fYsNcSbI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/3771063621543204461/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/deadline-for-hcomp-2011-extended.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3771063621543204461?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3771063621543204461?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/kB_fYsNcSbI/deadline-for-hcomp-2011-extended.html" title="Deadline for HCOMP 2011 extended: Submission due on April 29th" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/deadline-for-hcomp-2011-extended.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkANRnY8cCp7ImA9WhZQEkg.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-2822764788019891983</id><published>2011-04-15T11:29:00.003-04:00</published><updated>2011-04-19T20:19:57.878-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-19T20:19:57.878-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="wisdom of the crowds" /><title>Video from NYC Crowdsourcing Meetup</title><content type="html">On April 13th, we hosted at NYU Stern the &lt;a href="http://behind-the-enemy-lines.blogspot.com/2011/04/nyc-crowdsourcing-meetup-april-13th.html"&gt;NYC Crowdsourcing Meetup&lt;/a&gt;.&amp;nbsp;For those who missed it, you can now download an&amp;nbsp;&lt;a href="http://echo360.stern.nyu.edu:8080/ess/echo/presentation/3cca744c-8dc7-4c0d-9564-d8868fe3f177/media.mp3"&gt;audio-only podcast version&lt;/a&gt;, see &lt;a href="http://sterntv.stern.nyu.edu:8080/ramgen/faculty/panos/398259/041311-panos-398259.rm"&gt;the online video&lt;/a&gt;,&amp;nbsp;or watch the&amp;nbsp;&lt;a href="http://echo360.stern.nyu.edu:8080/ess/echo/presentation/3cca744c-8dc7-4c0d-9564-d8868fe3f177"&gt;video from the event together with the slide presentations&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://echo360.stern.nyu.edu:8080/ess/echo/presentation/3cca744c-8dc7-4c0d-9564-d8868fe3f177"&gt;&lt;img border="0" height="313" src="http://4.bp.blogspot.com/-jFmqv6pObxM/TahjhbKIWBI/AAAAAAAAlu0/Y9GNVKlqIZM/s320/crowdsourcing-meetup-video-snapshot.PNG" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
The speakers at the event:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;John Horton&lt;/b&gt;, Staff Economist of oDesk. John talked on issues of matching employers with contractors in an online marketplace. Specifically he described mechanisms for forcing contractors to give an accurate description of their skills, avoiding issues of over-tagging a profile with irrelevant keywords or over-claiming qualifications.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Amanda Michel&lt;/b&gt;, Director of Distributed Reporting at ProPublica. Amanda talked about the crowdsourcing efforts of ProPublica, and how they use the crowd to enable better journalistic investigation of topics they are researching. At some point during the presentation, Amanda quoted from &lt;a href="http://www.propublica.org/article/our-stimulus-spot-check-summer-wave-of-projects-nears-crest-817"&gt;one of their studies&lt;/a&gt;: "&lt;i&gt;ProPublica pulled a random sample of 520 of the roughly 6,000 approved projects to examine stimulus progress around the country. That sample is large enough to estimate national patterns with a margin of error of plus or minus 4.5 percentage points&lt;/i&gt;." Honestly, a tear came down my eye when I compared that with the corresponding practices of Greek newsrooms that typically operate with samples of n=1 or n=0.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Todd Carter&lt;/b&gt;, CEO and Co-Founder of Tagasauris. Todd described Tagasauris, a system for annotating and tagging media files. Todd described the annotation effort for &lt;a href="http://www.magnumphotos.com/"&gt;Magnum Photos&lt;/a&gt; (sample photos in their collection include the &lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXHAFXW3&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2S5RYDYF53IF&amp;amp;PN=8&amp;amp;CT=Search"&gt;Afghan refugee girl&lt;/a&gt;, &lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXHAQ5TH&amp;amp;CT=Search&amp;amp;DT=Image"&gt;Marilyn Monroe on top of the vent&lt;/a&gt;, and many other iconic photos). A highlight was the discovery of a "lost" set of images from the shooting of the movie "American Graffiti". These images, shot by Dennis Stock, were in the Magnum archive but were not possible to find as they were lacking any tags and description. After the annotation effort from Tagasauris, the lost set of photos &lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXHARHQF&amp;amp;CT=Search&amp;amp;DT=Image"&gt;was re-discovered&lt;/a&gt;.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Panos Ipeirotis&lt;/b&gt;, representing AdSafe Media. I talked about our efforts in AdSafe, on using crowdsourcing in order to create machine learning systems for classifying web pages.&lt;/li&gt;
&lt;/ul&gt;&lt;div&gt;It was a lively and&amp;nbsp;successful&amp;nbsp;event. If there is enough interest and participants, I think this is an event that can be repeated periodically.&lt;/div&gt;&lt;a href="http://www.blogger.com/"&gt;&lt;/a&gt;&lt;span id="goog_1773302289"&gt;&lt;/span&gt;&lt;span id="goog_1773302290"&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-2822764788019891983?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=VedNdDFx-Mo:c-I0qJ6OG14:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=VedNdDFx-Mo:c-I0qJ6OG14:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/VedNdDFx-Mo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/2822764788019891983/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/video-from-nyc-crowdsourcing-meetup.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2822764788019891983?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2822764788019891983?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/VedNdDFx-Mo/video-from-nyc-crowdsourcing-meetup.html" title="Video from NYC Crowdsourcing Meetup" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-jFmqv6pObxM/TahjhbKIWBI/AAAAAAAAlu0/Y9GNVKlqIZM/s72-c/crowdsourcing-meetup-video-snapshot.PNG" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/video-from-nyc-crowdsourcing-meetup.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkINR38-fCp7ImA9WhZRFEk.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-1156336514512612042</id><published>2011-04-10T11:16:00.000-04:00</published><updated>2011-04-10T11:16:36.154-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-10T11:16:36.154-04:00</app:edited><category 
scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="meetup" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="online labor" /><title>NYC Crowdsourcing Meetup: April 13th, 6.30pm</title><content type="html">Join us for the first-ever &lt;a href="http://www.meetup.com/Distributed-Work/events/17149053/"&gt;New York City Crowdsourcing meetup&lt;/a&gt; hosted by NYU and sponsored by &lt;a href="http://crowdflower.com/"&gt;CrowdFlower&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;When&lt;/b&gt;: Wednesday, April 13,&amp;nbsp;6:30-9pm&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Where&lt;/b&gt;: NYU Stern School of Business,&amp;nbsp;Room M3-110,&amp;nbsp;&lt;a href="http://goo.gl/maps/rVxm"&gt;44 West 4th Street, New York, NY 10012&lt;/a&gt;&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
Pizza, beer, and thought-provoking conversation about the future of work.&amp;nbsp;Come listen, ask, and debate how crowdsourcing is changing everything from philanthropy and urban planning to creative design and enterprise solutions.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Confirmed Speakers:&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Lukas Biewald, CEO and Co-Founder of CrowdFlower&lt;/li&gt;
&lt;li&gt;Todd Carter, CEO and Co-Founder of Tagasauris&lt;/li&gt;
&lt;li&gt;John Horton, Chief Economist of oDesk&lt;/li&gt;
&lt;li&gt;Panos Ipeirotis, Associate Professor at Stern School of Business, NYU&lt;/li&gt;
&lt;li&gt;Amanda Michel, Director of Distributed Reporting at ProPublica&lt;/li&gt;
&lt;li&gt;Bartek Ringwelski, CEO and Co-Founder of SkillSlate&lt;/li&gt;
&lt;li&gt;Trebor Scholz, Associate Professor in Media &amp;amp; Culture at The New School University&lt;/li&gt;
&lt;/ul&gt;&lt;div&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-1156336514512612042?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=OGw0IhDLU7M:WmJWQttM9XE:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=OGw0IhDLU7M:WmJWQttM9XE:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/OGw0IhDLU7M" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/1156336514512612042/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/nyc-crowdsourcing-meetup-april-13th.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1156336514512612042?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1156336514512612042?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/OGw0IhDLU7M/nyc-crowdsourcing-meetup-april-13th.html" title="NYC Crowdsourcing Meetup: April 13th, 6.30pm" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/nyc-crowdsourcing-meetup-april-13th.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0cBQXg9cCp7ImA9WhZREEU.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-9006676185953191494</id><published>2011-04-05T16:08:00.003-04:00</published><updated>2011-04-06T07:24:10.668-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-06T07:24:10.668-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="surveys" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="tutorial" /><category scheme="http://www.blogger.com/atom/ns#" term="computer science" /><category scheme="http://www.blogger.com/atom/ns#" term="human computation" /><title>Tutorial on Crowdsourcing and Human Computation</title><content type="html">Last week, together with Praveen Paritosh from Google, we presented a 6-hour tutorial at the WWW 2011 conference, on crowdsourcing and human computation. The title of the tutorial was "&lt;a href="http://www.www2011india.com/tutorialstr26.html"&gt;Managing Crowdsourced Human Computation&lt;/a&gt;".&lt;br /&gt;
&lt;br /&gt;
My slides from the tutorial are &lt;a href="http://www.slideshare.net/ipeirotis/managing-crowdsourced-human-computation"&gt;available now on Slideshare&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;center&gt;&lt;br /&gt;
&lt;div style="text-align: center;"&gt;&lt;div id="__ss_7526103" style="width: 425px;"&gt;&lt;iframe frameborder="0" height="355" marginheight="0" marginwidth="0" scrolling="no" src="http://www.slideshare.net/slideshow/embed_code/7526103" width="425"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;/div&gt;&lt;/center&gt;&lt;br /&gt;
&lt;br /&gt;
Once Praveen gets clearance from Google, we will post his slides as well. &lt;br /&gt;
&lt;br /&gt;
Judging from all the crap that I get to review lately, I was getting pessimistic about the quality of research on crowdsourcing. However, while preparing the tutorial, I realized the massive amount of high-quality research that is being published. We had 6 hours for the tutorial, and we did not have enough time to cover many really interesting papers. I had to refer people to other, more "specialized" tutorials (e.g., on linguistic annotation, on search relevance, etc), which I mention at the end of the slides.&lt;br /&gt;
&lt;br /&gt;
Special thanks go to my PhD student, &lt;a href="http://pages.stern.nyu.edu/~jwang5/"&gt;Jing Wang&lt;/a&gt;, for her slides on market design, &lt;a href="http://www.ischool.utexas.edu/~ml/"&gt;Matt Lease&lt;/a&gt; for his excellent &lt;a href="http://ir.ischool.utexas.edu/crowd/"&gt;list of pointers for crowdsourcing resources&lt;/a&gt;, &lt;a href="http://wwwcsif.cs.ucdavis.edu/~alonsoom/"&gt;Omar Alonso&lt;/a&gt; for his tutorial slides on crowdsourcing for search relevance, &lt;a href="http://alexquinn.org/"&gt;Alex Quinn&lt;/a&gt; and &lt;a href="http://www.cs.umd.edu/~bederson/"&gt;Ben Bederson&lt;/a&gt; for their survey on human computation, and Winter Mason for sharing his slides from his CSDM keynote. And all the other researchers for making crowdsourcing and human computation an exciting field for research!&lt;br /&gt;
&lt;br /&gt;
Last but not least: Luis von Ahn with Edith Law will be presenting another tutorial on human computation during AAAI, in San Francisco on August 8th. We will be organizing the &lt;a href="http://humancomputation.com/"&gt;HCOMP 2011 workshop&lt;/a&gt; in conjunction with AAAI as well! The submission deadline is April 22nd! Do not forget to submit!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-9006676185953191494?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=7g3ltXQm4YI:Zt38x7lnCAw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=7g3ltXQm4YI:Zt38x7lnCAw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/7g3ltXQm4YI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/9006676185953191494/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/tutorial-on-crowdsourcing-and-human.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/9006676185953191494?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/9006676185953191494?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/7g3ltXQm4YI/tutorial-on-crowdsourcing-and-human.html" title="Tutorial on Crowdsourcing and Human Computation" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/tutorial-on-crowdsourcing-and-human.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEQASXo4fSp7ImA9WhRRFUw.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-6882121483395717753</id><published>2011-04-05T08:41:00.011-04:00</published><updated>2011-11-28T16:39:08.435-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-28T16:39:08.435-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><title>An ingenious application of crowdsourcing: Fix reviews' 
grammar, improve sales</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;
I have been doing &lt;a href="http://pages.stern.nyu.edu/~panos/publications/kdd2007.pdf"&gt;research&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/icis2010.pdf"&gt;on&lt;/a&gt; the &lt;a href="http://pages.stern.nyu.edu/~panos/publications/jrpm2009.pdf"&gt;economic&lt;/a&gt; &lt;a href="http://papers.ssrn.com/sol3/papers.cfm?abstract_id=1024903"&gt;impact&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/tkde2010-usefulness.pdf"&gt;of&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/icec2007.pdf"&gt;product&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/acl2007.pdf"&gt;reviews&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/www2011.pdf"&gt;for a&lt;/a&gt; &lt;a href="http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&amp;amp;Sect2=HITOFF&amp;amp;p=1&amp;amp;u=/netahtml/PTO/search-bool.html&amp;amp;r=1&amp;amp;f=G&amp;amp;l=50&amp;amp;co1=AND&amp;amp;d=PTXT&amp;amp;s1=7848979.PN.&amp;amp;OS=PN/7848979&amp;amp;RS=PN/7848979"&gt;while&lt;/a&gt;. One thing that we have noticed is that the quality of the reviews can have an impact on product sales,&lt;i&gt; independently of the polarity of the review&lt;/i&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;i&gt;High-quality reviews improve product sales&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
A well-written review tends to inspire confidence about the product, even if the review is negative. Typically, such reviews are perceived as objective and thorough. If we have a high-quality &lt;i&gt;negative &lt;/i&gt;review this may serve as a guarantee that the negative aspects of the product are not that bad after all. For example, a negative review, such as "&lt;i&gt;horrible battery life... in my tests battery lasts barely longer than 24 hours,&lt;/i&gt;" may be perceived as positive  by other customers that consider a 24-hour battery life to be more than sufficient.&lt;br /&gt;
&lt;br /&gt;
In our recent (&lt;a href="http://www.www2011india.com/"&gt;award-winning&lt;/a&gt;) &lt;a href="http://pages.stern.nyu.edu/~panos/publications/www2011.pdf"&gt;WWW2011 paper "Towards a Theory Model for Product Search"&lt;/a&gt;&amp;nbsp;(with &lt;a href="http://pages.stern.nyu.edu/~bli/"&gt;Beibei Li&lt;/a&gt; and &lt;a href="http://people.stern.nyu.edu/aghose/"&gt;Anindya Ghose&lt;/a&gt;), we noticed that demand for a hotel increases if the reviews on TripAdvisor and Travelocity are well-written, without spelling errors; this holds no matter if the review is positive or negative. In our &lt;a href="http://pages.stern.nyu.edu/~panos/publications/tkde2010-usefulness.pdf"&gt;TKDE paper "Estimating the Helpfulness and Economic Impact of Product Reviews: Mining Text and Reviewer Characteristics"&lt;/a&gt;&amp;nbsp;&amp;nbsp;(with&amp;nbsp;&lt;a href="http://people.stern.nyu.edu/aghose/"&gt;Anindya Ghose&lt;/a&gt;), we observed similar trends for products sold and reviewed on Amazon.com.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;i&gt;And what can we do knowing this?&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Being in a business school, these findings were considered informative but not deeply interesting. Do not forget, the focus of researchers in business schools is &lt;a href="http://behind-the-enemy-lines.blogspot.com/2009/09/different-attitudes-of-computer.html"&gt;centered on causality and on policy-making&lt;/a&gt;. Yes, we now know that it is important for the reviews to be well-written and informative, if we want the product to sell well. But if we cannot do anything about this, it is not deeply interesting. It is almost like knowing that during the cold months the demand for summer resorts drops!&lt;br /&gt;
&lt;br /&gt;
But here comes the twist...&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;i&gt;The crowdsourcing solution&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Last week, over drinks during the WWW conference, I learned about a fascinating application of crowdsourcing that attacked exactly this issue.&lt;br /&gt;
&lt;br /&gt;
An online retailer noticed that, indeed, products with high-quality reviews are selling well. So, they decided to take action. The retailer used Amazon Mechanical Turk to improve the quality of the reviews posted on its own website. Using the &lt;a href="http://groups.csail.mit.edu/uid/other-pubs/soylent.pdf"&gt;Find-Fix-Verify pattern&lt;/a&gt;, the retailer used Mechanical Turk to examine &lt;b&gt;&lt;i&gt;millions &lt;/i&gt;&lt;/b&gt;of product reviews. (Here are the archived versions of the HITs: &lt;a href="http://mturk-tracker.com/hit/79f44798e8c296e29bddbd3a3aa8f60a/"&gt;Find&lt;/a&gt;, &lt;a href="http://mturk-tracker.com/hit/160L5FUZB7DDN03NIAB2D4J8HRZ022/"&gt;Fix&lt;/a&gt;, &lt;a href="http://mturk-tracker.com/hit/1QPDXYWLI7K28IZ6MAP7WEO0N6V03B/"&gt;Verify&lt;/a&gt;. And if you have not figured out the firm name by now, the retailer is Zappos.) For the reviews with mistakes, &lt;b&gt;&lt;i&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;they fixed the spelling and grammar errors! Thus they effectively improved the quality of the reviews on their website. And, correspondingly, they improved the demand for their products.&lt;/span&gt;&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;i&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;&lt;br /&gt;
&lt;/span&gt;&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;i&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;&lt;span class="Apple-style-span" style="color: black; font-style: normal; font-weight: normal;"&gt;For the curious readers, Zappos has been doing this&amp;nbsp;&lt;a href="http://mturk-tracker.com/hit/e39ec137599fd6adbdf44a18ab53054a/"&gt;at least since April of 2009&lt;/a&gt;, which means that they were doing it even before being bought by Amazon.&lt;/span&gt;&lt;/span&gt;&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
While I do not know the exact revenue improvement, I was told that it was substantial. Given that Zappos spent at least 10 cents per review, and that they examined approximately 5 million reviews, this is an expense of a few hundred thousand dollars. (My archive on MTurk-Tracker kind of confirms these numbers.) So, the expected revenue improvement should have been at least a few million dollars for this exercise to make sense.&lt;br /&gt;
&lt;br /&gt;
Ethical? Notice that they are not fixing the polarity or the content of the reviews. They just change the language to be correct and error-free. I can see the counter-argument that the writing style allows us to judge if the review is serious or not. So, artificially improving the writing style may be considered as interference with the perceived objectivity of the user-generated reviews. I still consider it fine to change the grammar, from the ethics point of view.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;But is it ingenious? A resounding yes!&lt;/span&gt;&lt;/b&gt; It is one of these solutions that is sitting in front of you but you just cannot see it. And this is what makes it ingenious.&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-6882121483395717753?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=suZr4PVOpB0:deP2qE04G8E:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=suZr4PVOpB0:deP2qE04G8E:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/suZr4PVOpB0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/6882121483395717753/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/want-to-improve-sales-fix-grammar-and.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6882121483395717753?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6882121483395717753?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/suZr4PVOpB0/want-to-improve-sales-fix-grammar-and.html" title="An ingenious application of crowdsourcing: Fix reviews' grammar, improve sales" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/want-to-improve-sales-fix-grammar-and.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0QMQXcyfSp7ImA9WhZREkQ.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-4546172668884835093</id><published>2011-03-22T15:25:00.007-04:00</published><updated>2011-04-08T16:43:00.995-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-08T16:43:00.995-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category 
scheme="http://www.blogger.com/atom/ns#" term="industry analysis" /><category scheme="http://www.blogger.com/atom/ns#" term="efficient markets" /><title>Crowdsourcing goes professional: The rise of the verticals</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;Over the last few months, I see a trend. Instead of letting end-users interact directly with the crowd (e.g., on Mechanical Turk), we see a rise of the number of solutions that target a very specific vertical.&lt;br /&gt;
&lt;ul style="text-align: left;"&gt;&lt;li&gt;&lt;a href="http://castingwords.com/"&gt;CastingWords&lt;/a&gt;: Audio transcription&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.speakertext.com/"&gt;SpeakerText&lt;/a&gt;: Video transcription&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.serv.io/translation"&gt;Serv.io Translate&lt;/a&gt;: Translation (by &lt;a href="http://www.cloudcrowd.com/"&gt;CloudCrowd&lt;/a&gt;)&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.serv.io/edit"&gt;Serv.io Edit&lt;/a&gt;: Proofreading (by &lt;a href="http://www.cloudcrowd.com/"&gt;CloudCrowd&lt;/a&gt;)&lt;/li&gt;
&lt;li&gt;&lt;a href="http://crowdflower.com/"&gt;CrowdFlower&lt;/a&gt;&amp;nbsp;&lt;a href="http://crowdflower.com/solutions/blv/index"&gt;Business Listing Verification&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://crowdflower.com/"&gt;CrowdFlower&lt;/a&gt;&amp;nbsp;&lt;a href="http://crowdflower.com/solutions/search_rel/index"&gt;Search Relevance&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://crowdflower.com/"&gt;CrowdFlower&lt;/a&gt;&amp;nbsp;&lt;a href="http://crowdflower.com/solutions/prod_cat/index"&gt;Product Categorization&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.betterocr.com/"&gt;BetterOCR&lt;/a&gt;: Improving Optical Character Recognition&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.tagasauris.com/"&gt;Tagasauris&lt;/a&gt;: Photo tagging&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.mediapiston.com/"&gt;MediaPiston&lt;/a&gt;:&amp;nbsp;Targeted content generation&lt;/li&gt;
&lt;/ul&gt;&lt;div&gt;Add services like &lt;a href="http://www.trada.com/"&gt;Trada&lt;/a&gt;&amp;nbsp;for crowd-optimizing paid advertising campaigns, &lt;a href="http://www.utest.com/"&gt;uTest&lt;/a&gt;&amp;nbsp;for crowd-testing software applications, etc. and you will see that for most crowd applications there is now a professionally developed crowd-app.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div&gt;Why do we see these efforts? This is the time that most people realize that crowdsourcing is not that simple. Using Mechanical Turk directly &lt;a href="http://engineeringblog.yelp.com/2011/02/towards-building-a-high-quality-workforce-with-mechanical-turk.html"&gt;is a very costly enterprise&lt;/a&gt; and &lt;b&gt;&lt;i&gt;cannot be done effectively by amateurs&lt;/i&gt;&lt;/b&gt;: The interface needs to be professionally designed, quality control needs to be done intelligently, and the crowd needs to be managed in the same way that any employee is managed.&amp;nbsp;Most companies do not have the time or the resources to invest in such solutions. So, we see the rise of such verticals that address the most common tasks that were accomplished on Mechanical Turk.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;
&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;(Interestingly enough, if I remember correctly, the rise of vertical solutions was also a phase during web search. In the period in which AltaVista started being spammed and full of irrelevant results, we saw the rise of topic-specific search engines that were trying to eliminate the problems of polysemy by letting you search only for web pages within a given topic.)&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;/div&gt;&lt;div&gt;For me, this is the signal that &lt;b&gt;crowdsourcing will stop being the fad of the day&lt;/b&gt;. Amateurish solutions will be shunned, and most people will find it cheaper to just use the services of the verticals above. Saying "oh, I paid just $[&lt;i&gt;add offensively low dollar amount&lt;/i&gt;] to do [&lt;i&gt;add trivial task&lt;/i&gt;] on Mechanical Turk" will stop being a novelty and people will just point to a company that does the same thing&amp;nbsp;professionally&amp;nbsp;and on a large scale.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div&gt;This also means that &lt;b&gt;the crowdsourcing space will become increasingly "boring."&lt;/b&gt; All the low-hanging fruits will be gone. Only people that are willing to invest time and effort in the long term will get into the space.&amp;nbsp;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div&gt;And it will be the time that we will get to separate the wheat from the chaff.&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-4546172668884835093?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=L7tYILqlaws:khoaqt_0ea8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=L7tYILqlaws:khoaqt_0ea8:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=L7tYILqlaws:khoaqt_0ea8:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=L7tYILqlaws:khoaqt_0ea8:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=L7tYILqlaws:khoaqt_0ea8:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=L7tYILqlaws:khoaqt_0ea8:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=L7tYILqlaws:khoaqt_0ea8:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/L7tYILqlaws" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/4546172668884835093/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/03/crowdsourcing-goes-professional-rise-of.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/4546172668884835093?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/4546172668884835093?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/L7tYILqlaws/crowdsourcing-goes-professional-rise-of.html" title="Crowdsourcing goes professional: The rise of the verticals" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/03/crowdsourcing-goes-professional-rise-of.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0AFSH84eip7ImA9WhRUFEg.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-4162663974219185399</id><published>2011-03-16T13:50:00.015-04:00</published><updated>2012-01-24T20:41:59.132-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-24T20:41:59.132-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="online advertising" /><category 
scheme="http://www.blogger.com/atom/ns#" term="spam" /><category scheme="http://www.blogger.com/atom/ns#" term="fraud" /><title>Uncovering an advertising fraud scheme. Or "the Internet is for porn"</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;
You have heard about fraud and online advertising. You may have seen the &lt;a href="http://online.wsj.com/video/digits-porn-sites-scam-advertisers/5FBF57B5-3063-4A45-B55D-47B98C006862.html"&gt;Wall Street Journal video &amp;nbsp;"Porn Sites Scam Advertisers"&lt;/a&gt;, or even read the story in today's Wall Street Journal about "&lt;a href="http://online.wsj.com/article/SB10001424052748704893604576200383793893712.html"&gt;Off Screen, Porn Sites Trick Advertisers&lt;/a&gt;" (&lt;i&gt;Hint&lt;/i&gt;: to avoid the WSJ paywall, search the title of the article through Google News and click from there, to read the full article).&lt;br /&gt;
&lt;br /&gt;
Since I am intimately familiar with the story covered by WSJ (i.e., I was part of the team at &lt;a href="http://www.adsafemedia.com/"&gt;AdSafe&lt;/a&gt; that uncovered it), I thought it would be also good to cover the technical aspects in more detail, uncovering the way in which this advertising fraud scheme operated.&lt;br /&gt;
&lt;br /&gt;
It is long but (I think) interesting. &lt;b&gt;It is a story of a one-man-making-a-million-dollar-per-month fraud scheme&lt;/b&gt;. It shows how a moderately sophisticated advertising fraud scheme can generate very significant monetary benefits for the fraudster: Profits of millions of dollars per year.&lt;br /&gt;
&lt;br /&gt;
If you want to skip the technical sleuthing details, you can skip directly to the &lt;a href="http://behind-the-enemy-lines.blogspot.com/2011/03/uncovering-advertising-fraud-scheme.html#overall"&gt;overall picture&lt;/a&gt; and the discussion.&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;Disclaimer&lt;/span&gt;: In the story below, I will only mention by name the sites performing the fraudulent activities. All the brand names that you see are just for illustration purposes. They are &lt;span class="Apple-style-span" style="color: #cc0000;"&gt;not &lt;/span&gt;the ones affected by this case of fraud. Also remember that this is a personal blog. The views and opinions that I express here are my own and do not necessarily represent the views of &lt;a href="http://www.adsafemedia.com/"&gt;AdSafe&lt;/a&gt;&amp;nbsp;or the views of &lt;a href="http://www.stern.nyu.edu/"&gt;New York University&lt;/a&gt;.&lt;/b&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;i&gt;&lt;b&gt;The erroneous classifier&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;br /&gt;&lt;/div&gt;
It all started while working at &lt;a href="http://www.adsafemedia.com/"&gt;AdSafe&lt;/a&gt;.  For those not familiar with AdSafe: The role of AdSafe is to provide brand protection services to online advertisers. In plain English, AdSafe analyzes website content and can block ads from appearing on individual web pages with content inappropriate for a brand. Porn, hate speech, gambling, celebrity gossip, torrents, are among the many categories that we detect.&lt;br /&gt;
&lt;br /&gt;
On a nice Monday, the data science team gets the notification: The web page classifier was detecting a large number of porn web pages within legitimate, clean, big-brand-name websites. Think of websites such as BabyCenter, MSN MoneyCentral, HGTV, and so on. These sites would never have anything racy in their pages. However, we could see them being classified as having hard-core porn!&lt;br /&gt;
&lt;br /&gt;
Why do we detect porn in clean sites? None of the pages within the sites contained anything offensive. No porn, no offensive material. Nothing. The website was as clean as it gets. What was going on?&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;i&gt;&lt;b&gt;The invisible iframe hosting &lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;br /&gt;
The lifesaver was a technique developed at AdSafe: The key to the solution was the ability to read the address of the top frame that was hosting the ad&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;(*)&lt;/span&gt;. We were detecting porn because the ads that were supposed to appear within a "clean publisher" site &lt;b&gt;&lt;i&gt;were appearing within the frame of a porn website. &lt;/i&gt;&lt;/b&gt;Think of HGTV as an illustrative, &lt;i&gt;but not the real&lt;/i&gt;, example of such a "clean publisher."&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;(*) For the technically curious: reading the address of the top frame is a challenging problem. For security reasons, browsers do not allow cross-domain scripting. So, it is not possible to just call the "top" object and read its properties. We have a proprietary solution for this.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
By using this technique, we got our explanation: The HGTV website was appearing within an iframe of a porn website. In our case, the porn website was &lt;a href="http://www.hqtubevideos.com/"&gt;www.hqtubevideos.com&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
WTF? This made no sense. Why would the porn website display HGTV (and the associated ads) within an iframe? Why would the porn site generate this "invisible" traffic towards HGTV? Just so that HGTV would get paid for the CPM ads? Or was the porn site trying to decrease the clickthrough rate of HGTV and ruin the performance of the CPC campaigns? Did the porn website love HGTV so much that it was trying to increase its traffic? No way. Did HGTV employ a porn website to increase its traffic? No way, either.&lt;br /&gt;
&lt;br /&gt;
Made no sense whatsoever.&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;i&gt;Checking the structure of the porn website&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
So, we decided to investigate. Let's see what is going on. First, we go and see the HTML source of &lt;a href="http://www.hqtubevideos.com/play.html"&gt;www.hqtubevideos.com/play.html&lt;/a&gt; that was the top frame that we were detecting. Here is the source:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh5.googleusercontent.com/-GXHdZyVj3DI/TXEktLQfshI/AAAAAAAAhsM/Xa5o-DOTgYE/s1600/hqtube-play_html.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="317" src="https://lh5.googleusercontent.com/-GXHdZyVj3DI/TXEktLQfshI/AAAAAAAAhsM/Xa5o-DOTgYE/s400/hqtube-play_html.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
&lt;br /&gt;
The highlighted part shows an interesting redirection. We go to the &lt;a href="http://www.hqtubevideos.com/index.php"&gt;www.hqtubevideos.com/index.php&lt;/a&gt;, but with the parameter &lt;b&gt;?x=1&lt;/b&gt; at the end.&lt;br /&gt;
&lt;br /&gt;
Loading the page &lt;a href="http://www.hqtubevideos.com/index.php"&gt;www.hqtubevideos.com/index.php&lt;/a&gt; without this parameter loads a "vanilla" porn website. A few semi-suspicious attempts to add the website in the bookmarks in the beginning. Then porn pictures and links to affiliate sites. Plenty of porn but nothing to set an alarm. So far, so good.&lt;br /&gt;
&lt;br /&gt;
The key, though, is this parameter &lt;b&gt;?x=1&lt;/b&gt;. Loading the &lt;a href="http://www.hqtubevideos.com/index.php?x=1"&gt;www.hqtubevideos.com/index.php?x=1&lt;/a&gt; &lt;b&gt;we see a key part added to the website, at the very bottom. &lt;/b&gt;Here is the corresponding source.&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;span class="Apple-style-span" style="font-family: monospace;"&gt;&lt;span class="Apple-style-span" style="white-space: pre-wrap;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; white-space: normal;"&gt;&lt;a href="http://3.bp.blogspot.com/-G28bMoOBVd0/TWXaItPW4iI/AAAAAAAAhnQ/7kXWSGX0iwQ/s1600/hqtubevideos-index_html-iframecode.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="50" src="http://3.bp.blogspot.com/-G28bMoOBVd0/TWXaItPW4iI/AAAAAAAAhnQ/7kXWSGX0iwQ/s400/hqtubevideos-index_html-iframecode.PNG" width="400" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;br /&gt;
Aha! A 0x0 iframe, loading the following URLs:&lt;br /&gt;
&lt;ul style="text-align: left;"&gt;
&lt;li&gt;&lt;a href="http://www.hqtubevideos.com/counter.php"&gt;www.hqtubevideos.com/counter.php&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.hqtubevideos.com/counter2.php"&gt;www.hqtubevideos.com/counter2.php&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
The first URL seems to be loading some randomized hashid. Ignore.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
The second URL, &lt;a href="http://www.hqtubevideos.com/counter2.php"&gt;www.hqtubevideos.com/counter2.php&lt;/a&gt;, is a little bit more interesting and puzzling:&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-yBSORQvlIro/TWXcSly1Z5I/AAAAAAAAhnY/xJHTO-3H7Bk/s1600/hqtubevideos-counter2.php.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="101" src="http://4.bp.blogspot.com/-yBSORQvlIro/TWXcSly1Z5I/AAAAAAAAhnY/xJHTO-3H7Bk/s400/hqtubevideos-counter2.php.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
What is going on? Why would a porn site link to these domains? &lt;b&gt;What is the connection? &lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;The parked domains&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;br /&gt;&lt;/div&gt;
We started by doing a &lt;i&gt;whois &lt;/i&gt;to figure out the ownership of these domains. Unfortunately, the registration information for the hqtubevideos is private and protected. However, the registration info for all the other domains is available. Not surprisingly, we see a common ownership for all these seven domains:&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;
Registrant:&lt;br /&gt;
Thomas Schneider&lt;br /&gt;
519 S. York Road&lt;br /&gt;
Dillsburg, Pennsylvania 17019&lt;br /&gt;
United States&lt;br /&gt;
Registered through: GoDaddy.com, Inc. (http://www.godaddy.com)&lt;br /&gt;
Domain Name: RELAXHEALTH.COM&lt;br /&gt;
Created on: 11-Mar-09&lt;br /&gt;
Expires on: 11-Mar-12&lt;br /&gt;
Last Updated on: 10-Jan-11&lt;br /&gt;
Administrative Contact:&lt;br /&gt;
Schneider, Thomas  garret.and@gmail.com&lt;br /&gt;
519 S. York Road&lt;br /&gt;
Dillsburg, Pennsylvania 17019&lt;br /&gt;
United States&lt;br /&gt;
7174327575      Fax --&lt;br /&gt;
Technical Contact:&lt;br /&gt;
Schneider, Thomas  garret.and@gmail.com&lt;br /&gt;
519 S. York Road&lt;br /&gt;
Dillsburg, Pennsylvania 17019&lt;br /&gt;
United States&lt;br /&gt;
7174327575      Fax --&lt;br /&gt;
Domain servers in listed order:&lt;br /&gt;
NS1.ROLENEWS.COM&lt;br /&gt;
NS2.ROLENEWS.COM&lt;/blockquote&gt;
&lt;br /&gt;
Now we start seeing something being uncovered. Would this guy, Thomas Schneider, be behind this? Too easy to be true. We went and did a reverse whois to find other domains that contained the email &lt;b&gt;garret.and@gmail.com&lt;/b&gt;. And here we are: The email is associated with the registration of 89 other domains, which are registered under a variety of last names, but all listing &lt;b&gt;garret.and@gmail.com &lt;/b&gt;as the contact email:&lt;br /&gt;
&lt;blockquote&gt;
&lt;div style="text-align: center;"&gt;
aboutclimax.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
aboutclinical.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
aboutcouples.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
abouterectile.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
abouterection.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
achieveday.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
achievedrugs.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
afterdeaths.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
afterdrugs.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
associatedmagazine.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
atlantea.org&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
baldnesshealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
basehealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
becomeerect.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
begineducate.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
behaviordesire.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
beingdizzy.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
bestcialis.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
bestclimax.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
bigcouples.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
bodychemical.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
bodyclimax.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
bodyday.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
bundlehealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
calnam.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
cancerdamage.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
carecouples.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
carloschongdds.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
ceaifa.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
cialisc.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
cigarettesfinder.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
clubofheads.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
coacaz.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
college-grants1.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
college-scholarships1.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
collinshall.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
conditionnews.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
couponvi.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
criminaldefenseattorneys2.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
criminaldefenselawfirms1.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
detailedhealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
drinkershealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
drinkingmagazine.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
eurovision-2008.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
experiencemedical.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
fantasiesmagazine.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
fearhealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
gendergibe.org&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
government-grants1.org&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
groupovienna.net&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
hardballdollars.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
hawgsandpaws.org&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
impotencemagazine.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
letscurepeyronies.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
levitrav.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
medicationmagazine.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
moorehabitat.org&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
nighttimemagazine.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
ownmeds.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
panimarock.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
playmeds.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
powerfulselling.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
printcoupons1.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
propeciav.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
relationshipmeds.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
relaxhealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
rml-inc.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
rxvis.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
savewhalompark.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
sex-tvs.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
shopwizz.biz&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
signbysign.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
steve-magic.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
styleandmore.net&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
syncsql.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
takemedical.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
taylor-training.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
testosteronehealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
thedongman.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
traumamedical.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
twohealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
viagracomp.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
viagraeds.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
viagramagazine.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
viagravi.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
washealth.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
waymagazine.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
weightmedical.com&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
worldcuplive1.com&lt;/div&gt;
&lt;/blockquote&gt;
&lt;div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
Let's see what we have so far: The owner of a porn domain loads in a set of 0x0 iframes, a set of other websites, all operated by the same owner. But still, no clear motivation. Also, the connection with the publishers that we checked remains elusive.&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;/div&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;Re-directions within the parked domains&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
Now, let's see what is going on within these URL calls, such as &lt;a href="http://www.takemedical.com/go_with_post.php"&gt;www.takemedical.com/go_with_post.php&lt;/a&gt;. Here is the HTML source of one of those URLs:&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh6.googleusercontent.com/-OXJ0B8qFnNw/TXEstfh_dDI/AAAAAAAAhsU/6rSed97u-70/s1600/parked-domain-post-redirect.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="127" src="https://lh6.googleusercontent.com/-OXJ0B8qFnNw/TXEstfh_dDI/AAAAAAAAhsU/6rSed97u-70/s400/parked-domain-post-redirect.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
Interesting. Another redirection. The site automatically submits a search form, searching for the term "&lt;i&gt;hihijiji&lt;/i&gt;". Loading the page in the browser with the GET method (as opposed to the POST method indicated in the form), takes us to the normal page of a parked domain.&lt;br /&gt;
&lt;br /&gt;
But let's submit with the POST method:&lt;br /&gt;
&lt;blockquote&gt;
&lt;code&gt;curl www.takemedical.com/search.php -d token=hihijiji&lt;/code&gt;&lt;/blockquote&gt;
&lt;/div&gt;
&lt;div&gt;
Ha! The result is different:&lt;/div&gt;
&lt;div&gt;
&lt;blockquote&gt;
&lt;br /&gt;
&lt;code&gt; &amp;lt;iframe frameborder="No" height="1" src="index2.php" width="1"&amp;gt; &amp;lt;/iframe&amp;gt; &lt;/code&gt;&lt;/blockquote&gt;
&lt;/div&gt;
&lt;div&gt;
We uncovered a hidden URL. This is the point where everything will start falling into place.&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;b&gt;&lt;i&gt;Re-directions and generating click fraud with normal click patterns&lt;/i&gt;&lt;/b&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;br /&gt;
Within this hidden&amp;nbsp;URL&amp;nbsp;is where all the interesting things are happening!&lt;b&gt; Let's load &lt;a href="http://www.takemedical.com/index2.php"&gt;www.takemedical.com/index2.php&lt;/a&gt; and see the network activity.&lt;/b&gt; (In Chrome, go to Tools, Developer Tools, and then to the Network tab.). Here is the screenshot:&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh5.googleusercontent.com/-R9lgD8LQfdw/TXEu7e2Sb5I/AAAAAAAAhsY/cT0tZn1-bDk/s1600/parked-domain-redirects.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="145" src="https://lh5.googleusercontent.com/-R9lgD8LQfdw/TXEu7e2Sb5I/AAAAAAAAhsY/cT0tZn1-bDk/s400/parked-domain-redirects.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Indeed, here is where all the action is happening: These innocent-sounding parked domains load all sorts of ad sites, and then "click" on the ads. By click, we do not mean any actual click. Instead, the site loads the URL in the ad, which is typically a redirection to the ad server, which then redirects to the advertised URL. After this "click" within the iframe, we finally have the publisher website (the "HGTV" that we mentioned before)! &lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Interestingly enough, the click fraud was very well done: It was not loading the same website all the time. Sometimes it was mevio, other times it was tremor, other times bodyarchitect.tv, and so on. And once we had been redirected enough times from the same IP address, the final redirect was going to find.fm, to execute a straightforward search. Clever! Engage in fraud, but be careful not to trigger any alarms.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Also, notice that the traffic patterns for the clicks are not bot-generated&lt;/b&gt;. These are actual users. With real and different web browsers. Different IP addresses. Different times of the day, following the usual traffic patterns per region. Good job: these click-fraud patterns are the least likely to be caught as &lt;b&gt;they have patterns very similar to normal traffic&lt;/b&gt;.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
For those interested in the details, here is the set of screenshots with the redirects:&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh5.googleusercontent.com/-qVyp1ChmmRA/TXExPKhXnOI/AAAAAAAAhsc/qvg_K3bUaWI/s1600/parked-domain-1.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="135" src="https://lh5.googleusercontent.com/-qVyp1ChmmRA/TXExPKhXnOI/AAAAAAAAhsc/qvg_K3bUaWI/s400/parked-domain-1.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh3.googleusercontent.com/-3KulINgZhwA/TXExPdCASOI/AAAAAAAAhsg/iybgmapk3qg/s1600/parked-domain-2.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="142" src="https://lh3.googleusercontent.com/-3KulINgZhwA/TXExPdCASOI/AAAAAAAAhsg/iybgmapk3qg/s400/parked-domain-2.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh3.googleusercontent.com/-q3Cpp0uIM6g/TXExPsl3ycI/AAAAAAAAhsk/W1wY7lz8rdo/s1600/parked-domain-3.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="162" src="https://lh3.googleusercontent.com/-q3Cpp0uIM6g/TXExPsl3ycI/AAAAAAAAhsk/W1wY7lz8rdo/s400/parked-domain-3.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh4.googleusercontent.com/-zWpoE6IlyfI/TXExP36fWhI/AAAAAAAAhso/gdQL4qAbuew/s1600/parked-domain-4.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="123" src="https://lh4.googleusercontent.com/-zWpoE6IlyfI/TXExP36fWhI/AAAAAAAAhso/gdQL4qAbuew/s400/parked-domain-4.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh5.googleusercontent.com/-c0FdFkU0xX4/TXExQF5jHfI/AAAAAAAAhss/BkNvK0wy-QY/s1600/parked-domain-5.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="142" src="https://lh5.googleusercontent.com/-c0FdFkU0xX4/TXExQF5jHfI/AAAAAAAAhss/BkNvK0wy-QY/s400/parked-domain-5.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh4.googleusercontent.com/-oLleFeARs3o/TXExQS95chI/AAAAAAAAhsw/TnyPVDzQf0I/s1600/parked-domain-6.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="143" src="https://lh4.googleusercontent.com/-oLleFeARs3o/TXExQS95chI/AAAAAAAAhsw/TnyPVDzQf0I/s400/parked-domain-6.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh4.googleusercontent.com/-_-KqSY7ue5w/TXExQTVttwI/AAAAAAAAhs0/j039lVKHojE/s1600/parked-domain-7.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="146" src="https://lh4.googleusercontent.com/-_-KqSY7ue5w/TXExQTVttwI/AAAAAAAAhs0/j039lVKHojE/s400/parked-domain-7.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh4.googleusercontent.com/-Dxp-OfM0oXw/TXExQrQsoLI/AAAAAAAAhs4/5mH5KM0jBSY/s1600/parked-domain-8.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="122" src="https://lh4.googleusercontent.com/-Dxp-OfM0oXw/TXExQrQsoLI/AAAAAAAAhs4/5mH5KM0jBSY/s400/parked-domain-8.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;
&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;Update (3/17/2011)&lt;/span&gt;&lt;/b&gt;: Initially, I did not want to post screenshots with the actual ad networks that were being defrauded, as it was not my intention to involve them in the story. However, since they were mentioned in the Wall Street Journal article already, and they have taken measures against this, I am posting them now. Here are the screenshots with ad loads from the networks. Warning: NSFW.&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh3.googleusercontent.com/-SUHgf5Tl7J8/TYIOyds7PBI/AAAAAAAAhzU/lpi6qSTWUdY/s1600/screenshots-with-adsloading.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="208" src="https://lh3.googleusercontent.com/-SUHgf5Tl7J8/TYIOyds7PBI/AAAAAAAAhzU/lpi6qSTWUdY/s400/screenshots-with-adsloading.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh4.googleusercontent.com/-YRnaz2nFI7I/TYIOytSnz9I/AAAAAAAAhzY/o8mAQFt4NLM/s1600/screenshots-with-adsloading-2.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="195" src="https://lh4.googleusercontent.com/-YRnaz2nFI7I/TYIOytSnz9I/AAAAAAAAhzY/o8mAQFt4NLM/s400/screenshots-with-adsloading-2.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh5.googleusercontent.com/-2F5fWyJcJxE/TYIOy1EfVQI/AAAAAAAAhzc/7wQBe5u2-Ao/s1600/screenshots-with-adsloading-3.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="192" src="https://lh5.googleusercontent.com/-2F5fWyJcJxE/TYIOy1EfVQI/AAAAAAAAhzc/7wQBe5u2-Ao/s400/screenshots-with-adsloading-3.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh4.googleusercontent.com/-0oK0TYKUp-s/TYIOzVkwEyI/AAAAAAAAhzg/qhApru-wz3M/s1600/screenshots-with-adsloading-4.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="183" src="https://lh4.googleusercontent.com/-0oK0TYKUp-s/TYIOzVkwEyI/AAAAAAAAhzg/qhApru-wz3M/s400/screenshots-with-adsloading-4.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh6.googleusercontent.com/-HTRY6sjthcM/TYIOzpONdaI/AAAAAAAAhzk/GlqC4kIH13Q/s1600/screenshots-with-adsloading-5.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="187" src="https://lh6.googleusercontent.com/-HTRY6sjthcM/TYIOzpONdaI/AAAAAAAAhzk/GlqC4kIH13Q/s400/screenshots-with-adsloading-5.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh6.googleusercontent.com/-k3tPj1W9Dmw/TYIO0NK3FeI/AAAAAAAAhzo/Y4KC52lC_4o/s1600/screenshots-with-adsloading-6.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="190" src="https://lh6.googleusercontent.com/-k3tPj1W9Dmw/TYIO0NK3FeI/AAAAAAAAhzo/Y4KC52lC_4o/s400/screenshots-with-adsloading-6.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh4.googleusercontent.com/-BF-RRQHDSTM/TYIO0bLYgLI/AAAAAAAAhzs/lHOTJo8FeGs/s1600/screenshots-with-adsloading-7.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="180" src="https://lh4.googleusercontent.com/-BF-RRQHDSTM/TYIO0bLYgLI/AAAAAAAAhzs/lHOTJo8FeGs/s400/screenshots-with-adsloading-7.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; text-align: left;"&gt;
&lt;/div&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;The role of parked domains: Laundering traffic&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;br /&gt;
At this point, we now know how this person makes money. Clearly, there is click-fraud: the scammer is employing click-fraud services to click on the pay-per-click ads "displayed" in his parked domains. If some of the ads are also pay-per-impression, he may also get paid for these invisible impressions that happen within the 0x0 iframe.&lt;br /&gt;
&lt;br /&gt;
Why the parked domains though? Why not do the same directly within the porn site? The answer is simple: &lt;b&gt;Traffic laundering.&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
What do I mean by "traffic laundering"? First, the ad networks are unlikely to place many ads within a porn site. On the other hand, they have ad-placement services for parked domains. Second, the publishers that get the traffic from the parked domains see in the referral URLs some legitimate-sounding domain names, not a porn site. Even if they go and check the site, they will only see an empty site full of ads. Nothing too suspicious. Hats off to the scammer. Clever scheme.&lt;br /&gt;
&lt;br /&gt;
You think we are done? No. There is one more piece in the puzzle. How does the scammer attract visitors to the porn site? &lt;/div&gt;
&lt;div&gt;
&lt;div&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;Generating traffic through an adult traffic exchange&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
The other interesting part: The porn website does not really contain porn!  There are a few images but most of the links are to other porn websites that actually host the videos. In other words, the scammer does not even pay the cost of hosting porn!&lt;br /&gt;
&lt;br /&gt;
However, according to QuantCast and Compete, the website has a pretty significant number of unique visitors per month. Here is the traffic over the last year:&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;iframe frameborder="0" height="205" marginheight="0px" marginwidth="0px" scrolling="no" src="http://www.quantcast.com/profile/embed?img=http%3A//www.quantcast.com/profile/trafficGraph%3Fwunit%3Dwd%253Acom.hqtubevideos%26drg%3D%26dty%3Dpp%26gl%3D1yr%26reachType%3Dperiod%26dtr%3Ddm%26width%3D522%26country%3DUK%26ggt%3Dlarge%26showDeleteButtons%3Dtrue&amp;amp;w=322&amp;amp;h=205&amp;amp;showDeleteButtons=false&amp;amp;wunit=Charts.Traffic.FrequencyGraph." width="322"&gt;&lt;/iframe&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;a href="http://siteanalytics.compete.com/hqtubevideos.com/?metric=uv"&gt;&lt;img src="http://grapher.compete.com/hqtubevideos.com_uv_310.png" /&gt;&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
This porn website gets 500K to 1M unique visitors per month! That is &lt;b&gt;a lot&lt;/b&gt; of traffic for a website without any real content! So, how does the guy get all the traffic?&lt;br /&gt;
&lt;br /&gt;
The answer surprised me. Apparently, there is an exchange (yes, my dear readers, an exchange!) for buying and selling adult traffic! Its name: &lt;a href="http://www.trafficholder.com/"&gt;TrafficHolder.com&lt;/a&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
Do you want to buy traffic for people interested in midget sex? The price is \$2.94 per thousand visitors. Interested in latex? The cost is \$2.54 per thousand visitors. Interested in HD video? \$3.54 per thousand visitors. (The running price catalog and the available traffic volume are available at &lt;a href="http://www.trafficholder.com/cgi-bin/traffic/manager/buying100.cgi"&gt;http://www.trafficholder.com/cgi-bin/traffic/manager/buying100.cgi&lt;/a&gt;)&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
How do the porn sites sell traffic to each other? Through pop-ups, pop-unders, by causing the first click to the website to redirect to the buyer's site. The term for this traffic is "&lt;a href="http://www.trafficholder.com/faq.html"&gt;skimmed traffic&lt;/a&gt;"&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
Following the trail, we figured out the source of the traffic for hqtubevideos.com: It was coming from the (very popular, apparently) website &lt;a href="http://www.pornoxo.com/"&gt;www.pornoxo.com&lt;/a&gt;. The reports from &lt;a href="http://www.quantcast.com/pornoxo.com"&gt;QuantCast &lt;/a&gt;and &lt;a href="http://siteanalytics.compete.com/pornoxo.com/"&gt;Compete &lt;/a&gt;confirm that PornoXo gets approximately 1 million unique visitors per month. If you visit the PornoXo.com website, you will see that the first click will create a pop-under that loads the page hqtubevideos.com/play.html. This is the page responsible for all the fraud that I described above.&lt;br /&gt;
&lt;br /&gt;
Based on the exchange prices and the visitorship at PornoXo, this traffic has a cost of \$3K/month for hqtubevideos.com, which is significant. So, we need to figure out how the scammer recovers this cost.&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;How much money are we talking about?&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;br /&gt;
So, the key question now: How much money does hqtubevideos.com generate through the scheme? To get a feeling for how much fraud is going on, please do the following:&lt;br /&gt;
&lt;ul style="text-align: left;"&gt;
&lt;li&gt;Open Chrome&lt;/li&gt;
&lt;li&gt;Open the options, and then Tools, then Developer Tools. This will load the monitoring tool.&lt;/li&gt;
&lt;li&gt;Switch to the "Network" tab&lt;/li&gt;
&lt;li&gt;Visit &lt;a href="http://www.hqtubevideos.com/play.html"&gt;http://www.hqtubevideos.com/play.html&lt;/a&gt; and see what is being loaded in the background (my own counting was approximately 1 ad loading per 10 seconds)&lt;/li&gt;
&lt;/ul&gt;
Let's do some back-of-the-envelope, very conservative, approximations:&lt;br /&gt;
&lt;ul style="text-align: left;"&gt;
&lt;li&gt;The site gets 500K-1M visitors per month&lt;/li&gt;
&lt;li&gt;The cost of this traffic is approximately \$1.5K to \$3K per month&lt;/li&gt;
&lt;li&gt;Each unique visit loads 7 sites, which then generate clicks. Let's assume that there is no reload of the invisible sites, to keep the estimates low.&lt;/li&gt;
&lt;ul&gt;
&lt;li&gt;Assuming 500K visitors and that just one click out of the seven sites goes through, this is 500K clicks per month (low estimate)&lt;/li&gt;
&lt;li&gt;Assuming 1M visitors and that all clicks, in all 7 sites, go through, this is 7M clicks per month (high estimate)&lt;/li&gt;
&lt;/ul&gt;
&lt;li&gt;A low-end estimate for &lt;a href="http://www.clickz.com/clickz/stats/1706952/average-search-cpc-data-category-february-2010"&gt;CPC click costs is 30 cents&lt;/a&gt;, out of which we can assume that the scammer gets, say, 10 cents.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;This generates a total income of &lt;span class="Apple-style-span" style="color: #990000;"&gt;\$50K to \$700K per month&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;&lt;b&gt;The scheme has been running for 8 months now, generating total revenue of \$400K to \$5M so far.&lt;/b&gt;&amp;nbsp;&lt;i&gt;(And you thought that investment bankers were getting paid a lot...)&lt;/i&gt;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
Notice that these approximations assume that the site only generates the direct clicks discussed above. You will notice that there is no end in the loading of ads, if you leave the website open for a while. Given that the site visitors come from PornoXo, there is a good chance they will keep watching the porn video at PornoXo, leaving hqtubevideos to load the ads in the background.&lt;br /&gt;
&lt;br /&gt;
But even with the modest estimates listed above, we are talking about a business that generates tens of thousands of dollars, with really minimal requirements. This is a scheme that a single person can set up in a week...&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;a href="http://www.blogger.com/post-edit.g?blogID=7118563403027467631&amp;amp;postID=3825542681386408252" name="overall"&gt;&lt;/a&gt;&lt;br /&gt;
&lt;b&gt;&lt;i&gt;Overall picture&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
Trying to put all pieces together, I created the following graphical summary to see what is going on:&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="https://lh6.googleusercontent.com/-OdTNSl6wHEM/TXF2a6jxXNI/AAAAAAAAhtE/b3kxxNFozIo/s1600/overall-diagram.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="302" src="https://lh6.googleusercontent.com/-OdTNSl6wHEM/TXF2a6jxXNI/AAAAAAAAhtE/b3kxxNFozIo/s400/overall-diagram.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;br /&gt;
Let's follow the flow of the users:&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;ol style="text-align: left;"&gt;
&lt;li&gt;Scammer buys user traffic from PornoXo.com and sends it to HQTubeVideos. &lt;/li&gt;
&lt;li&gt;HQTubeVideos loads, in invisible iframes, some parked domains with innocent-sounding names (relaxhealth.com, etc)&lt;/li&gt;
&lt;li&gt;In the parked domains, ad networks serve display and PPC ads.&lt;/li&gt;
&lt;li&gt;The click-fraud sites click on the ads that appear within the parked domains.&lt;/li&gt;
&lt;li&gt;The legitimate publishers get invisible/fraudulent traffic through the (fraudulently) clicked ads from parked domains.&lt;/li&gt;
&lt;li&gt;Brand advertisers place their ad on the websites of the legitimate publishers, which in reality appear within the (invisible) iframe of HQTubeVideos. &lt;/li&gt;
&lt;li&gt;AdSafe detects the attempted placement within the porn website, and prevents the ads of the brand advertiser from appearing on the legitimate website, which is hosted within the invisible frame of the porn site.&lt;/li&gt;
&lt;/ol&gt;
&lt;div&gt;
Notice how nicely orchestrated the whole scheme is: The parked domains "launder" the porn traffic. The ad networks place the ads in some legitimate-sounding parked domains, not in a porn site. The publishers get traffic from innocent domains such as RelaxHealth, not from porn sites. The porn site loads a variety of publishers, distributing the fraud across many publishers and many advertisers.&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;
&lt;div&gt;
&lt;div&gt;
&lt;div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;/div&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div&gt;
&lt;b&gt;&lt;i&gt;Who has the incentives to fight this? &lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
And now let's see who has the incentives to fight this. It is fraud, right? But I think it is a well-executed type of fraud. &lt;b&gt;It targets and defrauds the player that has the least incentives to fight the scam.&lt;/b&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
Who is affected? Let's follow the money:&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;/div&gt;
&lt;ol style="text-align: left;"&gt;
&lt;li&gt;The big brand advertisers (Continental, Coca Cola, Verizon, Vonage,...) pay the publishers and the ad networks for running their campaigns.&lt;/li&gt;
&lt;li&gt;The publishers pay the ad network and the scammer for the fraudulent clicks.&lt;/li&gt;
&lt;li&gt;The scammer pays PornoXo and TrafficHolder for the traffic.&lt;/li&gt;
&lt;/ol&gt;
&lt;/div&gt;
&lt;div&gt;
The ad networks see clicks on their ads, they get paid, so not much to worry about. They would worry if their advertisers were not happy. But here we have a piece of&amp;nbsp;genius:&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;div&gt;
The scammer &lt;i&gt;did not target sites that would measure conversions or cost-per-acquisition&lt;/i&gt;. Instead, the scammer was targeting mainly sites that sell pay-per-impression ads and video ads. If the publishers display CPM ads paid by impression, any traffic is good, all impressions count. It is not an accident that the &lt;b&gt;scammer targets publishers with video content, and plenty of pay-per-impression video ads&lt;/b&gt;. The publishers have no reason to worry if they get traffic and the cost-per-visit is low.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Effectively, the only ones hurt in this chain are the big brand advertisers, who feed the rest of the advertising chain. &lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Do the big brands care about this type of fraud? Yes and no, but not really deeply. Yes, they pay for some "invisible impressions". But this is a marketing campaign. In any case, not all marketing attempts are successful. Do all readers of the Economist look at the printed ads? Hardly. Do all web users pay attention to the banner ads? I do not think so. Invisible ads are just one of the things that make advertising a little bit more expensive and harder. Consider it part of the cost of doing business. In any case, compared to the overall marketing budget of these behemoths, the cost of such fraud is peanuts. &lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
The big brands do not want their brand to be hurt. If the ads do not appear in places inappropriate for the brand, things are fine. Fighting the fraud publicly? This will just associate the brand with fraud. No marketing department wants that.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Note also that the fraudster does not target a single publisher, does not target a single advertiser. The damage is amortized so nicely that nobody feels that it is a big deal. A mastery of the long tail...&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Well, but what if fraud is big? What if big bucks are wasted? Maybe some newspapers would like to investigate. Let's break the big story. What would be the effect? Publicizing that a significant source of their income (online advertising) is a dangerous thing, full of fraud? Who would like to shoot himself in the foot?&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;div&gt;
&lt;div&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;b&gt;&lt;i&gt;&lt;br /&gt;
&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;b&gt;&lt;i&gt;Fraud as (harmless?) parasite &lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Really. Genius. Defraud many rich guys a little bit each, and ensure that nobody has the incentives to really fight and chase the fraud.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
The guy essentially realized that this type of fraud is really behaving like a parasite within a much bigger ecosystem. And it is a parasite that is so costly to remove that it makes sense to leave it there. As long as the parasite does not annoy the host too much, things will be fine. &lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Only if fraud becomes really big will there be a real incentive to fight advertising fraud. Until then, you know how to make \$500K/month...&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: left;"&gt;
&lt;a href="http://www.blogger.com/"&gt;&lt;/a&gt;&lt;span id="goog_208365650"&gt;&lt;/span&gt;&lt;span id="goog_208365651"&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-4162663974219185399?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ol3yPU2akME:G0On342W0zc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ol3yPU2akME:G0On342W0zc:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=ol3yPU2akME:G0On342W0zc:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ol3yPU2akME:G0On342W0zc:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=ol3yPU2akME:G0On342W0zc:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ol3yPU2akME:G0On342W0zc:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ol3yPU2akME:G0On342W0zc:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/ol3yPU2akME" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/4162663974219185399/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/03/uncovering-advertising-fraud-scheme.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/4162663974219185399?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/4162663974219185399?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/ol3yPU2akME/uncovering-advertising-fraud-scheme.html" title="Uncovering an advertising fraud scheme. Or &quot;the Internet is for porn&quot;" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://lh5.googleusercontent.com/-GXHdZyVj3DI/TXEktLQfshI/AAAAAAAAhsM/Xa5o-DOTgYE/s72-c/hqtube-play_html.PNG" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/03/uncovering-advertising-fraud-scheme.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkUHSX05eSp7ImA9WhZUGUo.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-8372106548651983583</id><published>2011-03-14T19:41:00.004-04:00</published><updated>2011-06-13T10:10:38.321-04:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-06-13T10:10:38.321-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><title>Do Mechanical Turk workers lie about their location?</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;A few weeks back, Dahn Tamir graciously allowed me to take a peek at the data that he has been gathering about his workers on Mechanical Turk. He has assigned tasks over time to more than 50,000 workers on Mechanical Turk, so I consider his data to be one of the most representative samples of workers.&lt;br /&gt;
&lt;br /&gt;
One of the nice tasks that he has been running is a simple HIT in which he asks workers to report their location. At the same time, in this task, Dahn was recording the IP of the worker. Why was the task nice? Because there is absolutely no incentive for the workers to be truthful. The submission will be accepted and paid no matter what. In a sense,&lt;b&gt;&lt;i&gt; it is a test that checks if workers will be truthful in cases where it is not possible to check their accuracy&lt;/i&gt;&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
So, we used this test to check how sincere the workers are: We can simply geocode the IP address and find out the actual location of the worker. (With some degree of error, but good enough for approximation purposes.) For the workers that reported to be based in the US (approximately 22,000 workers), the HIT was asking for the zip code of the worker, making it easy to assign an approximate long/lat location.&lt;br /&gt;
&lt;br /&gt;
To measure how accurately the workers report their location, we measured the distance between the location of the IP and the location of the zip code. The plot below shows the distribution of the differences:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="https://lh6.googleusercontent.com/-shz_czOZqoI/TX6jVzh8ejI/AAAAAAAAhxo/nXAYPLjHsxE/s1600/distance-distribution.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="252" src="https://lh6.googleusercontent.com/-shz_czOZqoI/TX6jVzh8ejI/AAAAAAAAhxo/nXAYPLjHsxE/s400/distance-distribution.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="https://lh3.googleusercontent.com/-hX-EC7Xv8kI/TX6l4xltY2I/AAAAAAAAhxs/Qj-mS4n-9Ak/s1600/distance-cdf.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="230" src="https://lh3.googleusercontent.com/-hX-EC7Xv8kI/TX6l4xltY2I/AAAAAAAAhxs/Qj-mS4n-9Ak/s400/distance-cdf.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;br /&gt;
As you can see, most of the workers were pretty truthful about their location. The difference in distance was less than 10 miles for more than 60% of the workers: this difference can be easily explained by the limited accuracy of the geocoding APIs and by the approximation of using zipcode locations.&lt;br /&gt;
&lt;br /&gt;
Of course, the flip side of the coin is that a significant fraction of the workers were essentially lying about their location: For 10% of the workers (i.e., ~2250 of them) the IP address was more than 100 miles away from the reported zip code. For 2% of the workers (i.e., ~500 workers) the distance was more than 1000 miles away.&lt;br /&gt;
&lt;br /&gt;
The biggest liar? A worker from Chennai, India who reported a zip code corresponding to Tampa in Florida. The IP was a cool 9500 miles away from the reported location!&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-8372106548651983583?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=6VUPtlmHIe8:LLdn1E9wTX0:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=6VUPtlmHIe8:LLdn1E9wTX0:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=6VUPtlmHIe8:LLdn1E9wTX0:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=6VUPtlmHIe8:LLdn1E9wTX0:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=6VUPtlmHIe8:LLdn1E9wTX0:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=6VUPtlmHIe8:LLdn1E9wTX0:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=6VUPtlmHIe8:LLdn1E9wTX0:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/6VUPtlmHIe8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/8372106548651983583/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/03/do-mechanical-turk-workers-lie-about.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/8372106548651983583?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/8372106548651983583?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/6VUPtlmHIe8/do-mechanical-turk-workers-lie-about.html" title="Do Mechanical Turk workers lie about their location?" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://lh6.googleusercontent.com/-shz_czOZqoI/TX6jVzh8ejI/AAAAAAAAhxo/nXAYPLjHsxE/s72-c/distance-distribution.PNG" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/03/do-mechanical-turk-workers-lie-about.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkYAQn04eip7ImA9WhZTGUU.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-2757306212688729101</id><published>2011-03-11T01:01:00.004-05:00</published><updated>2011-03-24T13:35:43.332-04:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-03-24T13:35:43.332-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="publishers" /><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="acm" /><title>The Road to Serfdom, ACM Edition</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;&lt;b&gt;&amp;lt;rant&amp;gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
A couple of days back, I got the following email from &lt;a href="http://www.acm.org/"&gt;ACM&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;Dear Moderator/Chairs,&lt;br /&gt;
&lt;br /&gt;
This is being sent to everyone with the chairs cc'd as the last and final requeset for the eform below to be completed or your panel overview abstract will be removed from the WWW 2011 Companion Publication and will NOT appear in the ACM DL.&lt;br /&gt;
&lt;br /&gt;
Your prompt and immediate attention to the form below is needed.&lt;br /&gt;
&lt;br /&gt;
permission release form URL: ....&lt;br /&gt;
&lt;br /&gt;
ACM Copyrights &amp;amp; Permissions&lt;/blockquote&gt;&lt;br /&gt;
Given that this was the "&lt;i&gt;last and final requeset&lt;/i&gt;"[sic], I assumed that somehow I missed the previous requests. So, I checked my email to find out how late I was. Nope. Nothing in the archive, nothing in the trash, nothing in the spam, no entry in the delivery log. This was the &lt;b&gt;&lt;i&gt;first&lt;/i&gt;&lt;/b&gt; notification sent by ACM. They have just forgotten about this. But since they were running late, why not just threaten the authors? It is so much easier to pass the blame to others and be the first one to be aggressive. &lt;br /&gt;
&lt;br /&gt;
What happened ACM, did you start getting advice on customer service from your pals at Sheridan Printing, who tend to send requests &lt;a href="http://behind-the-enemy-lines.blogspot.com/2007/06/are-publishers-making-themselves.html"&gt;like this&lt;/a&gt;?&lt;br /&gt;
&lt;br /&gt;
But I should not have been so surprised. This email just reflects the overall attitude of ACM. I have experienced this many times in the past. Anyway, I decided to sign the e-form, without firing back.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;i&gt;Donating copyright to ACM&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Signing the form was a mechanical action before. However, after reading Matt Blaze's post &lt;a href="http://www.crypto.com/blog/copywrongs/"&gt;on copyright and academic publishing&lt;/a&gt;, I decided to read the form a little bit more carefully, to see exactly what I was signing.&lt;br /&gt;
&lt;br /&gt;
As usual, we start with a transfer of copyright to ACM. The authors agree to transfer all their copyright rights to ACM, blah blah...&lt;br /&gt;
&lt;br /&gt;
Wait a minute! Why does ACM need to &lt;b&gt;&lt;i&gt;own &lt;/i&gt;&lt;/b&gt;the copyright? No good reason. To publish and distribute the article, ACM just needs a &lt;i&gt;&lt;b&gt;non-exclusive license to print and distribute&lt;/b&gt;&lt;/i&gt;. There is no need to &lt;b&gt;&lt;i&gt;own &lt;/i&gt;&lt;/b&gt;the copyright.&lt;br /&gt;
&lt;br /&gt;
If we follow ACM's logic, any artist that wants to see their work exhibited in any museum needs to give up the ownership of their work and give full ownership of their creations to the museum. For free. Without expecting any royalties back in return. Ever. Furthermore, the museum, instead of promoting the work, would lock it in a "patron members access only". For all others, the museum would demand a separate entrance ticket to show &lt;b&gt;&lt;i&gt;each &lt;/i&gt;&lt;/b&gt;of the collection pieces. &amp;nbsp;(Say, for a friendly price of $5 to see each painting?)&amp;nbsp;.&lt;br /&gt;
&lt;br /&gt;
Anyway, let's not belabor the point with copyright. We know that ACM's policy sucks. We know that ACM is a bureaucracy serving just itself and not its members or the profession. Let's move on.&lt;br /&gt;
&lt;br /&gt;
Let's move to the point that really got me fired up.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;i&gt;Protecting ACM from liability&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
What got me really pissed was the &lt;a href="http://www.acm.org/publications/CopyReleaseProc-1.26.10.pdf"&gt;last part of the agreement&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;&lt;b&gt;Liability Waiver&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;* Your grant of permission is conditional upon you agreeing to the terms set out below.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
I hereby &lt;b&gt;&lt;i&gt;release and discharge ACM and other publication sponsors and organizers from any and all liability arising out of my inclusion in the publication, or in connection with the performance of any of the activities described in this document as permitted herein&lt;/i&gt;&lt;/b&gt;. This includes, but is not limited to, my right of privacy or publicity, copyright, patent rights, trade secret rights, moral rights or trademark rights.&lt;br /&gt;
&lt;br /&gt;
All permissions and releases granted by me herein shall be effective in perpetuity unless otherwise stipulated, and extend and apply to the ACM and its assigns, contractors, sublicensed distributors, successors and agents.&lt;br /&gt;
&lt;br /&gt;
&lt;/blockquote&gt;&lt;b&gt;&lt;i&gt;So, not only should we donate "voluntarily" ownership&amp;nbsp;&lt;/i&gt;&lt;/b&gt;&lt;b&gt;&lt;i&gt;of our copyright&lt;/i&gt;&lt;/b&gt;&lt;b&gt;&lt;i&gt;&amp;nbsp;to ACM. We also need to protect ACM from any liability.&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
In other words, ACM wants to get all the upside from owning the copyright, &lt;b&gt;&lt;i&gt;without ever distributing royalties&lt;/i&gt;&lt;/b&gt; to the contributing authors. (Not that it would be worth much. It is a matter of principle and a signal of respect to the authors, not an issue of monetary importance.) At the same time, ACM also wants the authors to provide a guarantee that if there is any problem with the copyright, the author will be the one liable for the damages.&lt;br /&gt;
&lt;br /&gt;
All the upside for ACM, no revenue to the authors. All the downside to the authors, no obligations for ACM.&lt;br /&gt;
&lt;br /&gt;
Thank you ACM for caring so much about your members. You will not be missed when you disappear.&lt;br /&gt;
&lt;br /&gt;
Yours truly,&lt;br /&gt;
A &lt;a href="http://plone.acm.org/membership/life"&gt;lifetime member&lt;/a&gt; of ACM.&lt;br /&gt;
&lt;br /&gt;
PS:&amp;nbsp;In retrospect, the title of the post is offensive: From Wikipedia's definition of &lt;a href="http://en.wikipedia.org/wiki/Serfdom"&gt;serfdom&lt;/a&gt;: "&lt;i&gt;Serfdom included the forced labor of serfs bound to a hereditary plot of land owned by a lord in return for protection&lt;/i&gt;". In other words, the&amp;nbsp;slave owners took the product of slaves' work, but in return they provided the protection and military support, to defend the slaves that were working the land. ACM also wants the slaves to "protect the land" as well. I owe an apology to the slave owners for the comparison.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&amp;lt;/rant&amp;gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-2757306212688729101?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Kos6jhlSUZQ:NtPPBZIkubA:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Kos6jhlSUZQ:NtPPBZIkubA:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Kos6jhlSUZQ:NtPPBZIkubA:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Kos6jhlSUZQ:NtPPBZIkubA:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Kos6jhlSUZQ:NtPPBZIkubA:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Kos6jhlSUZQ:NtPPBZIkubA:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Kos6jhlSUZQ:NtPPBZIkubA:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/Kos6jhlSUZQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/2757306212688729101/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/03/road-to-serfdom-acm-version.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2757306212688729101?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2757306212688729101?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/Kos6jhlSUZQ/road-to-serfdom-acm-version.html" title="The Road to Serfdom, ACM Edition" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/03/road-to-serfdom-acm-version.html</feedburner:origLink></entry></feed>

