<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;DEIHSXs6fCp7ImA9WhVTEUo.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631</id><updated>2012-02-25T08:28:58.514-05:00</updated><category term="clustering" /><category term="meetup" /><category term="data mining" /><category term="funny" /><category term="comic" /><category term="privacy" /><category term="structural models." /><category term="bayesian" /><category term="FROC" /><category term="presentation" /><category term="academia" /><category term="psychology" /><category term="string matching" /><category term="adwords" /><category term="market for lemons" /><category term="spam" /><category term="wisdom of the crowds" /><category term="presidential elections 2008" /><category term="open access" /><category term="lda" /><category term="probability" /><category term="cfp" /><category term="fraud" /><category term="humor" /><category term="frequentist" /><category term="power law" /><category term="deduplication" /><category term="keyword bidding" /><category term="mechanical turk" /><category term="advice" /><category term="reviews" /><category term="odesk" /><category term="customer service" /><category term="outliers" /><category term="acm" /><category term="dagstuhl" /><category term="data cleaning" /><category term="honda" /><category term="hcomp" /><category term="prediction markets" /><category term="incentives" /><category term="human computation" /><category term="call for papers" /><category 
term="drm" /><category term="peer reviewing" /><category term="online advertising" /><category term="efficient markets" /><category term="interviews" /><category term="quality" /><category term="payment" /><category term="statistics" /><category term="crowdsourcing" /><category term="google" /><category term="pricing" /><category term="yahoo" /><category term="education" /><category term="aca" /><category term="newsweek" /><category term="slides" /><category term="Rudy Giuliani" /><category term="extreme value theory" /><category term="reputation" /><category term="youtube" /><category term="mind maps" /><category term="conference" /><category term="demo" /><category term="large datasets" /><category term="propublica" /><category term="ranked xml querying" /><category term="evaluation" /><category term="cheating" /><category term="charity" /><category term="information extraction" /><category term="amazon" /><category term="wikis" /><category term="tagasauris" /><category term="industry analysis" /><category term="Mitt Romney" /><category term="embed" /><category term="teaching" /><category term="powerpoint" /><category term="computer science" /><category term="research" /><category term="reduced models" /><category term="tutorial" /><category term="wikipedia" /><category term="economics" /><category term="www2011" /><category term="surveys" /><category term="csdm" /><category term="minimum wage" /><category term="intellectual property" /><category term="search" /><category term="businessweek" /><category term="microsoft" /><category term="Hillary Clinton" /><category term="online labor" /><category term="independence" /><category term="readability" /><category term="machine learning" /><category term="ROC" /><category term="publishers" /><category term="cognitive dissonance" /><category term="gmail" /><category term="dirichlet" /><category term="assembly line" /><category term="merger" /><category term="typesetting" /><title>A Computer Scientist in a Business 
School</title><subtitle type="html">Random thoughts of a computer scientist who is now working behind the enemy lines. And lately he turned into a double agent.</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://www.behind-the-enemy-lines.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>195</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/AComputerScientistInABusinessSchool" /><feedburner:info uri="acomputerscientistinabusinessschool" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><geo:lat>40.72596</geo:lat><geo:long>-73.998345</geo:long><link rel="license" type="text/html" href="http://creativecommons.org/licenses/by/3.0/" /><logo>http://creativecommons.org/images/public/somerights20.gif</logo><feedburner:emailServiceId>AComputerScientistInABusinessSchool</feedburner:emailServiceId><feedburner:feedburnerHostname>http://feedburner.google.com</feedburner:feedburnerHostname><entry 
gd:etag="W/&quot;C0QMRXk7eSp7ImA9WhVTEU0.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-4832602240138693236</id><published>2012-02-23T17:17:00.000-05:00</published><updated>2012-02-24T11:36:24.701-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-24T11:36:24.701-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="wikipedia" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="interviews" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="online labor" /><title>Crowdsourcing and the end of job interviews</title><content type="html">When you discuss crowdsourcing solutions with people that have not heard the concept before, they tend to ask the question: "&lt;i&gt;Why is crowdsourcing so much cheaper than existing solutions that depend on 'classic' outsourcing?&lt;/i&gt;"
&lt;br /&gt;
&lt;br /&gt;
Interestingly enough, this is not a phenomenon that appears only in crowdsourcing. The Sunday edition of the New York Times has an article titled &lt;a href="http://www.nytimes.com/2012/02/26/magazine/why-are-harvard-graduates-in-the-mailroom.html"&gt;Why Are Harvard Graduates in the Mailroom?&lt;/a&gt;. The article discusses the job searching strategy in some fields (e.g., Hollywood, academic, etc), where talented young applicants are willing to start with jobs that are paying well below what their skills deserve, in exchange for having the ability to make it big later in the future:
&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;
[This is] the model lottery industry. For most companies in the business, it doesn’t make economic sense to, as Google does, put promising young applicants through a series of tests and then hire only the small number who pass. Instead, it’s cheaper for talent agencies and studios to hire a lot of young workers and run them through a few years of low-paying drudgery.... This &lt;i&gt;occupational centrifuge&lt;/i&gt; allows workers to effectively sort themselves out based on skill and drive. Over time, some will lose their commitment; others will realize that they don’t have the right talent set; others will find that they’re better at something else.&lt;/blockquote&gt;
&lt;br /&gt;
Interestingly enough, this &lt;b&gt;&lt;i&gt;occupational centrifuge&lt;/i&gt;&lt;/b&gt; is very close to the model of employment in crowdsourcing. 
&lt;br /&gt;
&lt;br /&gt;
In crowdsourcing, there is very little friction in entering and leaving a job. In fact, this is the crucial difference from traditional modes of employment: &lt;b&gt;&lt;i&gt;There is no interview and the employment is truly at will.&lt;/i&gt;&lt;/b&gt; You want to work on a task? Start working. You are bored? Stop working. No friction with an interviewing and hiring process, and no friction if the worker decides to stop working.
&lt;br /&gt;&lt;br /&gt;
As in the case of Hollywood and academia, the evaluation is being done on-the-job. While currently the model is mainly applied to small tasks, there is nothing that fundamentally prevents this model from being applied to any other form of employment. With the &lt;a href="http://www.udacity.com/" target="_blank"&gt;Udacity&lt;/a&gt; and &lt;a href="http://www.cs101-class.org/hub.php" target="_blank"&gt;Coursera&lt;/a&gt; model, we start seeing that concept being applied to education. Later on, we may see other jobs adapting this model for their purposes (stock trading, anyone?).
&lt;br /&gt;
&lt;br /&gt;
What you observe in such settings is that the distribution of participation and engagement is heavy-tailed, tending to follow a power-law: A few participants will provide a significant amount of input, while there will be a long tail of participants that will come, do a few things (&lt;i&gt;&lt;span style="font-size: x-small;"&gt;complete HITs on MTurk, write Wikipedia articles, watch lectures and homeworks in Coursera, trade stocks, pick your task...&lt;/span&gt;&lt;/i&gt;) and then leave.
&lt;br /&gt;
&lt;br /&gt;
What does it mean to have a power law distribution of participation in crowdsourced projects?
&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;It means that the long-tail of the occasional participants is just not naturally attracted to the task. The persistent few are the good matches for the task. This is self-selection at its best.&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
No interview needed, and only the people that are truly interested stick around.
&lt;br /&gt;&lt;br /&gt;
&lt;b&gt;&lt;span style="color: #990000;"&gt;Crowdsourcing is the new interview.&amp;nbsp;&lt;/span&gt;&lt;/b&gt;
&lt;br /&gt;
&lt;br /&gt;
The selection of the best participants happens naturally, without the artificial introduction of a selection process mediated through an interview. The interview is an artificial process. It tries to keep out from the task the participants that are not qualified and tries to identify the ones that are the best. This is an imperfect filter. It has false positives and also false negatives. Many people are hired with great hopes, just to be later proven to be ill-suited for the task (false positives). And many good people do not get the chance to work on a task just because they do not look good on paper (false negatives; I am dying to make a Jeremy Lin joke here...)
&lt;br /&gt;
&lt;br /&gt;
Think now of an environment where everyone gets a shot to try working on something they are interested in. No friction of getting hired and getting fired. You have a benefit where the best people work on the tasks that they are best at.&lt;span style="font-size: x-small;"&gt; &lt;i&gt;[You ask what if there are fewer dream jobs than available labor? What to do when training on the job is not possible (cough, doctors, cough). Let me dream for now, and let's bury under the carpet the millions of details that need to be addressed before this mode of operation has a shot at becoming reality.]&lt;/i&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;br /&gt;
To answer the question posed at the beginning of the post,&amp;nbsp;"&lt;i&gt;Why is crowdsourcing so much cheaper than existing solutions that depend on 'classic' outsourcing?&lt;/i&gt;" The process of self-selection in matching workers and tasks is the key reason why crowdsourcing is typically cheaper than the traditional process of directly assigning tasks to people. The easier it is for the crowd to find jobs they like, the more efficient the matching and execution.&lt;br /&gt;
&lt;br /&gt; When you effectively have the most interested and self-selected people working on a given task, the productivity of a team for the task is much higher than the performance of a team consisting of people that may simply be bored or not very interested in the task. &lt;a href="http://www.quora.com/10X-Engineers/Why-are-the-best-programmers-10x-more-productive-than-mediocre-programmers-but-paid-only-3x-as-much-Why-aren-t-they-paid-10x-as-much"&gt;Just consider the productivity of five programmers that are dedicated and enthusiastic&lt;/a&gt; about what they are building, compared to a similar team of five programmers that were assigned the task by someone and they have to implement it. &lt;br /&gt;
&lt;br /&gt;
At oDesk, there is a significant effort to improve the matching process of projects and contractors, by showing to contractors the best projects for them and to employers the best contractors for a task. My own dream is to be able to eliminate the friction of interviewing and get the process of finding a job and working to be as seamless as possible.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-4832602240138693236?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=wPa1QKalp54:98pP2dPsxnU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=wPa1QKalp54:98pP2dPsxnU:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=wPa1QKalp54:98pP2dPsxnU:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=wPa1QKalp54:98pP2dPsxnU:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=wPa1QKalp54:98pP2dPsxnU:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=wPa1QKalp54:98pP2dPsxnU:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=wPa1QKalp54:98pP2dPsxnU:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/wPa1QKalp54" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/4832602240138693236/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2012/02/crowdsourcing-end-of-job-interviews.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/4832602240138693236?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/4832602240138693236?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/wPa1QKalp54/crowdsourcing-end-of-job-interviews.html" title="Crowdsourcing and the end of job interviews" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2012/02/crowdsourcing-end-of-job-interviews.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU8HRX89cCp7ImA9WhRaGE4.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-1586100796338264724</id><published>2012-02-19T16:20:00.004-05:00</published><updated>2012-02-21T09:17:14.168-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-21T09:17:14.168-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom 
of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="online labor" /><category scheme="http://www.blogger.com/atom/ns#" term="odesk" /><category scheme="http://www.blogger.com/atom/ns#" term="efficient markets" /><title>The Need for Standardization in Crowdsourcing</title><content type="html">&lt;div style="text-align: justify;"&gt;
&lt;i&gt;[This is the blog version of a &lt;a href="http://www.ipeirotis.com/research/publications/the-need-for-standardization-in-crowdsourcing" target="_blank"&gt;brief position paper&lt;/a&gt; that &lt;a href="http://www.john-joseph-horton.com/" target="_blank"&gt;John Horton&lt;/a&gt; and I wrote last year on the advantages of standardization for crowdsourcing. Edited for brevity. Random pictures added for fun.]&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Crowdsourcing has shown itself to be well-suited for the accomplishment of various tasks. Yet many crowdsourceable tasks still require extensive structuring and managerial effort to make crowdsourcing feasible. This overhead could be substantially reduced via standardization. &lt;b&gt;&lt;span style="color: #990000;"&gt;In the same way that task standardization enabled the mass production of physical goods, standardization of basic “building block” tasks would make crowdsourcing more scalable&lt;/span&gt;&lt;/b&gt;.&amp;nbsp;Standardization would make it easier to set prices, spread best practices, build meaningful reputation systems and track quality.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Why standardizing?&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Crowdsourcing has emerged over the last few years as a promising solution for a variety of problems. What most problems have in common is one or more sub-problems that cannot be fully automated, and require human labor. This labor demand is being met by workers recruited from online labor markets such as Amazon Mechanical Turk, Microtask, oDesk and Elance or from casual participants recruited by intermediaries like CrowdFlower and CloudCrowd.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
In labor markets, buyers and sellers have great flexibility in the tasks they propose and the making and accepting of offers.&amp;nbsp;The flexibility of online labor markets is similar to the flexibility of traditional labor markets. In both markets, buyers and sellers are free to trade almost any kind of labor at almost any terms. However, an important distinction between online and offline is that once a worker is hired off an offline, traditional market, they are not allocated to tasks via a spot market. Workers within firms are employees who have been screened, trained for their jobs and have incentives for good performance—at a minimum, poor performance can cause them to lose their jobs. Furthermore, for many jobs—particularly those focusing on the production of physical goods—good performance is very well defined, in that workers must adhere to a standard set of instructions.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="color: #990000;"&gt;&lt;b&gt;This standardization of tasks is the essential feature of modern production. The question is how to apply this idea in crowdsourcing.&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-4kSztAvZlY4/T0B9vZZIWgI/AAAAAAAAuoU/e856GlwuB7Y/s1600/Assembly-Line.jpg" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="267" src="http://4.bp.blogspot.com/-4kSztAvZlY4/T0B9vZZIWgI/AAAAAAAAuoU/e856GlwuB7Y/s320/Assembly-Line.jpg" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Crowdsourcing, the dream: The assembly line for knowledge work&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
With task standardization, innovators like Henry Ford could ensure that hired workers—after suitable training—could complete those tasks easily, predictably and in a way that training was easy to replicate for new workers. To return to paid crowdsourcing, most of the high demand crowdsourcing tasks are relatively low-skilled and require workers to closely and consistently adhere to instructions for a particular, standardized task.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-N_PVygGMKfA/T0B9wLq_rBI/AAAAAAAAuoc/A6MaKKfQpM4/s1600/Bazaar.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="251" src="http://1.bp.blogspot.com/-N_PVygGMKfA/T0B9wLq_rBI/AAAAAAAAuoc/A6MaKKfQpM4/s320/Bazaar.png" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Crowdsourcing, the reality: The bazaar of knowledge work&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
As it currently stands, existing crowdsourcing platforms bear little resemblance to Henry Ford’s car plants. In crowdsourcing markets, the factory would be more like an open bazaar where workers could come and go as they pleased, receiving or making offers on tasks that differ in their difficulty and skill requirements (“install engines!”, “add windshields!”, “design a new chassis!”) for different rates of pay—and with different pricing structures (fixed payment, hourly wages, incentives etc.). Some buyers would be offering work on buses, some on cars, some on lawnmowers. Reputations would be weak and easily subverted. Among both buyers and sellers, one can find scammers; some buyers are simply recruiting accomplices for nefarious activities.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
The upside of such a disorganized market is that workers and buyers have lots of flexibility. There are good reasons for not wanting to just recreate the on-line equivalent of a single-firm factory. However, we do not think it is an “either-or” proposition. In this paper, we discuss ways that we can have more structure on a marketplace platform, without undermining its key advantages. In particular, we believe that greater task standardization, a cultivated garden approach to work-pools and a market-making type work allocation mechanism to help arrive at prices could help us build scalable human-powered systems that meet real-world needs.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Current status&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Despite the excitement and apparent industry maturation, there has been relatively little innovation—at least at the micro-work level—in the technology of how workers are allocated tasks, how reputation is managed and how tasks are presented etc. As innovative as MTurk is, it is basically unchanged since its launch. The criticisms of MTurk—the difficulty of pricing work, the difficulty in predicting completion times and gaining quality, the inadequacy of the way that workers can search for tasks—are recurrent and still unanswered. Would-be users of crowdsourcing often fumble, with even technically savvy users getting mixed results. Best practices feel more like folk wisdom than an emerging consensus. Even more troubling, there is some evidence that at least some markets are becoming inundated with spammers.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-lpW9tHRiMXY/T0OnS5ko9VI/AAAAAAAAuqY/FDDGflb-Y7U/s1600/utest.PNG" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="224" src="http://2.bp.blogspot.com/-lpW9tHRiMXY/T0OnS5ko9VI/AAAAAAAAuqY/FDDGflb-Y7U/s320/utest.PNG" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;uTest: An example of verticalized crowdsourcing&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div style="text-align: justify;"&gt;
One part of the crowdsourcing ecosystem that appears to be thriving is the “curated garden” approach used by companies like uTest (testing software), MicroTask (quality assurance for data entry), CloudCrowd (proofreading and translation), and LiveOps (call centers). These firms recruit and train workers for their standardized tasks and they set prices on both sides of the market. Because the task is relatively narrow, it is easier to build meaningful, informative feedback and verify ex ante that workers can do the task, rather than try to screen bad work out ex post. While this kind of control is not free, practitioners gain the scalability and cost savings of crowdsourcing without the confusion of the open market. The downside of these walled gardens is that access as both a buyer and seller is limited. One of the great virtues of more market-like platforms is that they are democratic and easy to experiment on. The natural question is whether it is possible to create labor pools that look more like curated gardens—with well defined, standardized tasks—and yet are still relatively open, both to new buyers and sellers?&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Standardizing basic work units&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Currently, the labor markets operate in a completely uncoordinated manner. Every employer generates its own work request, prices the request independently, and evaluates the answers separately from everyone else. Although this approach has some intuitive appeal in terms of worker and employer flexibility, it is a fundamentally inefficient approach.&lt;/div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;Every employer has to implement from scratch the “best practices” for each type of work. For example, there are multiple UI’s for labeling images, or for transcribing audio. The longterm employers learn from their mistakes and fix the design problems, while newcomers have to learn the lessons of bad design the hard way.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;Every employer needs to price its work unit without knowing the conditions of the market and this price cannot fluctuate without removing and reposting the tasks.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;Workers need to learn the intricacies of the interface for each separate employer.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;Workers need to adapt to the different quality requirements of each employer.&lt;/li&gt;
&lt;/ul&gt;
&lt;div style="text-align: justify;"&gt;
The efficiency of the market can increase tremendously if there is at least some basic standardization of the common types of (micro-)work that is being posted on online labor markets.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
So, what are these common types of (micro-)work that we can standardize? Amazon Mechanical Turk lists &lt;a href="https://requester.mturk.com/bulk/hit_templates/"&gt;a set of basic templates&lt;/a&gt;, which give a good idea of what tasks are good candidates to standardize first. The &lt;a href="http://dl.acm.org/citation.cfm?id=1869094"&gt;analysis of the Mechanical Turk marketplace&lt;/a&gt;&amp;nbsp;also indicates a set of tasks that are very frequent on Mechanical Turk and are also good candidates to standardize.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-Fou23xfxmU4/T0B-2rz1IXI/AAAAAAAAuok/7cKWLfdTnc8/s1600/Table_of_Mechanicks%252C_Cyclopaedia%252C_Volume_2.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/-Fou23xfxmU4/T0B-2rz1IXI/AAAAAAAAuok/7cKWLfdTnc8/s400/Table_of_Mechanicks%252C_Cyclopaedia%252C_Volume_2.png" width="258" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Simple Machines, the standardized units for mechanics. Can we create corresponding simple machines for labor?&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
We can draw a parallel with engineering: In mechanics, we have a set of “&lt;a href="http://en.wikipedia.org/wiki/Simple_machine"&gt;simple machines&lt;/a&gt;,” such as screws, levers, wheel and axle, and so on. These simple machines are typically standardized and serve as components for larger, significantly more complicated creations. Analogously, in crowdsourcing, we can define a set of such simple tasks, standardize them, and then build, if necessary, more complicated tasks on top. &lt;b&gt;&lt;span style="color: #990000;"&gt;What are the advantages of standardizing the simple tasks, if we only need them as components?&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;Reusability: &lt;/b&gt;First of all, as mentioned above, there is no need for requesters to think on how to create the user interfaces and best practices for such simple tasks. These standardized tasks can be, of course, revised over time to reflect our knowledge on how to best accomplish them.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;Trading commodities:&lt;/b&gt; Second, and potentially more important, these simple tasks can be traded in the market in the same way that stocks and commodities are currently traded in financial markets. In stock markets, the buyer does not need to know who is the seller, or whether the order was fulfilled by a single seller or multiple ones: it is the task of the market maker to match and fulfill buy and sell orders. In the same way, we can have a queue of standardized tasks that need to be completed, and workers can complete them at any time, without having to think about the reputation of the requester or to refamiliarize themselves with the task. This should lead to much more efficient task execution.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;True market pricing:&lt;/b&gt;&amp;nbsp;A third advantage of standardized work units is that pricing becomes significantly simpler. Instead of “testing the market” to see what price point leads to an optimal setting, we can instead have a very “liquid” market with a large number of offered tasks and a large number of workers that work on these tasks. This can lead to stock-market-like pricing. The tasks get completed by the workers, in priority order according to the offered price for the work unit: the highest paying units get completed first. So, if requesters want to prioritize their own tasks, they can simply price them higher than the current market price. This corresponds to an increase in demand, which moves up the market price. On the other hand, if no requesters post tasks, then, once the tasks with the highest prices get completed, we automatically move to the tasks that have lower prices associated with them. This corresponds to the case where the supply of work is higher than the demand, and market prices for the work unit move down.&lt;/li&gt;
&lt;/ul&gt;
&lt;div style="text-align: justify;"&gt;
In cases where there is not enough “liquidity” in the market (i.e., when the workers are not willing to work for the posted prices), then we can employ &lt;a href="http://dl.acm.org/citation.cfm?id=1807402"&gt;automated market makers, such as the ones currently used by prediction markets&lt;/a&gt;. The process would then operate like this: The workers identify the price for which they are willing to work. Then, the automated market maker takes into consideration the “ask” (the worker quote) and the “bid” (the price of the task), and can perform the trade by “bridging” the difference. Essentially, such automated market makers provide a subsidy in order for the transactions to happen. We should note that a market owner can typically benefit even in scenarios, where they need to subsidize the market through an automated market maker: the fee from a transaction that happens can cover the necessary subsidy which is consumed by the automated market maker.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-kBCWEJziXwQ/T0CAXPldtuI/AAAAAAAAuos/inD6Df-q_ko/s1600/bid-ask.jpg" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="175" src="http://3.bp.blogspot.com/-kBCWEJziXwQ/T0CAXPldtuI/AAAAAAAAuos/inD6Df-q_ko/s320/bid-ask.jpg" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Can we trade and price standardized crowdsourced tasks as we trade and price securities?&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Having basic, standardized work units with highly liquid, high-volume markets can serve as a catalyst for companies to adopt crowdsourcing. Standardization can strengthen the network effects, can provide the basis for better reputation systems, can facilitate pricing, and can lead to the easier development of more complicated tasks that consist of an arbitrary combination of small work units.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;CONSTRUCTING AND PRICING COMPOSITE TASKS&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Once we have some basic work units in place, we can start generating tasks that consist of multiple such units, to generate tasks that cannot be achieved with just using basic units. Again we can draw the analogy from mechanical engineering: the “simple machines” (screws, levers, wheel and axle, and so on) can then be assembled together to generate machines of arbitrary complexity. Similarly, in crowdsourcing we can use this standardized set of “simple work units” that can be later assembled to generate tasks of arbitrary complexity.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;Quality Assurance&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Assume that we have a basic work unit for a task such as comment moderation, that guarantees an accuracy of 80% or higher (e.g., by screening and testing continuously the workers that can complete these tasks). If we want to have a work unit that has higher quality guarantees, we can generate a composite unit that uses multiple, redundant work units and relies on, say, majority vote to generate a work unit with higher quality guarantees.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;Pricing Workflows&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
There is already work available on how to &lt;a href="http://dl.acm.org/citation.cfm?doid=1600150.1600159"&gt;create&lt;/a&gt;&amp;nbsp;and &lt;a href="http://ai.cs.washington.edu/projects/decision-theoretic-control-crowdsourced-workflows"&gt;control the quality of workflows&lt;/a&gt; in crowdsourced environments. We also have &lt;a href="http://www.workflowpatterns.com/patterns/"&gt;a set of design patterns for workflows in general&lt;/a&gt;. If we have a crowdsourced workflow that consists of standardized work units, we can also accurately price the overall workflow.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-zTMHYRFBEko/T0EsczFR9uI/AAAAAAAAupE/vv-Po43pIYY/s1600/workflow.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="305" src="http://4.bp.blogspot.com/-zTMHYRFBEko/T0EsczFR9uI/AAAAAAAAupE/vv-Po43pIYY/s320/workflow.png" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Pricing complex, workflow-based tasks becomes significantly easier when the basic execution units in the workflow are standardized and priced by the market.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
We do not even have to reinvent the wheel: there is a significant amount of work on &lt;a href="http://dl.acm.org/citation.cfm?id=1374421"&gt;pricing combinatorial contracts&lt;/a&gt;&amp;nbsp;in prediction markets. (An example of a combinatorial contract: “Obama will win the 2012 election and will win Ohio” or “Obama will win the 2012 election given that he will win Ohio”.) A workflow can be expressed as a combinatorial expression of the underlying simple work units. Since we know the price of standard units, we can easily leverage work from prediction markets to price tasks of almost arbitrary complexity. The successful deployment of &lt;a href="http://predictalot.yahoo.com/"&gt;Predictalot&lt;/a&gt; by Yahoo! during the 2010 soccer World Cup, with the extensive real-time pricing of complicated combinatorial contracts, gives us the confidence that such a pricing mechanism is also possible for online labor markets.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;Timing and Optimizing Workflows&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
There is already a significant amount of work in distributed computing on &lt;a href="http://dl.acm.org/citation.cfm?id=1687568"&gt;optimizing execution of task workflows in Mapreduce-like environments&lt;/a&gt;. This research should be directly applicable in an environment where the basic computation is performed not by computers but by humans. Also, since the work units will be completed through easy-to-model waiting queues, we can easily leverage the work from queuing theory to estimate how long a task will remain within the system: by identifying the critical parts of execution we can also identify potential bottlenecks and increase the offered prices for only the work units that critically affect the completion time of the overall task.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Role of platforms&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
One helpful way to think about the role and incentives of online labor platforms is to consider that they are analogous to a commerce-promoting government in a traditional labor market. Most platforms levy an ad valorem charge and thus they have an incentive to increase the size of the total wage bill. While there are many steps these markets can take, their efforts fall into two categories:&lt;/div&gt;
&lt;ol&gt;
&lt;li style="text-align: justify;"&gt;remedying externalities, and&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;setting enforceable standards and rules, i.e., their “weights and measures” function.&lt;/li&gt;
&lt;/ol&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;Remedying Externalities&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
An externality is created whenever the costs and benefits from some activity are not solely internalized by the person choosing to engage in that activity. A negative example is pollution—the factory owner gets the goods, others get the smoke—while a positive example is yard beautification (the gardener works and buys the plants, others get to enjoy the scenery). Because the parties making the decision do not fully internalize the costs and benefits, activities producing negative externalities are (inefficiently) over-provided, and activities producing positive externalities are (inefficiently) under-provided. In such cases, “government” intervention can improve efficiency.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-wLB7hihUISY/T0Ezcc7IqPI/AAAAAAAAupM/GmQv_a-UBN8/s1600/traffic-cartoon.gif" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="205" src="http://1.bp.blogspot.com/-wLB7hihUISY/T0Ezcc7IqPI/AAAAAAAAupM/GmQv_a-UBN8/s320/traffic-cartoon.gif" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Road traffic is an example of a product with negative externalities.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div style="text-align: justify;"&gt;
Negative examples are easy to find in on-line labor markets— fraud is one example. Not only is fraud unjust, it also makes everyone else more distrustful, lowering the volume and value of trade. Removing bad actors helps ameliorate the market-killing problem of information asymmetry, as uncertainty about the quality of some good or service is often just the probability that the other trading partner is a fraud.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
A positive example is honest feedback after a trade. Giving feedback is costly to both buyers and sellers: It takes time and giving negative feedback invites retaliation or scares off future trading partners. In the negative case, the platform needs to fight fraud—not simply fraud directed at itself but fraud directed at others on the platform, which has a negative second-order effect on the platform creator. In the positive case, the firm can make offering feedback more attractive, by offering rewards, making it mandatory, making it easier, changing rules to prevent retaliation, etc.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
There are lots of options in both the positive and negative case— the important point is that platform creators recognize externalities and act to encourage positive externalities and eliminate the negative ones. Individual participants do not have the incentives (or even the ability) to fix the negative externalities for all other market participants. For example, no employer has the incentive to publish his own evaluation of the workers that work for him, as this is a signal earned after a significant cost for the employer. This is a case where the market owner can provide the appropriate incentives and designs for the necessary transparency.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;Setting Enforceable Standards&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Task standardization will probably require buy-in from online labor markets and intermediaries. Setting cross-platform standards is likely to be a contentious process, as the introduction of standards gives different incentives to different firms, depending upon their business model and market share. However, at least within a particular platform and ignoring their competitors, there is a powerful incentive to create standards as they raise the value of paid crowdsourcing and promote efficiency. For example, the market for SMS’s took off in the US only when the big carriers agreed on a common interoperable standard for sending and receiving SMS’s across carriers’ networks.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-vhTJBf0wEPA/T0CBcFDLK5I/AAAAAAAAuo0/JYyxwcFoah8/s1600/standard_units.PNG" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="252" src="http://1.bp.blogspot.com/-vhTJBf0wEPA/T0CBcFDLK5I/AAAAAAAAuo0/JYyxwcFoah8/s320/standard_units.PNG" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Standardizing units of measure facilitates transactions and gives us flexibility to create more complex units on top.&amp;nbsp;Can we achieve the same standardization for labor?&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
In traditional markets, market-wide agreement about basic units of measure facilitates trade. In commodity markets, agreements about quality standards serve a similar role, in that buyers know what they are getting and sellers know what they are supposed to provide. (For example, electricity producers are required to produce electricity adhering to some minimum standards before being able to connect to the grid and sell to other parties.) It should be clear that having public standards makes quality assurance easier for the platform: enforcing standards on standardized units of work can be done much more easily than enforcing quality standards in a wide variety of ad hoc tasks. With such standards, it is easier to imagine platform owners more willingly taking the role of testing for and enforcing quality standards for the participants that provide labor.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
If we define weights and measures more broadly to include verification of claims, the platform role becomes even wider. They can verify credentials, test scores, work and payment histories, reputation scores and every other piece of information that individuals cannot credibly report themselves. Individuals are also not able to credibly report the quality of their work, but at least with an objective standard, validating those claims is possible. (For example, one of the main innovations made by oDesk was that they logged a worker’s time spent on a task, enabling truthful hourly billing.)&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Conclusion&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
As our knowledge increases and platforms and practices mature, more work will be outsourced to remote workers. On the whole, we think this is a positive development, particularly because paid crowdsourcing gives people in poor countries access to buyers in rich countries, enabling a kind of virtual migration.&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
At the same time, access to an on-demand, inexpensive labor force, more often than not, enables the creation of products and services that were not possible before: Once you solve a problem that was deemed too-costly-to-solve before, people start looking for the next thing to fix. This in turn generates more positions, more demand, and so on. It is a virtuous cycle, not the Armageddon.&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-1586100796338264724?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=j-2fT9byecg:PzH-WL-RH_U:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=j-2fT9byecg:PzH-WL-RH_U:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=j-2fT9byecg:PzH-WL-RH_U:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=j-2fT9byecg:PzH-WL-RH_U:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=j-2fT9byecg:PzH-WL-RH_U:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=j-2fT9byecg:PzH-WL-RH_U:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=j-2fT9byecg:PzH-WL-RH_U:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/j-2fT9byecg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/1586100796338264724/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2012/02/need-for-standardization-in.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1586100796338264724?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1586100796338264724?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/j-2fT9byecg/need-for-standardization-in.html" title="The Need for Standardization in Crowdsourcing" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-4kSztAvZlY4/T0B9vZZIWgI/AAAAAAAAuoU/e856GlwuB7Y/s72-c/Assembly-Line.jpg" height="72" width="72" /><thr:total>2</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2012/02/need-for-standardization-in.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEcBQnw-eCp7ImA9WhRaFkU.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-4610013927731042806</id><published>2012-02-18T21:17:00.001-05:00</published><updated>2012-02-19T16:14:13.250-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-19T16:14:13.250-05:00</app:edited><category 
scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="odesk" /><category scheme="http://www.blogger.com/atom/ns#" term="human computation" /><title>Mechanical Turk vs oDesk: My experiences</title><content type="html">&lt;div style="text-align: justify;"&gt;
&lt;i&gt;[Necessary disclaimer: I work with the oDesk Research team as the "academic-in-residence." The experiences that I describe in this blog post are the &lt;b&gt;reason &lt;/b&gt;that I started working with oDesk. I am &lt;b&gt;not &lt;/b&gt;writing this because I started working with oDesk. And at the end of the day, I doubt that &lt;a href="https://www.odesk.com/oconomy/"&gt;oDesk needs my blog posts to get visibility&lt;/a&gt; :-)]&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
A question that I receive often is how to structure tasks on Mechanical Turk for which it is necessary for the workers to pass training before doing the task. My common answer to most such questions is that Mechanical Turk is not the ideal environment for such tasks: When training and frequent interaction are required, an employer is typically better off using a site such as oDesk to hire people for the long term to do the job.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Mechanical Turk: The choice for short-term, bursty tasks&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Mechanical Turk tends to shine in cases where demand is super bursty. A task appears out of nowhere, it requires 1000 people to work on it for 2-3 hours each, and get it done within a couple of days. Then the task disappears, and everyone moves on. For such scenarios, I cannot think of a better alternative than Mechanical Turk. &lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;The blessing and curse of the long tail&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Why does Mechanical Turk allow easy scaling to a large number of workers? Because you can reach a large number of workers quickly. Admittedly, most people will just come and do a few tasks and then disappear. The old saying "&lt;i&gt;80% of the work gets done by 20% of the workers&lt;/i&gt;" is typically translated on MTurk as "&lt;i&gt;80% of the work gets done by 2% of the workers&lt;/i&gt;". But even these people that work on just a few tasks can contribute a significant amount of work in the aggregate. &lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
But this is also a problem: Workers that complete just a few tasks cannot be evaluated by any reasonable method of statistical quality control. To have a confident measurement of the true performance of the workers, it is not uncommon to require 500 tasks or more. It is highly unclear how you can convince a Turker to stick around for so long.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;The task listing interface interferes with task completion times&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Since workers tend to rank tasks either by "most recent" or by "most HITs available", the allocation of visibility varies significantly across tasks. If a task gets buried in the 5th or 6th page of the results, it is effectively dead. Nobody looks at the task anymore, and the rate of completion gets pretty close to zero. &lt;a href="http://www.behind-the-enemy-lines.com/2009/02/your-estimated-completion-time-infinite.html"&gt;Such tasks are effectively abandoned and will never finish.&lt;/a&gt; You need to "refresh" the task by posting some extra HITs within the task, take the task down and repost it, or play other tricks to get people to look at your task again. Needless to say this is completely unnecessary overhead, a pure result of bad design.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;The curse of simplicity&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Unfortunately, the ability to scale on demand has some additional drawbacks that are more subtle but, at the end, more important. The key problem: the need for simplicity. &lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
When you suddenly require 1000 new people to work on your task, it is advisable to structure the task as if planning for the worst case scenario. This means that every worker is treated as a first grader; the tasks should be described in the most simple way possible. This often necessitates the generation of workflows that chop the tasks into tiny, easily-digestible pieces, effectively embedding "training" in the process. &lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
As an example, consider the case of classifying a page as containing "hate speech". Since it is not possible to get the workers to watch a 1-hour tutorial on what exactly is considered hate speech, the task on Mechanical Turk ends up being a loooong list of questions, such as "Do you see any racist jokes?", "Do you see any mention of male chauvinism?", "Do you see any call for violence against a specific ethnic group?" etc etc. Such brain-dead-simple workflows can ensure quality even when the workers are completely inexperienced. With such workflows it is also easy to defend against potential attacks from scammers that may try to submit junk, hoping to get paid for sub-par work.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
However, there is a catch: Such micro-task workflows start getting in the way once workers become more experienced. A worker that has spent a few hours examining pages for hate speech has all these questions in his brain, and can process a page much faster. The clickety-click approach with simple, easy-to-chew questions worked early on, to train the worker, but now it is a tedious micromanager embedded in the system. &lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;oDesk: The choice for long-term tasks&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
When the tasks are expected to last for many days, weeks, or months, then Mechanical Turk is often a suboptimal choice. The continuous need to fight scammer workers, the inability to interact easily with the workers, etc., make it much easier to just go on oDesk and hire a few people there to work on the task.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;How I learned about oDesk as a platform for "micro"-work&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
While I knew about oDesk as an alternative to Rent-A-Coder and eLance, I never thought about oDesk as a platform for completing tasks similar to the ones done on Mechanical Turk. In &lt;a href="http://www.behind-the-enemy-lines.com/2010/07/liveblogging-from-hcomp-2010.html"&gt;HCOMP 2010&lt;/a&gt; though, I learned about the efforts of Metaweb that used oDesk, paying workers on an hourly basis, as opposed to paying piecemeal. This allowed them to get workers to focus on the hard cases; on MTurk people have the incentive to skip the hard cases and perform only the easy tasks that can be done very quickly. &lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
I had seen this problem with the AdSafe tasks that we were running on Mechanical Turk: workers were doing a decent job on classifying pages for the easy cases, but if the page was hard to classify (e.g., if you had to read the text to understand its true content, as opposed to looking at the images) then workers were just skipping or were giving a random answer. To fight this problem, I decided to give it a shot and hire a team of approximately 30 workers from oDesk to annotate web pages.&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;Migrating from Mechanical Turk to oDesk&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Although the migration of a task from MTurk to oDesk seems like a tedious task, it is often pretty simple, and this is due to a design flaw (?) of Mechanical Turk. What is this flaw? If you use the Mechanical Turk capabilities for building a HIT, you are very restricted in terms of what html you can use, and what subset of JavaScript. The solution for anyone who wants to do anything moderately complicated is to build a bespoke html interface and host it within an iframe in the MTurk website. &lt;b&gt;&lt;span style="color: #990000;"&gt;This "iframe-based MTurk HIT" is effectively a custom web application&lt;/span&gt;&lt;/b&gt;. This web application is trivially easy to adapt, to handle workers from &lt;i&gt;any &lt;/i&gt;platform. Instead of logging in using the MTurk worker id, workers from other platforms can log in directly to your website. The added bonus? The workers can use the full screen real-estate.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
When I am using oDesk, I tend to hire people with minimum checking, and as part of the welcome message, the workers receive an email with their username and password for my website that hosts the MTurk HITs. I noticed lately that oDesk has an API as well, which can be used to further automate the process. But even for hiring workers manually, I could handle the task rather easily for hiring 30-50 workers, who then become effectively permanent employees, working on my tasks only, and getting paid hourly.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
One of the things that I want to learn to do more effectively is to use the &lt;a href="http://developers.odesk.com/w/page/12364003/FrontPage"&gt;oDesk API&lt;/a&gt; to open job slots and hire people. While oDesk does not provide direct capabilities for creating a UI for task handling and execution, I do not use the MTurk UI in any case. So, this is a functionality that I do not really miss.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;i&gt;Providing training and &lt;/i&gt;&lt;i&gt;interacting with oDesk workers&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
When the need for human labor is long-term, it makes sense to ask the oDesk workers to first spend some time familiarizing themselves with the task, watching some training videos etc. Even asking them to spend 5-6 hours for training themselves is not an unusual request and most oDeskers will happily oblige: They know that there is plenty of work coming up, so they do not mind spending their "first day at work" to familiarize themselves with the task that you give them. They prefer to keep a stable job, instead of having to continuously look around for new projects.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
A neat trick that I learned at oDesk is the following: Ask your workers to join a common Skype chatroom. (Or some other chat-room of your choice.) Using this chatroom, you can communicate with your workers in real time, informing them about system issues, directing them to work on specific tasks, giving clarifications, etc etc. I personally find that setting quite amazing, and makes me feel like a modern day factory owner :-). I drop by to say hello to the workers, I ask for feedback, workers welcome the new members and provide clarifications and training, etc. In general, a very happy atmosphere :-)&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;Lessons on quality control from MTurk, being applied to oDesk&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
I have to admit, though, that the MTurk experience makes working with oDesk workers much more effective. When working with MTurk tasks, all requesters tend to develop various schemes of quality control, to measure the performance of each worker. These metrics make life much easier when managing big teams on oDesk. Effectively, you get automatic measurements of performance, that allow easy discovery of problematic workers. &lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
I had experiences in the past with workers that were very articulate, very enthusiastic, very energetic, and ... completely sucked at the task at hand. In a regular work environment, such workers may never be identified as problematic cases. They are the life of the company, they bring the vibe and the energy. But the quality management schemes, developed due to the quality challenges on handling Mechanical Turk tasks, become useful on oDesk as well. &lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;The extras&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Extra bonus 1? On oDesk, I never had to deal with a scammer and nobody attempted to game the system. oDesk runs a pretty strong identity verification scheme, which makes each worker a person tied to a real-world identity, as opposed to the disposable MTurk workerIDs. (I will explain in a later post how easy it is to bypass the identity verification step on MTurk.) But the very fact that there is a basic reputation system (with its own flaws, but this is a topic for another post) makes a huge difference in how workers approach the posted tasks.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Extra bonus 2? The hired oDeskers work only on your tasks! You do not have to worry about a task being buried in the 12th page of the results, no need to play SEO-style tricks to get visibility. You allocate a workforce to your task, and you proceed without worrying about the minute-by-minute competition by other requesters.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;The increased cost of oDesk&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
A "disadvantage" of oDesk is that most of the work ends up being more "expensive" than Mechanical Turk. However, this only holds when you substitute a Turker with an oDesker on a one-to-one basis. This is, however, a very short-sighted approach. Given the higher average quality of the oDeskers, it is often possible to reduce the overhead of quality assurance: &lt;a href="http://www.behind-the-enemy-lines.com/2011/11/does-lack-of-reputation-help.html"&gt;Fewer gold tests and lower redundancy can decrease significantly the cost of a task&lt;/a&gt;. Therefore, when we would run a task on MTurk with a redundancy of 5 or 7, we can reach the same level of quality with just a couple (or just one) oDesk workers.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;b&gt;What I miss in oDesk, part I: Quick access to many workers&lt;/b&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
What I tend to miss in oDesk is the ability to get a very large number of workers working on my tasks within minutes after posting my task. Of course, this is not surprising: On oDesk people are looking for reasonably long tasks, worth at least a few dollars. On MTurk we also get very few people that will stay with the task for long. I am trying to assess objectively what I miss, though. While I get this pleasant feeling that my task started very quickly, this nice fuzzy feeling has the downside that for reasonably big tasks, the initial speed is never indicative of the overall speed of completion for the task.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
I am trying to think how it will be possible to build a real-time platform with people willing to work for long for the tasks. I am looking forward to reading more ideas by Rob Miller, Jeff Bigham, Michael Bernstein, and Greg Little on how to accomplish this in cases where we want people accessible in real-time &lt;i&gt;and &lt;/i&gt;also want the workers to keep working with my tasks for long-ish periods of time.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;i&gt;What I miss in oDesk, &lt;/i&gt;&lt;i&gt;part &lt;/i&gt;&lt;i&gt;II: The mentality of a computer-mediated interaction&lt;/i&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
The last issue with oDesk is that it is fundamentally designed for human-to-human interaction. Workers do not expect to interact with an automatic process for being hired, assigned a task, and being evaluated. I am thinking that perhaps oDesk should have a flag that indicates that a particular task is "crowdsourced", which means that there is no interview process for being hired but rather hiring is mediated by a computing process. While I would love oDesk to allow for such a shift, I am not sure how easy it is to take a live system with hundreds of thousands of participants and introduce such (rather drastic) changes. Perhaps oDesk can create some "oLabs" products (ala Google Labs) to test such ideas...&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Conclusion and future questions&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
While my research has focused on MTurk for quite a few years, I reached a point where I got tired of fighting scammers just to get basic results back. The oDesk environment allowed me to actually test the quality control algorithm without worrying about adversarial players trying to bypass all measures of quality control.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
The fact that I am happy with hiring people through oDesk does not mean that I am fully satisfied with the capabilities of the platform. (You would not expect me to be fully satisfied, would you?)&lt;br /&gt;
&lt;br /&gt;
Here are a few of the things that I want to see:&lt;/div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;Simpler API&lt;/b&gt;. The current one can be used to post tasks and hire people automatically but it was never designed for this purpose; it was designed mainly to allow people to use oDesk through their own interfaces/systems, as opposed to using the main oDesk website. A nice tutorial taking newcomers through all the steps would be a nice addon. (I miss the tutorials that came with Java 1.0...)&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;Better access to the test scores and qualifications of the contractors&lt;/b&gt;. This will allow for better algorithms for automatic hiring and automatic salary negotiation. ("Oh you have a top-1% on the Java test, this deserves a 20% increase in salary.") I see that part as a very interesting research direction as well, as I expect labor to be increasingly mediated by computing processes in the future.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;Support for real-time task execution by having pools of people waiting on demand&lt;/b&gt;. This introduces some nice research questions on how to best structure the pricing and incentives for workers to be waiting for tasks to be assigned to them. The papers published over the last year by the MIT et al crowd provide interesting glimpses of what applications to expect.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;Support for shifts and scheduling&lt;/b&gt;. This is a heretic direction for crowdsourcing in my mind, but a very real need. For many tasks we have a rough idea of demand fluctuations over time. Being proactive and scheduling the crowd to appear when needed can lead to the implementation of real production systems that cannot rely on the whims of the crowd.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;[?] Standardized tasks&lt;/b&gt;. With John Horton, we wrote in a past a &lt;a href="http://www.ipeirotis.com/research/publications/the-need-for-standardization-in-crowdsourcing"&gt;brief position paper&lt;/a&gt; describing the need for standardization in crowdsourcing. Although I would love to see this vision materialized, I am not fully convinced myself that this is a realistic goal. Given the very high degree of vertical expertise necessary for even the most basic tasks, I cannot see how any vendor will be willing to let others use the specialized interfaces and workflows required to accomplish a standard task. As a researcher, I would love to see this vision happening, I am pessimistic on the incentives that people have to adopt this direction.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;[?] Support for task building and quality control&lt;/b&gt;. I am not fully convinced that this is something that a labor platform need to support, but this is definitely in my wish list. This is of course something that I would like to see on MTurk as well. On the other hand, I see that most experienced employers use their own bespoke, customized, and optimized workflows; they also have their own bespoke quality control systems. So, I am not fully convinced that providing basic workflow tools and basic quality control would be a solution for anybody: too basic for the advanced users, too complex for the beginners. Again, I would love to see this happening as a researcher, I am pessimistic about the practical aspects.&lt;/li&gt;
&lt;/ul&gt;
&lt;div style="text-align: justify;"&gt;
Any other ideas and suggestions?&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-4610013927731042806?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kjN2cfZLGDQ:TIsf5upvtmk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kjN2cfZLGDQ:TIsf5upvtmk:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=kjN2cfZLGDQ:TIsf5upvtmk:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kjN2cfZLGDQ:TIsf5upvtmk:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=kjN2cfZLGDQ:TIsf5upvtmk:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kjN2cfZLGDQ:TIsf5upvtmk:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kjN2cfZLGDQ:TIsf5upvtmk:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/kjN2cfZLGDQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/4610013927731042806/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2012/02/mturk-vs-odesk-my-experiences.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/4610013927731042806?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/4610013927731042806?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/kjN2cfZLGDQ/mturk-vs-odesk-my-experiences.html" title="Mechanical Turk vs oDesk: My experiences" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2012/02/mturk-vs-odesk-my-experiences.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUYDR309fCp7ImA9WhRbFk0.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-8184983688795629542</id><published>2012-02-07T04:03:00.001-05:00</published><updated>2012-02-07T04:32:56.364-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-07T04:32:56.364-05:00</app:edited><title>ACM EC 2012: Some early statistics</title><content type="html">This year, together with &lt;a href="http://www.cs.ubc.ca/~kevinlb/"&gt;Kevin Leyton-Brown&lt;/a&gt;, we co-chair &lt;a href="http://www.sigecom.org/ec12/"&gt;ACM EC 
2012&lt;/a&gt;, the 13th ACM Conference on Electronic Commerce, which will be held in Valencia, Spain, from June 4th to June 8th.&lt;br /&gt;
&lt;br /&gt;
Today was the submission deadline, and honestly I was a little bit worried about the number of submissions. 11 hours before the deadline we had just 119 submissions, a number significantly lower than for most of the recent EC conferences.&lt;br /&gt;
&lt;br /&gt;
My worry did not last for long. After observing the number of new papers per hour, and by extrapolating quickly I realized that we were going to get a large number of additional submissions. The extrapolation from the regression showed that we should expect 210 submissions, maybe a little lower if submission rate lowers closer to the deadline. &lt;a href="https://twitter.com/#!/ipeirotis/status/166737465787432960"&gt;The answers on Twitter indicated that most probably the opposite would happen&lt;/a&gt;. In fact, here are the submissions over time:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-jgO8ZvHQodw/TzDlqzWdG0I/AAAAAAAAunA/h_6hwIb4NWc/s1600/EC2012-submission_time.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="175" src="http://1.bp.blogspot.com/-jgO8ZvHQodw/TzDlqzWdG0I/AAAAAAAAunA/h_6hwIb4NWc/s400/EC2012-submission_time.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
Yes, most of the papers were submitted just a few hours before the deadline.&lt;br /&gt;
&lt;br /&gt;
By the deadline, we had a total of 225 papers uploaded, an all-time high number of submissions. Given that this is the first time that EC will be held outside a major city in the US (and that such movements typically mean a lower number of submissions and attendance), we are more than happy with the number of submissions.&lt;br /&gt;
&lt;br /&gt;
This year we also &lt;a href="http://agtb.wordpress.com/2012/02/01/ec12-innovations/"&gt;instituted the concept of tracks&lt;/a&gt;, to guarantee to the authors that their papers will be reviewed by reviewers in the same area. (A common perception is that EC is dominated by theorists who are hostile to empirical and applied work, so this separation should alleviate this concern.) Here is the approximate breakdown across tracks:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;50% Theory &amp;amp; Foundations&lt;/li&gt;
&lt;li&gt;15% Artificial Intelligence&lt;/li&gt;
&lt;li&gt;15% Empirical &amp;amp; Applications&lt;/li&gt;
&lt;li&gt;10% Theory+AI&lt;/li&gt;
&lt;li&gt;5% Theory+Empirical&lt;/li&gt;
&lt;li&gt;5% AI+
Empirical&lt;/li&gt;
&lt;li&gt;0% in all three&lt;/li&gt;
&lt;/ul&gt;
We will also introduce a new concept at EC this year: Anyone who has a paper related to EC, published or accepted for publication in another venue, conference or publication over the last year, will be able to come and present the work as a poster. We hope that this will allow the conference to serve as a meeting place for exchanging ideas about the field, in addition to being a venue where novel research is being presented for the first time. We will be posting the details soon on the &lt;a href="http://www.sigecom.org/ec12/"&gt;official website of EC'12&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
Looking forward to seeing you in Valencia!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-8184983688795629542?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=DSuXRtc2KyY:OJEUk6PGjNI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=DSuXRtc2KyY:OJEUk6PGjNI:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=DSuXRtc2KyY:OJEUk6PGjNI:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=DSuXRtc2KyY:OJEUk6PGjNI:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=DSuXRtc2KyY:OJEUk6PGjNI:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=DSuXRtc2KyY:OJEUk6PGjNI:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=DSuXRtc2KyY:OJEUk6PGjNI:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/DSuXRtc2KyY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/8184983688795629542/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2012/02/acm-ec-2012-some-early-statistics.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/8184983688795629542?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/8184983688795629542?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/DSuXRtc2KyY/acm-ec-2012-some-early-statistics.html" title="ACM EC 2012: Some early statistics" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-jgO8ZvHQodw/TzDlqzWdG0I/AAAAAAAAunA/h_6hwIb4NWc/s72-c/EC2012-submission_time.PNG" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2012/02/acm-ec-2012-some-early-statistics.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0YCSXY5fyp7ImA9WhRUEko.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-1124533443098454258</id><published>2012-01-19T00:48:00.001-05:00</published><updated>2012-01-22T18:32:48.827-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-22T18:32:48.827-05:00</app:edited><title>Identify Verification 
(and how to bypass it)</title><content type="html">Most marketplaces, in order to function properly, require some form of identification of their users. It is a well-known problem that the ability of participants to generate easily new identities can lead to many problems.&lt;br /&gt;
&lt;br /&gt;
At the very basic level, if participants can easily create new identities, then reputation systems lose part of their power: If a user gets scores that are mediocre or bad, then it is often preferable to abandon the account with the bad scores and start again. Even more importantly, Sybil attacks, where one participant generates multiple accounts, can fool many systems that rely on peer evaluation, or assume that users are independent of each other.&lt;br /&gt;
&lt;br /&gt;
For example, in services such as Mechanical Turk, which rely on redundancy to ensure high-quality answers, many spammers create multiple accounts and try to attack simple tasks by entering the same answer in all questions. I also remember Luis von Ahn was describing an attack against reCAPTCHA, where &lt;a href="http://www.4chan.org/"&gt;4chan&lt;/a&gt; users attacked reCAPTCHA by trying to guess which of the two words was the known one, and entering "penis" as the other word :-)&lt;br /&gt;
&lt;br /&gt;
It is therefore not surprising that most marketplaces attempt to have some form of identification service. A form of identification that is considered strong is to ask for a unique ID element from the registering users, e.g., the SSN of the participant, asking for place of birth, etc. Interestingly enough, it is trivially easy to bypass many such identity tests.&lt;br /&gt;
&lt;br /&gt;
Go and check the website &lt;a href="http://www.fakenamegenerator.com/"&gt;Fake Name Generator&lt;/a&gt;. You can specify the characteristics of the name that you want, and you get back an entry that you want. Someone with Japanese heritage living in the US? Sure thing, here is the entry for Mr. Souma Miura:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
&lt;div style="margin-left: 1em; margin-right: 1em; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-f59XQcNx5jI/TxerPUi_00I/AAAAAAAAub4/7RH6vSdNMoM/s1600/fake-name-generator.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/-f59XQcNx5jI/TxerPUi_00I/AAAAAAAAub4/7RH6vSdNMoM/s400/fake-name-generator.PNG" width="379" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;ul&gt;
&lt;/ul&gt;
&lt;br /&gt;
You prefer something more exotic? Maybe a person of Icelandic origin living in Cyprus? No problem:
&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-VtgJouSNgYg/TxerwETSVDI/AAAAAAAAucE/vWIQIPfYKoU/s1600/fake-name-generator2.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="396" src="http://4.bp.blogspot.com/-VtgJouSNgYg/TxerwETSVDI/AAAAAAAAucE/vWIQIPfYKoU/s400/fake-name-generator2.PNG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
Interestingly enough, I was able to fool quite a few websites that supposedly guarantee for the identity of their participants. All of them accepted without problems the fake identities, and in some cases even the credit card numbers (not for actual charges but the fake credit card numbers were accepted as legitimate credit cards to create a profile). For obvious reasons, I will not reveal the names of the victims :-)
&lt;br /&gt;
&lt;br /&gt;
So, how can a market secure better against identity attacks? Here are a few examples that I encountered:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;On&amp;nbsp;&lt;a href="http://embeemobile.com/"&gt;Embee Mobile&lt;/a&gt;&amp;nbsp;the payment to the workers is free talk time for their cell phone. While it is definitely possible to change the SIM card and the phone number, this is definitely not a cheap generation of identities.&lt;/li&gt;
&lt;li&gt;On &lt;a href="http://www.odesk.com/"&gt;oDesk&lt;/a&gt;, as part of the identification, participants are asked to send scans of their driving license and of their bank statements, in order to unlock the ability to apply to large (more than 5) projects. While it is certainly possible to fake those, it is unclear what someone can do with the money collected to an account if the cash cannot be withdrawn to a bank.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
Perhaps in the future we will see the emergence of identification services for individuals. We already have such services for websites (e.g., Verisign). It is conceivable that someone will be able to guarantee the identity of a person, but you can see already the Big Brother concerns that such a service will raise.&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-1124533443098454258?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=bRe0xhbNe-E:-RafOqJkHYI:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=bRe0xhbNe-E:-RafOqJkHYI:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=bRe0xhbNe-E:-RafOqJkHYI:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/bRe0xhbNe-E" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/1124533443098454258/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2012/01/identify-verification-and-how-to-bypass.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1124533443098454258?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1124533443098454258?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/bRe0xhbNe-E/identify-verification-and-how-to-bypass.html" title="Identify Verification (and how to bypass it)" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-f59XQcNx5jI/TxerPUi_00I/AAAAAAAAub4/7RH6vSdNMoM/s72-c/fake-name-generator.PNG" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2012/01/identify-verification-and-how-to-bypass.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0ACQ3g-cSp7ImA9WhRVEUk.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-8650467326217864970</id><published>2012-01-09T14:55:00.003-05:00</published><updated>2012-01-09T17:56:02.659-05:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2012-01-09T17:56:02.659-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="hcomp" /><title>HCOMP 2012: 4th Human Computation Workshop</title><content type="html">I am happy to announce that the Fourth&amp;nbsp;Human Computation Workshop (HCOMP 2012), will be organized this year together with the AAAI conference, on&amp;nbsp;July 22 or 23. The conference is in Toronto.&lt;br /&gt;
&lt;br /&gt;
The deadline for submitting a paper is March 30, 2012. You can submit either a "long" 6-page paper, or a "short" 2-page poster submission.&lt;br /&gt;
&lt;br /&gt;
You can see the &lt;a href="http://www.humancomputation.com/2012/Welcome.html"&gt;official web site&lt;/a&gt; or check the detailed&amp;nbsp;&lt;a href="http://www.humancomputation.com/2012/About_the_Workshop.html"&gt;Call for Papers&lt;/a&gt; at&amp;nbsp;&lt;a href="http://www.humancomputation.com/2012/"&gt;http://www.humancomputation.com/&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
Last year, we had a big room for the workshop, which was jam-packed, with more than 100 people in the room at some point, and &lt;span id="goog_867069063"&gt;&lt;/span&gt;the program was full of excellent papers&lt;span id="goog_867069064"&gt;&lt;/span&gt;. So, if you have ideas about human computation, crowdsourcing, or about merging human and machine intelligence, you want to send a paper to HCOMP!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-8650467326217864970?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=VNuaWTrx2vM:Pk_RLR-4eBs:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=VNuaWTrx2vM:Pk_RLR-4eBs:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VNuaWTrx2vM:Pk_RLR-4eBs:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/VNuaWTrx2vM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/8650467326217864970/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2012/01/hcomp-2012-4th-human-computation.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/8650467326217864970?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/8650467326217864970?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/VNuaWTrx2vM/hcomp-2012-4th-human-computation.html" title="HCOMP 2012: 4th Human Computation Workshop" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2012/01/hcomp-2012-4th-human-computation.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ck4FSH4-eyp7ImA9WhRSFEo.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-6844707407349129307</id><published>2011-11-15T21:19:00.001-05:00</published><updated>2011-11-16T14:15:19.053-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-16T14:15:19.053-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="quality" /><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" 
/><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="pricing" /><category scheme="http://www.blogger.com/atom/ns#" term="reputation" /><title>Does lack of reputation help the crowdsourcing industry?</title><content type="html">&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;span style="color: #990000;"&gt;Can the &lt;i&gt;lack &lt;/i&gt;of a public reputation system on Amazon Mechanical Turk be the reason behind the &lt;i&gt;success &lt;/i&gt;of current crowdsourcing companies?&lt;/span&gt;&lt;/b&gt; I present an analysis that points to this direction. Unfortunately, this "feature" also leads to a stagnating crowdsourcing market with limited potential for growing.&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;b&gt;Low salaries and market for lemons&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
A contentious issue about crowdsourcing, and specifically about Amazon Mechanical Turk, is that wages are very low. It is not uncommon to see effective wages of \$1/hr, or even lower. Why is that?&lt;br /&gt;
&lt;br /&gt;
I have argued in the past that Mechanical Turk is an example of a "&lt;a href="http://www.behind-the-enemy-lines.com/2010/07/mechanical-turk-low-wages-and-market.html"&gt;market for lemons&lt;/a&gt;". Good workers are drowning in the anonymity of the crowd. Since the good workers cannot differentiate themselves from bad workers &lt;i&gt;before &lt;/i&gt;working on a task, they are doomed to receive the same level of compensation as the bad workers.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
This is not a fault of the employers: when a new employer joins the market, it is almost necessary for the employer to test the incoming workers to ensure the quality of the work. During this testing period, high-quality workers are completing the tasks side-by-side with low-quality workers, and everyone receives a low salary.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
The counterargument that I often hear is: "But the market, in the long run, should see an increase in salaries, as good workers demonstrate their quality to employers". Of course, &lt;a href="http://en.wikiquote.org/wiki/John_Maynard_Keynes"&gt;in the long run we are all dead&lt;/a&gt;. But even in the long run, and even after we are all dead, the market does not seem to be on a path to convergence to fair salaries.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Why? Here is the brief summary:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;High-quality workers are much more valuable than low-quality ones&lt;/li&gt;
&lt;li&gt;Lack of a shared reputation system depresses salaries pushing all salaries close to the level of low-quality workers&lt;/li&gt;
&lt;li&gt;Employers build their own, &lt;b&gt;&lt;i&gt;private &lt;/i&gt;&lt;/b&gt;reputation systems, learning the quality of the workers&lt;/li&gt;
&lt;li&gt;&lt;b&gt;&lt;i&gt;&lt;span style="color: #990000;"&gt;With the private quality information, employers can retain good workers by paying higher wages compared to the low-quality workers, but still lower than their "fair" quality-adjusted wage.&lt;/span&gt;&lt;/i&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;New employers cannot compete with incumbents since they do not have access to the privately built reputation systems and have to face the cost of learning the quality of the workers, while incumbents enjoy their advantage of already knowing who the good workers are&lt;/li&gt;
&lt;li&gt;Incumbents can enjoy a strong cost advantage, effectively blocking newcomers from entering the industry&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
Below I expand on these arguments in a little more detail.&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Quality equivalence of low- and high-quality workers&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
First, let's examine the differences in payment between high- and low-quality workers. Let's take a very simple setting: Suppose that you have workers performing a task with two answers: Yes or no. The low-quality workers are accurate $lq$% of the time. The high-quality workers are accurate $hq$% of the time. How many workers of low quality do we need to emulate one worker of high quality?&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Working in the simplest possible case, assume that we have $k$ low-quality workers, and each gives the correct answer with probability $q$. We take the majority vote to be the aggregate answer. What is the probability $P(q,k)$ that the majority will be correct? We have that:&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;div style="text-align: center;"&gt;
$P(q,k) = \sum_{i = \lceil \frac{k+1}{2} \rceil}^k \binom{k}{i} \cdot q^i \cdot(1-q)^{k-i}$&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="font-size: x-small;"&gt;(Assume, for the sake of simplicity that $k$ is odd. Otherwise, we need to add the term &lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="font-size: x-small;"&gt; $\frac{1}{2}\cdot \left( \lceil \frac{k+1}{2} \rceil - \lceil \frac{k}{2} \rceil \right) \cdot \binom{k}{k/2}\cdot q^{k/2}\cdot (1-q)^{k/2}$ in the above equation, to allocate ties appropriately)&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Given the above, we can find how many low-quality workers of quality $lq$ we need to emulate a single high-quality worker of quality $hq$: We just need to solve the equation:&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;div style="text-align: center;"&gt;
$P(lq, k) = P(hq, 1)$&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Here are a few indicative pairs: To reach the 95% quality level we need:&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;3 workers of quality 90%.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;7 workers of quality 80%.
&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;9 workers of quality 75%.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;15 workers of quality 70%.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;67 workers of quality 60%.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;269 workers of quality 55%.&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
If our goal is to reach the 99% quality level, we need:&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;3 workers of quality 95%&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;5 workers of quality 90%&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;13 workers of quality 80%&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;31 workers of quality 70%&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
This means that &lt;b&gt;the fair wage of a single worker that is accurate at the 95% quality level should be ~9 times higher than the wage of the worker who is 75% accurate&lt;/b&gt;. A worker who is 99% accurate should demand a 13x higher salary than someone who is 80% accurate. Notice that as the quality of the low-quality workers drops, the difference in fair wages between the high-quality and low-quality workers increases at a very fast rate.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Employers learning the quality of workers&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Suppose that we have an employer called PanosLabs that has worked for a long period of time with workers. At this point, 
PanosLabs has a long track record for many workers, and the quality estimates for each worker are pretty solid. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Now, this knowledge of worker quality allows PanosLabs to pay the good workers higher salaries. Let's assume that 
PanosLabs decided to be very "generous". For the high-quality 99%-accurate workers, PanosLabs &lt;b&gt;&lt;i&gt;quadruples &lt;/i&gt;&lt;/b&gt;the salary, compared to the general pool. Similarly, for workers that are 95%-accurate, PanosLabs &lt;b&gt;&lt;i&gt;triples &lt;/i&gt;&lt;/b&gt;the salary compared to the general pool. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Assuming that the general pool of workers is at the 80% accuracy level, PanosLabs gets the following bargain: It is now possible to cut costs significantly, while maintaining the same quality level. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Initially, PanosLabs was hiring 13 workers per case, paying each \$1/hr; this is an effective wage of \$13/hr for reaching the 99% quality level. &lt;b&gt;Now, PanosLabs can have the 99% quality level by just employing a single 99% worker, for the cost of \$4/hr. This is a cost reduction of 70%! &lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Great bargain eh? This is the benefit of knowing thy worker...&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Increasing the barriers to entry&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Now let's assume that a new employer, called RotisLabs, arrives at the market. The high-quality workers are now happily employed at PanosLabs, receiving a salary that is 4X the running market salary for their task.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
RotisLabs, coming to the crowdsourcing market, is in a pickle. RotisLabs has no way of identifying and attracting the high-quality workers without attracting the workers to work for RotisLabs first. Why?&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;There is no history of employment.&lt;/b&gt; In the "real world" knowing that an engineer worked at, say, Google gives some signal of quality. In our setting RotisLabs cannot check if a worker has worked for PanosLabs.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;It is not possible to check how much the workers get paid for other tasks&lt;/b&gt;. In the "real world" prices serve as signals. An employee that gets a high salary also signals to other employers that he or she is a high performer. However, RotisLabs cannot check the prices that workers receive.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Check now the situation of RotisLabs: The competitor, PanosLabs, generates 99% accurate work at the cost of \$4/hr. What are the options of RotisLabs?&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;First option: RotisLabs can pay \$1/hr.&lt;/b&gt; This option attracts the following workers: The low-quality, 80%-accurate workers that did not get increases by PanosLabs, and, &lt;i&gt;if lucky&lt;/i&gt;, some new 99%-accurate workers that just arrived in the market. &lt;b&gt;&lt;i&gt;However, this pay rate does not attract the high-quality workers that stick with PanosLabs, severely limiting the pool of good workers accessible to RotisLabs&lt;/i&gt;&lt;/b&gt;. Notice that, at this pay level, RotisLabs has a cost of \$13/hr to reach the 99%-quality level, while competing with PanosLabs that has 70% lower cost of production, i.e., \$4/hr. If RotisLabs has enough cash &lt;i&gt;and &lt;/i&gt;patience, it will stick to the market until learning the quality of workers. In most cases, though, RotisLabs will just realize that it is not possible to compete.&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;Second option: RotisLabs can pay \$4/hr. &lt;/b&gt;This option may attract the 99%-accurate workers that work for PanosLabs. But this will also attract the 80% workers! Our dear friend, RotisLabs, cannot separate the two. Therefore, to ensure the 99%-quality level, RotisLabs needs to still hire 13 workers per case, to account for the cases where many 80% workers work on an example. This increases the overall cost of production to \$52/hr. Ooops! PanosLabs can reach the same level of quality with a cost of just \$4/hr.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
You can see that knowing the quality of the workers can give a &lt;b&gt;tremendous &lt;/b&gt;benefit to the incumbent players that invest into learning the quality of the workers. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Interestingly enough, due to the depressed salaries that are a direct consequence of the lack of reputation systems, the established employer effectively passes the search costs to the employees: While learning the quality of the workers, the employer is paying salaries corresponding to the lowest expected level of quality. It is up to the workers to carry the burden of low salaries until proving themselves (again and again, for every single employer...)&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;Lack of shared reputation system: The foundation of the crowdsourcing industry?&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
The lack of a (shared) reputation system is a godsend for companies that enjoy a first-mover advantage. They can keep their costs down, while keeping their own employees happy (in a relative sense: &lt;i&gt;"can't you see how much better I am paying you compared to the general pool?"&lt;/i&gt;). &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
The anonymity generates the conditions for "market for lemons" salaries, which keep the costs down. At the same time, the smart and established employers can find and reach out to the high-quality workers. By paying these workers "generously", the smart employers can lock the workers into "golden cages": offer salaries that are higher than those for the general population, but still much much lower than the level of the fair wages for the produced quality levels. &lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
When even these 4x or 5x (unrealistic and fictional) salary increases, mentioned in the example above, are great bargains, you can imagine the margins that crowdsourcing companies can command. &lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="color: #990000;"&gt;&lt;b&gt;In a very perverse manner, the anonymity imposed by Mechanical Turk is now effectively serving as the foundation of the current crowdsourcing industry. The anonymity keeps worker costs down, allowing most companies to offer solutions that are very cost competitive compared to alternatives. At the same time, this policy is hurting the Amazon MTurk marketplace by effectively generating huge barriers to entry for newcomer employers, and depressing the salaries of newcomer employees. &lt;/b&gt;&lt;i&gt;(The Masters qualification is a step in the right direction, but too crude to serve as an effective signalling mechanism.)&lt;/i&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;The future?&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Let's see who will manage to generate the appropriate market for crowdsourcing that will resolve these issues.  One thing is clear: the direction towards improving crowdsourcing markets requires salaries to increase significantly. Interestingly enough, this is expected to lower the overall cost of production as well, as the cost of  quality control will be significantly lower.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
As &lt;a href="http://www.slideshare.net/ipeirotis/crowdsourcing-lessons-from-henry-ford"&gt;I said in the crowdsourcing panel&lt;/a&gt; at the WWW2011 conference last Spring:&lt;/div&gt;
&lt;ul&gt;
&lt;li style="text-align: justify;"&gt;It is not about the cost! &lt;/li&gt;
&lt;li style="text-align: justify;"&gt;It is not about the crowd! &lt;/li&gt;
&lt;li style="text-align: justify;"&gt;It is not about simple tasks! &lt;/li&gt;
&lt;li style="text-align: justify;"&gt;Crowdsourcing is best for “parallel, scalable, automatic interviews” and for finding quickly good workers&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span style="color: #990000;"&gt;Find the best trained workers, fast,  pay them well, and keep them!&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-6844707407349129307?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=_bcOxins_zQ:wc_FtLgWHho:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=_bcOxins_zQ:wc_FtLgWHho:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=_bcOxins_zQ:wc_FtLgWHho:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/_bcOxins_zQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/6844707407349129307/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/11/does-lack-of-reputation-help.html#comment-form" title="11 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6844707407349129307?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6844707407349129307?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/_bcOxins_zQ/does-lack-of-reputation-help.html" title="Does lack of reputation help the crowdsourcing industry?" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>11</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/11/does-lack-of-reputation-help.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkIGQ344cSp7ImA9WhRSFEs.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-6875940019748112594</id><published>2011-11-10T22:18:00.001-05:00</published><updated>2011-11-16T12:28:42.039-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-16T12:28:42.039-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" 
term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="businessweek" /><title>BusinessWeek...</title><content type="html">&lt;a href="http://www.businessweek.com/magazine/humans-plus-computers-equals-better-crowdsourcing-11102011.html"&gt;BusinessWeek on my research&lt;/a&gt;&amp;nbsp;:-)&lt;br /&gt;
&lt;br /&gt;
Special thanks to my collaborators that made this research possible: &lt;a href="http://people.stern.nyu.edu/fprovost/"&gt;Foster Provost&lt;/a&gt;, &lt;a href="http://pages.stern.nyu.edu/~jwang5/"&gt;Jing Wang&lt;/a&gt;, &lt;a href="http://www.linkedin.com/in/joshattenberg"&gt;Josh Attenberg&lt;/a&gt;, &lt;a href="http://uca.edu/computerscience/facultystaff/shengli-victor-sheng/"&gt;Shengli Sheng&lt;/a&gt;. Additional thanks go to&amp;nbsp;&lt;a href="http://adsafemedia.com/"&gt;AdSafe Media&lt;/a&gt; and, of course,&amp;nbsp;&lt;a href="http://www.tagasauris.com/"&gt;Tagasauris&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-6875940019748112594?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=XtXhz6HDcEw:G0gW8LJhgM8:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=XtXhz6HDcEw:G0gW8LJhgM8:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=XtXhz6HDcEw:G0gW8LJhgM8:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/XtXhz6HDcEw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/6875940019748112594/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/11/businessweek.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6875940019748112594?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6875940019748112594?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/XtXhz6HDcEw/businessweek.html" title="BusinessWeek..." /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>4</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/11/businessweek.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkYGRHg7eSp7ImA9WhdbE0s.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-913252986112122025</id><published>2011-10-11T16:15:00.001-04:00</published><updated>2011-10-11T16:15:25.601-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-11T16:15:25.601-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="conference" /><category scheme="http://www.blogger.com/atom/ns#" term="cfp" /><category 
scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="call for papers" /><category scheme="http://www.blogger.com/atom/ns#" term="aca" /><category scheme="http://www.blogger.com/atom/ns#" term="human computation" /><title>Collective Intelligence 2012: Deadline November 4, 2011</title><content type="html">&lt;br /&gt;
For all those of you interested in crowdsourcing, I would like to bring your attention to a new conference,&amp;nbsp;named &lt;a href="http://www.ci2012.org/"&gt;Collective Intelligence 2012&lt;/a&gt;,&amp;nbsp;being organized at MIT this spring (April 18-20, 2012) by &lt;a href="http://cci.mit.edu/malone/"&gt;Tom Malone&lt;/a&gt; and &lt;a href="http://www.cs.cmu.edu/~biglou/"&gt;Luis von Ahn&lt;/a&gt;. The conference is expected to have a set of 15-20 invited speakers (disclaimer: I am one of them), and also accepts papers submitted for publication. &lt;b&gt;The deadline is November 4th, 2011&lt;/b&gt;, so if you have something that you would be willing to share with a wide audience interested in collective intelligence, this may be a place to consider.&lt;br /&gt;
&lt;br /&gt;
The call for papers follows:&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Overview&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Collective intelligence has existed at least as long as humans have, because families, armies, countries, and companies have all--at least sometimes--acted collectively in ways that seem intelligent. But in the last decade or so a new kind of collective intelligence has emerged: groups of people and computers, connected by the Internet, collectively doing intelligent things. For example, Google technology harvests knowledge generated by millions of people creating and linking web pages and then uses this knowledge to answer queries in ways that often seem amazingly intelligent. Or in Wikipedia, thousands of people around the world have collectively created a very large and high quality intellectual product with almost no centralized control, and almost all as volunteers!&lt;br /&gt;
&lt;br /&gt;
These early examples of Internet-enabled collective intelligence are not the end of the story but just the beginning. And in order to understand the possibilities and constraints of these new kinds of intelligence, we need a new interdisciplinary field. Forming such a field is one of the goals of this conference.&lt;br /&gt;
&lt;br /&gt;
We seek papers about behavior that is both collective and intelligent. &amp;nbsp;By collective, we mean groups of individual actors, including, for example, people,&lt;br /&gt;
computational agents, and organizations. &amp;nbsp;By intelligent, we mean that the collective behavior of the group exhibits characteristics such as, for example,&lt;br /&gt;
perception, learning, judgment, or problem solving.&lt;br /&gt;
&lt;br /&gt;
Topics of interest include but are not limited to:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;human computation&lt;/li&gt;
&lt;li&gt;social computing&lt;/li&gt;
&lt;li&gt;crowdsourcing&lt;/li&gt;
&lt;li&gt;wisdom of crowds (e.g., prediction markets)&lt;/li&gt;
&lt;li&gt;group memory and problem-solving&lt;/li&gt;
&lt;li&gt;deliberative democracy&lt;/li&gt;
&lt;li&gt;animal collective behavior&lt;/li&gt;
&lt;li&gt;organizational design&lt;/li&gt;
&lt;li&gt;public policy design (e.g., regulatory reform)&lt;/li&gt;
&lt;li&gt;ethics of collective intelligence (e.g., "digital sweatshops")&amp;nbsp;&lt;/li&gt;
&lt;li&gt;computational models of group search and optimization&lt;/li&gt;
&lt;li&gt;emergence and evolution of intelligence&lt;/li&gt;
&lt;li&gt;new technologies for making groups smarter&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
For a more complete description of the scope, please click here. For any questions, please email contact@ci2012.org.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Dates and Location&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
The conference will be held April 18-20, 2012 on the MIT campus in Cambridge, MA. &amp;nbsp;Accommodations in nearby hotels will be available for conference attendees.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Format&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
The conference will consist of:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;invited talks from prominent researchers in different areas related to collective intelligence&lt;/li&gt;
&lt;li&gt;oral paper presentations&lt;/li&gt;
&lt;li&gt;poster sessions&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Submission&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Papers of three types are invited:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Reports of original research results&lt;/li&gt;
&lt;li&gt;Reviews of previous research in one or more fields relevant to collective intelligence&lt;/li&gt;
&lt;li&gt;Position papers about research agendas for the field of collective intelligence&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
Some of the papers submitted will be invited for oral presentation, others for presentation as posters.&lt;br /&gt;
&lt;br /&gt;
Papers may be up to 8 pages in length. The deadline for submission is November 4, 2011. Download the submission format. Papers shall be submitted by email to submissions@ci2012.org.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Important Dates&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Paper submission deadline: November 4, 2011&lt;/li&gt;
&lt;li&gt;Notification of paper acceptance / rejection: January 15, 2012&lt;/li&gt;
&lt;li&gt;Camera-ready papers due: February 15, 2012&lt;/li&gt;
&lt;li&gt;Conference dates: April 18-20, 2012&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-913252986112122025?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=a9HJ1Qphe08:mh8B09brzog:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=a9HJ1Qphe08:mh8B09brzog:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=a9HJ1Qphe08:mh8B09brzog:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/a9HJ1Qphe08" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/913252986112122025/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/10/collective-intelligence-2012-deadline.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/913252986112122025?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/913252986112122025?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/a9HJ1Qphe08/collective-intelligence-2012-deadline.html" title="Collective Intelligence 2012: Deadline November 4, 2011" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/10/collective-intelligence-2012-deadline.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUICRXk8fyp7ImA9WhdVFEU.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-3153650136136268757</id><published>2011-09-03T22:04:00.000-04:00</published><updated>2011-09-19T21:52:44.777-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-19T21:52:44.777-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" 
term="pricing" /><category scheme="http://www.blogger.com/atom/ns#" term="probability" /><title>Probabilities and MTurk Executives: A Troubled Story</title><content type="html">On the Mechanical Turk blog, there is a blog post that &lt;a href="http://mechanicalturk.typepad.com/blog/2011/09/cooking-.html"&gt;describes the need to build custom qualifications for workers&lt;/a&gt;. (&lt;b&gt;Update&lt;/b&gt;: the old link was removed after I posted this analysis, and the new version does not contain any of the problematic math analysis.)&amp;nbsp;While the argument is correct, it is backed by some horrendous math analysis. For your viewing pleasure, here is the quote:&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;
This difference in accuracy is magnified if you’re using plurality.  Supposed you use plurality of 2 (asking 2 Workers the same question).  With Masters, if 2 Workers with average accuracy (99%) agree on an answer there is a 98% probability that it’s the correct answer.  With the broader group, if 2 Workers with average accuracy (90%) agree on an answer there is an 81% probability that it’s the correct answer.   And if you happen to get 2 68% accurate Workers submitting assignments for the same HIT, the probability the answer is accurate is only 46%!&lt;/blockquote&gt;
&lt;br /&gt;
Dear Sharon: &lt;br /&gt;
&lt;br /&gt;
We do appreciate your efforts on improving MTurk and on giving correct advice.&lt;br /&gt;
&lt;br /&gt;
But this analysis, which attempts to back a correct argument, is absolutely wrong. It is so wrong that it hurts. Just think at a very intuitive level: how is it possible to ask two workers of a certain accuracy, see that they agree, and expect the accuracy of the &lt;b&gt;&lt;i&gt;corroborated &lt;/i&gt;&lt;/b&gt;answer to be lower? This is simply not possible!&lt;br /&gt;
&lt;br /&gt;
Here is the correct analysis:&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;br /&gt;
Suppose you use plurality of 2 (asking 2 Workers the same question).  With Masters, if 2 Workers with average accuracy (99%) agree on an answer, then the probability that this answer is incorrect is &lt;br /&gt;
&lt;br /&gt;
$Pr(\mathit{incorrect}|\mathit{agreement}) =&amp;nbsp;\frac{Pr(\mathit{worker1\ incorrect\ and\ worker2\ incorrect})}{Pr(\mathit{agreement})}$.&lt;br /&gt;
&lt;br /&gt;
Assuming (conditional) independence of the workers:&lt;br /&gt;
&lt;br /&gt;
$Pr(\mathit{incorrect}|\mathit{agreement}) = $&lt;br /&gt;
&lt;br /&gt;
$\frac{Pr(\mathit{worker1\ incorrect}) \cdot  Pr(\mathit{worker2\ incorrect})}{Pr(\mathit{agreement})}=\frac{(1-p)^2}{p^2+(1-p)^2}$.&lt;br /&gt;
&lt;br /&gt;
where $p$ is the probability of a worker being correct.&lt;br /&gt;
&lt;br /&gt;
With 99% accuracy, the probability of a worker being correct is $p=0.99$. So:&lt;br /&gt;
&lt;br /&gt;
$Pr(\mathit{incorrect}|\mathit{agreement}) = \frac{0.01 \cdot 0.01}{0.01 \cdot 0.01 +&amp;nbsp;0.99 \cdot 0.99}$&lt;br /&gt;
&lt;br /&gt;
$\Rightarrow Pr(\mathit{incorrect}|\mathit{agreement}) \approx 0.000102$.&lt;br /&gt;
&lt;br /&gt;
Since $Pr(\mathit{correct}|\mathit{agreement}) = 1-Pr(\mathit{incorrect}|\mathit{agreement})$, with Masters, if 2 Workers with average accuracy (99%) agree on an answer, there is a $1-0.000102 \approx 99.99\%$ probability that it’s the correct answer.&lt;br /&gt;
&lt;br /&gt;
With the broader group, if 2 Workers with average accuracy (90%) agree on an answer there is an $1-\frac{ 0.1 \cdot 0.1}{0.1 \cdot 0.1 +&amp;nbsp;0.9 \cdot 0.9} \approx&amp;nbsp;&amp;nbsp;98.78\%$ probability that it’s the correct answer. And if you happen to get two 68%-accurate workers submitting assignments for the same HIT (and they both agree), the probability the answer is accurate is only $1-\frac{ 0.32 \cdot 0.32}{0.32 \cdot 0.32 +&amp;nbsp;0.68 \cdot 0.68} \approx&amp;nbsp;&amp;nbsp;81.87\%$!&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;
&lt;br /&gt;
How did Sharon get confused? The analysis that she presents calculates not the accuracy of the answer when the workers agree, but instead it calculates &lt;b&gt;&lt;i&gt;how often the two workers will agree &lt;span class="Apple-style-span" style="color: #990000;"&gt;and &lt;/span&gt;agree on the correct answer&lt;/i&gt;&lt;/b&gt;. Indeed, with workers that have 68% accuracy, we will observe agreement on the correct answer only 46% of the time. (And, 10% of the time, they will agree on the incorrect answer.) More importantly, though, 44% of the time, the two workers will disagree, and we will need to bring in an extra worker, increasing the cost by 50%.&lt;br /&gt;
&lt;br /&gt;
Why did Sharon get confused? One explanation is that she is a victim of the&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Conjunction_fallacy"&gt;conjunction&amp;nbsp;fallacy&lt;/a&gt;&amp;nbsp;or that she does not understand conditional probabilities.&amp;nbsp;However, I believe it is not that. I bet that she did not get puzzled by the results because the presented math confirmed another (correct) intuition that she had about the market:&amp;nbsp;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;redundancy when relying on low-quality workers is not cost-effective&lt;/span&gt;&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
Consider this: if you have 3 workers of 68% accuracy, the combination of the three (e.g., using majority vote) will result in an &lt;b&gt;&lt;i&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;average &lt;/span&gt;&lt;/i&gt;&lt;/b&gt;accuracy of only 75%. In other words only 3 out of 4 times the majority will generate the correct answer. To reach 90% accuracy, we need 11 workers with 68% accuracy each. And&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; to reach 99% accuracy, we need 39 workers of 68% accuracy&lt;/span&gt;&lt;/b&gt;! (I will present the math in a later blog post.)&lt;br /&gt;
&lt;br /&gt;
Even using "moderately&amp;nbsp;high quality" workers, simulating a worker that is 99% accurate tends to be an expensive proposition. &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;We need five workers that are 90% accurate to get 99% accuracy&lt;/span&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
So, yes, the high-quality Masters workers are worth their extra price. In fact, they are worth their weight in gold. Paying only 20% more to access a guaranteed pool of high-quality "Masters" workers is a &lt;b&gt;great&lt;/b&gt; bargain, given the quality differences with the general worker pool.&lt;br /&gt;
&lt;br /&gt;
Actually, if I were a 99% accurate worker I would feel offended that I do not get at least double or triple the running wage for the common workers. There is a great mispricing of the services provided by high-quality workers, and most requesters today exploit just this fact to keep the wages down, while still managing to get high quality results from the tested, reliable workers.&lt;br /&gt;
&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-3153650136136268757?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Jw6Hm7ELKGE:YHCdsH08NQw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Jw6Hm7ELKGE:YHCdsH08NQw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Jw6Hm7ELKGE:YHCdsH08NQw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/Jw6Hm7ELKGE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/3153650136136268757/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/09/probabilities-and-mturk-executives.html#comment-form" title="6 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3153650136136268757?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3153650136136268757?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/Jw6Hm7ELKGE/probabilities-and-mturk-executives.html" title="Probabilities and MTurk Executives: A Troubled Story" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>6</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/09/probabilities-and-mturk-executives.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkYMSHw4eSp7ImA9WhdXFUk.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-1453769556608044343</id><published>2011-08-28T11:09:00.001-04:00</published><updated>2011-08-28T11:09:49.231-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-28T11:09:49.231-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="reviews" /><category scheme="http://www.blogger.com/atom/ns#" term="surveys" 
/><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><title>The impact of online reviews: An annotated bibliography</title><content type="html">A few weeks back, I received some questions about online consumer reviews, their impact on sales, and other related questions. At that point, I realized that while I had a good grasp of the technical literature within Computer Science venues, my grasp of the overall empirical literature within Marketing and Information Systems venues was rather shaky, so I had to do a better job of preparing a literature review.&lt;br /&gt;
&lt;br /&gt;
So, I did whatever a self-respecting professor would do in such a situation: I asked my PhD student, &lt;a href="http://pages.stern.nyu.edu/~bli/"&gt;Beibei Li&lt;/a&gt;, to compile a list of such papers, write a brief summary of each, and send me the list. She had passed her qualification exam by studying exactly this area, so she was the resident expert in the topic.&lt;br /&gt;
&lt;br /&gt;
Beibei did not disappoint me. A few hours later I had a very good list of papers in my mailbox, together with the description. It was so good, that I thought that many other people would be interested in the list. &lt;br /&gt;
&lt;br /&gt;
So, without further ado, I present to you Beibei's annotated bibliography about online reviews and their business impact.&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
&lt;b&gt;User behavior and online reviews&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Nan Hu, Paul Pavlou and Jie Zhang, in their paper "&lt;a href="http://dx.doi.org/10.1145/1562764.1562800"&gt;Overcoming the J-shaped distribution of product reviews&lt;/a&gt;" have shown that the graphical representation of product reviews has a J-shaped distribution: mostly 5-star ratings, some 1-star ratings, and hardly any ratings in between. What can explain this distribution? They attribute this rating distribution to two biases:&lt;/li&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;Purchasing bias&lt;/b&gt;: People that buy a product do not constitute a random sample of the population. People buy products that they believe they will enjoy. So, the reviews are written by people that are more likely to like the product. Since only people with higher product valuations purchase a product, those with lower valuations are less likely to purchase the product, and they will not write a (negative) product review. Purchasing bias causes the positive skewness in the distribution of product reviews and inflates the average.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Underreporting bias&lt;/b&gt;: Among people who purchased a product, those with extreme ratings (5-star or 1-star) are more likely to express their views to “brag or moan” than those with moderate views.&lt;/li&gt;
&lt;/ul&gt;&lt;li&gt;Xinxin Li and Lorin Hitt, in their 2008 paper "&lt;a href="http://ericchaing.org/files/Li_2008_ISR.pdf"&gt;Self-Selection and Information Role of Online Product Reviews&lt;/a&gt;" have found that online reviews may be subject to a &lt;b&gt;self-selection bias&lt;/b&gt;: products are not randomly assigned to reviewers. Rather, early buyers (buyers who also post the first reviews) self-select products that they believe they may enjoy, in the absence of any existing information. This is in contrast to other buyers that wait for more signals about the quality of a product to emerge, before being convinced to buy, and therefore have a lower prior expectation about the product quality. As a consequence, the preferences of early buyers systematically differ from those of the broader consumer population, and the early reviews can be biased, either in a positive or negative way. Such bias in reviews will affect sales and reduce consumer surplus, even if all reviews are truthful.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Wendy W. Moe and Michael Trusov in their paper "&lt;a href="http://www.rhsmith.umd.edu/faculty/wmoe/moe_trusov.pdf"&gt;Measuring the Value of Social Dynamics in Online Product Ratings Forums&lt;/a&gt;", looked into how social influences affect the subsequent ratings and sales. They demonstrated that reviewer rating behavior is significantly affected by previous ratings. In other words, product reviews not only reflect the customers' experience with the product, but they also affect the ratings of later reviews as well.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Chrysanthos Dellarocas, Guodong (Gordon) Gao, and Ritu Narayan in their paper "&lt;a href="http://www.dellarocas.com/images/papers/jmis2010.pdf"&gt;Are Consumers More Likely to Contribute Online Reviews for Hit or Niche Products?&lt;/a&gt;" show that consumers tend to prefer posting reviews for obscure movies but also for hit movies that already have a large number of online reviews. The recommendation of the authors to owners of review websites is that the volume of previously posted reviews should become less prominent in order to encourage posting of reviews for lesser-known products.&lt;/li&gt;
&lt;/ul&gt;&lt;b&gt;Online product reviews and product sales&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Judy Chevalier and Dina Mayzlin, in their 2006 paper "&lt;a href="http://www.journals.marketingpower.com/doi/abs/10.1509/jmkr.43.3.345?journalCode=jmkr"&gt;The Effect of Word of Mouth on Sales: Online Book Reviews&lt;/a&gt;" have first demonstrated that online ratings have significant impact on book sales. The key trick was to monitor the sales of the same book in parallel on Amazon.com and on Barnes &amp;amp; Noble. Since the two sites were selling the same book, any external effect would be similar to both websites. However, reviews posted on Amazon or on BN.com would influence sales only on the respective websites. Through this "differences in differences" method, Chevalier and Mayzlin could isolate and measure the effect of product reviews, without worrying about other confounding factors.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Yong Liu, in the 2006 paper "&lt;a href="http://www.journals.marketingpower.com/doi/abs/10.1509/jmkg.70.3.74?journalCode=jmkg"&gt;Word of Mouth for Movies: Its Dynamics and Impact on Box Office Revenue&lt;/a&gt;" has looked at the same topic, but focused on the movie box office. Different from Chevalier and Mayzlin, his finding suggested that the valence of reviews does not matter for box office sales; however, the review volume does.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Pradeep K. Chintagunta, Shyam Gopinath and Sriram Venkataraman, in their 2010 paper "&lt;a href="http://mktsci.journal.informs.org/content/early/2010/05/27/mksc.1100.0572.abstract"&gt;The Effects of Online User Reviews on Movie Box Office Performance: Accounting for Sequential Rollout and Aggregation Across Local Markets&lt;/a&gt;" have further studied the impact (valence, volume, and variance) of online reviews by looking at the local geographic movie box office, rather than the national-level aggregate box office performance. After accounting for various potential complications in the analysis, they suggested that it is the valence that seems to matter and not the volume.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Jonah Berger, Alan T. Sorensen and Scott J. Rasmussen, in their 2010 paper "&lt;a href="http://marketing.wharton.upenn.edu/documents/research/Negative_Publicity.pdf"&gt;Positive Effects of Negative Publicity: When Negative Reviews Increase Sales&lt;/a&gt;" found that negative reviews can boost sales for unknown books, but hurt sales for books with established authors. This happens because negative reviews bring visibility to unknown books. For authors who are already well known, however, publicity does not boost the awareness of their books; instead, the valence of the publicity becomes more important.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Chris Forman, Anindya Ghose and Batia Wiesenfeld, in their 2008 paper "&lt;a href="http://isr.journal.informs.org/content/19/3/291.abstract"&gt;Examining the Relationship Between Reviews and Sales: The Role of Reviewer Identity Disclosure in Electronic Markets&lt;/a&gt;" have looked at the role of reviewer identity disclosure (e.g., real name and location of the reviewer) in examining the relationship between Amazon book reviews and sales. They found that the prevalence of reviewer disclosure of identity information is associated with increases in helpfulness rating of the review and the subsequent online product sales. This is because community members more positively assess reviewers who disclose identity-descriptive information, and then use their assessment of reviewers as a heuristic shaping their evaluation of the product reviewed.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Nikolay Archak, Anindya Ghose and Panagiotis G. Ipeirotis (yours truly), in the 2011 paper "&lt;a href="http://mansci.journal.informs.org/content/57/8/1485.short"&gt;Deriving the Pricing Power of Product Features by Mining Consumer Reviews&lt;/a&gt;", examine the idea that the textual content of the product reviews is an important determinant of consumers' choices, over and above the valence and volume of reviews. Using text mining tools, they incorporated review text by decomposing textual reviews into segments describing different product features. This work demonstrates how textual data can be used to learn consumers' relative preferences for different product features and also how text can be used for predictive modeling of future changes in sales.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Anindya Ghose and Panagiotis G. Ipeirotis (yours truly, again), in the 2011 paper "&lt;a href="http://pages.stern.nyu.edu/~panos/publications/tkde2010-usefulness.pdf"&gt;Estimating the Helpfulness and Economic Impact of Product Reviews: Mining Text and Reviewer Characteristics&lt;/a&gt;", explored online reviews' impact on helpfulness and product sales, using multiple aspects of review text, such as subjectivity levels, various measures of readability and extent of spelling errors. The analysis has revealed that the extent of subjectivity, informativeness, readability, and linguistic correctness in reviews matters in influencing sales and perceived usefulness. See also the &lt;a href="http://behind-the-enemy-lines.blogspot.com/2010/01/did-you-find-this-helpful.html"&gt;related blog post&lt;/a&gt; that I wrote in January 2010 (yes, even after acceptance, it took 1.5 years for the paper to appear in print).&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Yubo Chen, Qi Wang and Jinhong Xie, in their paper "&lt;a href="http://www.journals.marketingpower.com/doi/abs/10.1509/jmkr.48.2.238?journalCode=jmkr"&gt;Online Social Interactions: A Natural Experiment on Word of Mouth Versus Observational Learning&lt;/a&gt;" studied how word-of-mouth (WOM, i.e., others’ opinions) differs from observational learning (i.e., others’ purchase actions) in influencing sales. They have found that:&lt;/li&gt;
&lt;ul&gt;&lt;li&gt;negative WOM is more influential than positive WOM;&lt;/li&gt;
&lt;li&gt;positive observational learning information significantly increases sales but negative one has no effect (e.g., reporting purchase statistics helps popular products, without hurting niche ones);&lt;/li&gt;
&lt;li&gt;the sales impact of observational learning increases with WOM volume&lt;/li&gt;
&lt;/ul&gt;&lt;li&gt;Michael Luca, in his "job market paper" "&lt;a href="http://people.bu.edu/mluca/JMP.pdf"&gt;Reviews, Reputation, and Revenue: The Case of Yelp.com&lt;/a&gt;" used a nice trick for estimating the causal effect of consumer reviews from Yelp.com on restaurant demand. Using revenue data from the state of Washington, he examined what is the effect of having an extra "half star" in Yelp. The key trick is to exploit the discontinuity in the way that Yelp assigns aggregate scores: A restaurant with 3.76 average review rating gets a 4-star review, while a restaurant with 3.74 average review rating gets a 3.5-star review. So, if there is a big gap in the revenues between restaurants with scores of 3.76 and 3.74, then this revenue gap (which actually exists) can be attributed to Yelp, and to its summary rating. (&lt;a href="http://afinetheorem.wordpress.com/2011/06/30/reviews-reputation-and-revenue-the-case-of-yelp-com-m-luca-2010/"&gt;This blog post presents further analysis of the paper&lt;/a&gt;, and also mentions similar use of this discontinuity trick to study the effect of sanitary scores in NYC: a restaurant may get an "A" score with $x$ penalty points, and another get a "B" with $x+1$ penalty points). Luca found discontinuous jumps in restaurant sales that follow the discontinuous jumps in the ratings around the rounding thresholds. This finding strongly suggested that changes in ratings (e.g., from just below a rounding threshold to just above a rounding threshold) can have significant causal impact on restaurant demand.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&amp;nbsp;&lt;b&gt;Online word of mouth and firms&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Michael Trusov, Randolph E. Bucklin, and Koen Pauwels in their 2009 paper "&lt;a href="http://bear.warrington.ufl.edu/weitz/mar7786/Articles/Trusov%20et%20al%202009%20social%20network.pdf"&gt;Effects of Word-of-Mouth Versus Traditional Marketing: Findings from an Internet Social Networking Site&lt;/a&gt;" compared the effects of word-of-mouth marketing versus traditional marketing, as judged from the member growth at an Internet social networking site. They found that WOM referrals (i.e., invitations) not only produce a substantially higher short-term response, but also have substantially longer carryover effects in the long run than traditional marketing actions (e.g., promotion events, media appearances).&lt;/li&gt;
&lt;li&gt;&amp;nbsp;David Godes and Dina Mayzlin, in their 2009 paper "&lt;a href="http://dx.doi.org/10.1287/mksc.1080.0444"&gt;Firm-Created Word-of-Mouth Communication: Evidence from a Field Test&lt;/a&gt;" examined how a firm should try to create useful word-of-mouth. They looked at who creates WOM and what kind of WOM matters. They found that for a product with a low initial awareness level, WOM that is most effective at driving sales is created by less loyal (not highly loyal) customers and occurs between acquaintances (not friends). They also found that although "opinion leadership" is useful in identifying potentially effective spreaders of WOM among very loyal customers, it is less useful for the sample of less loyal customers.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Jackie Y. Luan and Scott Neslin, in their paper "&lt;a href="http://papers.ssrn.com/sol3/papers.cfm?abstract_id=1462336"&gt;The Development and Impact of Consumer Word of Mouth in New Product Diffusion&lt;/a&gt;" focused on how word-of-mouth (WOM) influences new product adoption in the video game market. Specifically, they were able to measure how effectively firms' marketing efforts generate WOM (buzz) and to determine whether WOM influences product adoption primarily through an informative role (i.e., helping the consumer learn product quality) or a persuasive role (i.e., exerting a direct impact on sales, for example, by increasing awareness).&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
If you have any other papers that you think that should be included in the list, please add your recommendation in the comments, together with a brief description of the conceptual and methodological contribution of the paper.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-1453769556608044343?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=ml840wQUkDk:-iSnrldUYzs:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=ml840wQUkDk:-iSnrldUYzs:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=ml840wQUkDk:-iSnrldUYzs:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/ml840wQUkDk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/1453769556608044343/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/08/impact-of-online-reviews-annotated.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1453769556608044343?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1453769556608044343?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/ml840wQUkDk/impact-of-online-reviews-annotated.html" title="The impact of online reviews: An annotated bibliography" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/08/impact-of-online-reviews-annotated.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0YDRHs9eCp7ImA9WhdSFkk.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-7314073683116632672</id><published>2011-07-25T18:28:00.002-04:00</published><updated>2011-07-25T22:59:35.560-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-25T22:59:35.560-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="reputation" /><title>Native vs Grapevine Reputation on MTurk</title><content type="html">The Mechanical Turk blog has a new entry today, by Sharon (Chiarella), titled &lt;a href="http://mechanicalturk.typepad.com/blog/2011/07/cooking-with-sharon-tip-3-manage-your-reputation.html"&gt;"Cooking with Sharon" &amp;amp; Tip #3 Manage Your Reputation&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
In the article, Sharon encourages requesters to do the following:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;i&gt;&lt;b&gt;Pay well&lt;/b&gt; - Don’t be fooled into underpaying Workers by comparing your HITs to low priced HITs that aren’t being completed.&lt;/i&gt;&lt;/li&gt;
&lt;li&gt;&lt;i&gt;&lt;b&gt;Pay fairly&lt;/b&gt; – Don’t reject an Assignment unless you’re SURE it’s the Worker who is wrong.&lt;/i&gt;&lt;/li&gt;
&lt;li&gt;&lt;i&gt;&lt;b&gt;Pay quickly&lt;/b&gt; – If you approve or reject Assignments once a week, Workers may do a few HITs and then wait to see if they are paid before doing more.  This is especially true if you’re a new Requester and haven’t established your reputation yet.&lt;/i&gt;&lt;/li&gt;
&lt;/ul&gt;Sharon then explains that workers do talk with each other in the forums, on Turkopticon, and so on, and collectively establish the reputation of the requester based on these factors.&amp;nbsp;While there is nothing wrong with this "grapevine"-based reputation, it also illustrates some obvious features that the Mechanical Turk platform is missing.&lt;br /&gt;
&lt;br /&gt;
Instead of outsourcing the task to third-party forums, Amazon should provide features that make the reputation of the requester more transparent, visible, and objective.&lt;br /&gt;
&lt;br /&gt;
For example, each requester could have a profile, in which the workers can see:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;The total number of HITs, and rewards posted by the requester&lt;/li&gt;
&lt;li&gt;The rejection rate for the requester&lt;/li&gt;
&lt;li&gt;The distribution of working time for the HITs of the requester&lt;/li&gt;
&lt;li&gt;The effective hourly wage for the tasks completed for the requester&lt;/li&gt;
&lt;li&gt;The payment lag from completion of the task until payment&lt;/li&gt;
&lt;/ul&gt;These are all elements that workers would find useful. They are statistics that contribute to the transparency of the market, and their objective nature&amp;nbsp;makes the establishment of reputation much faster. Such objective characteristics complement the more subjective features used in the grapevine-based reputation systems (Turker Nation, Turkopticon, etc), where only a subset of workers contribute and measure personal perceptions (e.g., was this task "well-paid" or not?). Of course, subjective reputation systems will continue to play their role, providing information that cannot be easily quantified. But they should not be the only reputation signal for the market.&lt;br /&gt;
&lt;br /&gt;
Could there be side-effects if such a system is deployed? Yes. I can see some cases where this profile can introduce strange incentives in the market. (For example, it may be good to have a few of my tasks spammed and still pay immediately for the results, so that I can have high acceptance rate, HITs that require only a little bit of time to be completed, and show a high hourly wage.) But these are just details that can be addressed. There is no way that overall the market could suffer when such statistics become publicly available. &lt;span class="Apple-style-span" style="font-size: x-small;"&gt;(Sorry Mr \$0.23/hr-requester, you are not &lt;i&gt;that &lt;/i&gt;valuable.)&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
Markets operate based on trust and are better with increased information efficiency. Any step towards this direction is a good step for the market participants and, by extension, for the market owner.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-7314073683116632672?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=IHN26gtHr3E:7Z9-uy4R95A:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=IHN26gtHr3E:7Z9-uy4R95A:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=IHN26gtHr3E:7Z9-uy4R95A:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/IHN26gtHr3E" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/7314073683116632672/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/07/native-vs-grapevine-requester.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/7314073683116632672?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/7314073683116632672?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/IHN26gtHr3E/native-vs-grapevine-requester.html" title="Native vs Grapevine Reputation on MTurk" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/07/native-vs-grapevine-requester.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE8MRHs5eCp7ImA9WhdSFUw.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-3925690082343510518</id><published>2011-07-22T18:57:00.002-04:00</published><updated>2011-07-24T09:08:05.520-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-24T09:08:05.520-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="evaluation" /><category scheme="http://www.blogger.com/atom/ns#" term="education" /><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category 
scheme="http://www.blogger.com/atom/ns#" term="teaching" /><category scheme="http://www.blogger.com/atom/ns#" term="cheating" /><category scheme="http://www.blogger.com/atom/ns#" term="incentives" /><title>A tale about parking</title><content type="html">The media attention to my &lt;a href="http://behind-the-enemy-lines.blogspot.com/2011/07/why-i-will-never-pursue-cheating-again.html"&gt;prior blog post &lt;/a&gt;was really not something that I enjoyed. Not so much for the attention itself but for focusing on exactly the wrong issues. That post was NOT about me and my evaluation. This is not the main point. I thought that the salary issue was worth mentioning (apparently, it was not) but it was, indeed, a MINOR part of the issue. &lt;br /&gt;
&lt;br /&gt;
In fact, after reflecting on this point, I realized the following: &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;Even if I had received a $1M bonus from NYU for my efforts, the basic problem would still be there: the teaching experience would degenerate into a witch hunt, focusing on cheating, instead of being about learning&lt;/span&gt;.&lt;/b&gt; And yes, I would still write the same blog post even if I were fully satisfied with my annual evaluation. &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In fact, the blog post was in my folder of draft posts for a few months now, long before receiving my annual evaluation.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
If you want a parallel, consider this hypothetical story:&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
&lt;b&gt;A tale about parking&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Suppose that you live in a city with a huge traffic problem, and a resulting huge parking problem. Too many cars on the street.&lt;br /&gt;
&lt;br /&gt;
People try to find parking and they drive around, drive around. A lot. Some drivers get frustrated and they double park. Some drivers are stupid enough to double park during rush hour, block the traffic, and leave the car unattended. As expected, the police arrive and assign a ticket to the offender, sometimes taking the car as well. However, during quiet hours, when there is no traffic, many drivers double park, but they do not block the traffic, and nobody gives them a ticket.&lt;br /&gt;
&lt;br /&gt;
Suddenly, in one neighborhood only, call it Redwich Village, a lone policeman starts assigning tickets for every parking violation. No matter if it is minor or major. No matter if the driver just stepped out, or if it is the first time that the driver double parked. Zero-tolerance policy.&lt;br /&gt;
&lt;br /&gt;
By doing that, and being more vigilant, our lone policeman assigns 10 times more tickets than before. By doing that, he also loses countless hours fighting with the offenders. This continuous fight also annoys some other residents of the neighborhood that want the policeman to focus on policing the neighborhood, and not spend all his time giving parking tickets.&lt;br /&gt;
&lt;br /&gt;
But even our lone policeman gets frustrated: he realizes that he did not become a policeman to give parking tickets. While it is part of his duties, he feels that it is just better not to be so aggressive. His boss also gets a report that many neighborhood residents are annoyed. His boss knows that the complaints are due to the zero-tolerance policy on parking tickets. So he says that he would like our lone policeman to both continue this&amp;nbsp;idiosyncratic&amp;nbsp;zero-tolerance policy enforced just by our lone policeman, and be as diligent with his other duties as before.&lt;br /&gt;
&lt;br /&gt;
Our lone policeman goes on and reflects on the overall experience. He realizes that he is fighting a losing battle. As the number of cars increases in the city, there will be more people parking illegally.&lt;br /&gt;
&lt;br /&gt;
So, our lone policeman suggests that we need to do something more fundamental about the parking problem: He suggests that people could carpool, use bicycles, mass transit, or simply walk. And he asks for people to think of more such alternatives. If there are fewer cars in the city, the problem will be resolved.&lt;br /&gt;
&lt;br /&gt;
He describes all his thoughts in his blog, in a long post, titled "Why I will never give parking tickets again." He describes the futility of parking tickets to fight the underlying problem, and vows never to be so vigilant about parking tickets. He will be as vigilant as all the other policemen, which is as vigilant as he was before. &lt;br /&gt;
&lt;br /&gt;
His blog post goes viral. Media pick up fragments, everyone reads whatever they want to read. Some headlines:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;"Parking tickets in Redwich Village increase by 1000%. Is it impossible to park your car in Redwich?"&lt;/li&gt;
&lt;li&gt;"Parking-related violations skyrocket in&amp;nbsp;Redwich&amp;nbsp;Village. Policeman punished for enforcing the rules."&lt;/li&gt;
&lt;li&gt;"RedWich Village sucks. Only scumbags live in RedWich Village, what did you expect? Any lawful behavior?"&lt;/li&gt;
&lt;li&gt;"Stupid city residents: We know that all people that live in cities are cheaters and park illegally"&lt;/li&gt;
&lt;li&gt;"Why the government does not reward this honest policeman?"&lt;/li&gt;
&lt;li&gt;"Why this policeman is vowing not to obey the law? Oh the society..."&lt;/li&gt;
&lt;/ul&gt;Now, some of the business owners of Redwich Village are annoyed because people may not drive to Redwich, if they think it is impossible to find parking. Some residents are also annoyed because real estate prices may go down if people believe that Redwich is a place where you cannot park your car. After all, it is all a matter of reputation.&lt;br /&gt;
&lt;br /&gt;
And in this brouhaha, nobody pays any attention to the underlying problem. Is increased vigilance the solution to the parking problem? Should we give more tickets? Should we install cameras? Or should we try to follow the suggestions of our lone policeman and think of other ways to reduce traffic, and therefore resolve the parking problem on a more fundamental level?&lt;br /&gt;
&lt;br /&gt;
The blog post of our lone policeman is neither about the policeman nor about Redwich. It is about the fact that there is too much traffic in the whole city. Which in turn causes the parking problem. Parking scarcity is the symptom, not the real problem. And while he wrote about the traffic problem and suggested solutions, 99% of the coverage was about Redwich and about his own evaluation.&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
This is exactly how the discussion about cheating evolved in the media. Instead of focusing on how to make student evaluation objective and cheating-proof, the discussion focused on whether my salary went sufficiently up or not. &lt;b&gt;This is not the main point.&lt;/b&gt; It is not even a minor point, in reflection. The real question is on how we can best evaluate our students and which evaluation strategies are robust to cheating, encourage creativity, and evaluate true learning.&lt;br /&gt;
&lt;br /&gt;
And this is not a discussion that can be done while screaming.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-3925690082343510518?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=JR2kmAeQIOc:wrg_chuh7TI:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=JR2kmAeQIOc:wrg_chuh7TI:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=JR2kmAeQIOc:wrg_chuh7TI:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/JR2kmAeQIOc" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/3925690082343510518/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/07/tale-about-parking.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3925690082343510518?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3925690082343510518?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/JR2kmAeQIOc/tale-about-parking.html" title="A tale about parking" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/07/tale-about-parking.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEMEQXk_fSp7ImA9WhdSE0U.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-6670828593372886603</id><published>2011-07-17T17:30:00.028-04:00</published><updated>2011-07-22T20:53:20.745-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-22T20:53:20.745-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="evaluation" /><category scheme="http://www.blogger.com/atom/ns#" term="education" /><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category scheme="http://www.blogger.com/atom/ns#" term="teaching" /><category 
scheme="http://www.blogger.com/atom/ns#" term="cheating" /><category scheme="http://www.blogger.com/atom/ns#" term="incentives" /><title>Why I will never pursue cheating again</title><content type="html">The post is temporarily removed. I will restore it after ensuring that there are no legal liabilities for myself or my employer.&lt;br /&gt;
&lt;br /&gt;
Until then, you can read my commentary in my new blog post: &lt;a href="http://behind-the-enemy-lines.blogspot.com/2011/07/tale-about-parking.html"&gt;A tale about parking.&lt;/a&gt;&lt;br /&gt;
&lt;br /&gt;
The discussion on &lt;a href="http://hackerne.ws/item?id=2774254"&gt;Hacker News&lt;/a&gt; was good as well.&amp;nbsp;Also see the response that I posted at the &lt;a href="http://www.businessinsider.com/nyu-professor-class-cheating-2011-7#comment-4e273a85cadcbb434e020000"&gt;Business Insider&lt;/a&gt; website and the coverage at &lt;a href="http://www.insidehighered.com/news/2011/07/22/nyu_professor_s_blog_post_sets_off_debate_on_plagiarism"&gt;Inside Higher Education&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-6670828593372886603?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Y1HiH9ebOpU:bvNlYmY6qYc:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Y1HiH9ebOpU:bvNlYmY6qYc:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Y1HiH9ebOpU:bvNlYmY6qYc:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/Y1HiH9ebOpU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/6670828593372886603/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/07/why-i-will-never-pursue-cheating-again.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6670828593372886603?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6670828593372886603?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/Y1HiH9ebOpU/why-i-will-never-pursue-cheating-again.html" title="Why I will never pursue cheating again" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/07/why-i-will-never-pursue-cheating-again.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0MDRXo8cCp7ImA9WhZaEUU.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-2169164950293332365</id><published>2011-06-26T12:57:00.008-04:00</published><updated>2011-06-27T09:31:14.478-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-27T09:31:14.478-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="newsweek" /><category scheme="http://www.blogger.com/atom/ns#" term="minimum wage" /><category scheme="http://www.blogger.com/atom/ns#" term="tutorial" /><category scheme="http://www.blogger.com/atom/ns#" term="extreme value theory" /><title>Extreme value theory 101, or Newsweek researching minimum wage on Mechanical Turk</title><content type="html">Last week, Newsweek published an article titled &lt;a href="http://www.newsweek.com/2011/06/19/the-real-minimum-wage.html"&gt;The Real Minimum Wage&lt;/a&gt;. The authors report that "&lt;i&gt;in a weeks-long experiment, we posted simple, hourlong jobs (listening to audio recordings and counting instances of a specific keyword) and continually lowered our offer until we found the absolute bottom price that multiple people would accept, and then complete the task&lt;/i&gt;." &lt;br /&gt;
&lt;br /&gt;
The results "showed" that Americans are the ones willing to accept the lowest possible salary for working on a task, compared even to people in India, Romania, Philippines, etc. In fact, they found that there are Americans willing to work for 25 cents per hour, while they could not find anyone willing to work for less than \$1/hr in any other country. The conclusion of the article? Americans are more desperate than anyone else in the world.&lt;br /&gt;
&lt;br /&gt;
What is the key problem of this study? There are many more US-based workers on Mechanical Turk compared to other nationalities. So, if you have a handful of workers from other countries, and hundreds of workers from the US, you are guaranteed to find more extreme findings for the US. Why? To put it simply, you are searching harder within the US to find small values, compared to the effort placed on other countries. &lt;span class="Apple-style-span" style="font-size: x-small;"&gt; (There are other issues as well, e.g., workers that would work on this task are not necessarily representative of the overall population; the same workers are exposed to multiple, decreasing salaries, issues of anchoring, issues of workers falsely reporting to be from the US, whether the authors checked IP geo-location, etc. While all these are valid concerns, they are secondary to the very basic statistical problem.)&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Finding a Minimum Value: A Probabilistic Approach&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
On an abstract, statistical level, by testing workers from multiple countries, to determine their minimum wage, we sample multiple "minimum wage distributions" trying to find the smallest value within each one of them.&lt;br /&gt;
&lt;br /&gt;
Each probability distribution corresponds to the minimum wages that workers from different countries are willing to accept. Let's call the CDFs of these distributions $F_i(x)$, with, say, $F_1(x)$ being the distribution for minimum wages for US, $F_2(x)$ for India, $F_3(x)$ for UK, etc.&lt;br /&gt;
&lt;br /&gt;
As a simplifying example, assume that $F(x)$ is a uniform distribution, with minimum value \$0 and a maximum value \$10, &lt;b&gt;for an average acceptable minimum wage of \$5&lt;/b&gt;. This means that:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;10% of the population will accept a minimum wage below \$1, (i.e., $F(\$1)=0.1$)&lt;/li&gt;
&lt;li&gt;20% of the population will accept a minimum wage below \$2, (i.e., $F(\$2)=0.2$)&lt;/li&gt;
&lt;li&gt;...&lt;/li&gt;
&lt;li&gt;90% of the population will accept a minimum wage below \$9, (i.e., $F(\$9)=0.9$)&lt;/li&gt;
&lt;li&gt;100% of the population will accept a minimum wage below \$10, (i.e., $F(\$10)=1.0$)&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
Now, let's assume that we sample $n$ workers from one of the country-specific distributions. After running the experiment, we get back measurements $x_1, \ldots, x_n$, each one corresponding to the minimum wage for each of the workers that participated in the study, who comes from the country that we are measuring.&lt;br /&gt;
&lt;br /&gt;
What is the probability of one of these wages being below, say, $z=\$0.25$? Here is the probability calculation:&lt;br /&gt;
&lt;br /&gt;
$\begin{eqnarray}&lt;br /&gt;
Pr(\mathit{min~wage} &amp;lt; z) &amp;amp;=&amp;amp; 1 - Pr(\mathit{all~wages} \geq z)\\&lt;br /&gt;
&amp;amp; =&amp;amp; 1 - Pr(x_1 \geq z, \ldots, x_n \geq z)&lt;br /&gt;
\end{eqnarray}$&lt;br /&gt;
&lt;br /&gt;
Assuming independence across the sampled values, we have:&lt;br /&gt;
&lt;br /&gt;
$\begin{eqnarray}&lt;br /&gt;
Pr(\mathit{min~wage} &amp;lt; z) &amp;amp;=&amp;amp; 1 - \prod_{i=1}^n Pr(x_i \geq z) \\&lt;br /&gt;
&amp;amp; =&amp;amp; 1 - \left(1 - F(z) \right)^n&lt;br /&gt;
\end{eqnarray}$&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
So, if we sample $n$ workers, set the minimum wage at $z=0.25$ , and assume uniform distribution for $F$, then $F(\$0.25)=0.025$ and the probability that we will find at least one worker willing to work for 25 cents is:&lt;br /&gt;
&lt;br /&gt;
$Pr(\mathit{min~wage} &amp;lt; z) = 1 - 0.975^n$&lt;br /&gt;
&lt;br /&gt;
Plotting this, as a function of $n$, we have the following:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-jhK21E6rq0g/Tgcy4q1sQlI/AAAAAAAAs80/QKYLVhzW8h4/s1600/extreme-value-theory.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="253" src="http://3.bp.blogspot.com/-jhK21E6rq0g/Tgcy4q1sQlI/AAAAAAAAs80/QKYLVhzW8h4/s400/extreme-value-theory.gif" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;b&gt;As we sample more and more workers, it becomes more likely that we will find a value at or below 25 cents/hour. &lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
So, how does this approach explain the findings of Newsweek?&lt;br /&gt;
&lt;br /&gt;
We know that all countries are not equally represented on Mechanical Turk. Most workers are from the US (50% or so), followed by India (35% or so), and then by Canada (2%), UK (2%), Philippines (2%), and a variety of other countries with similarly small percentages. This means that in the study, we expect to have more Americans participating, followed by Indians, and then a variety of other countries. So, even if the distribution of minimum wages was identical across all countries, we expect to find lower wages in the country with the largest number of participants.&lt;br /&gt;
&lt;br /&gt;
Since the majority of the workers on Mechanical Turk are from US, followed by India, followed by Canada, and UK, etc, &lt;b&gt;the illustration by Newsweek simply gives us the country of origin of the workers, in reverse order of popularity!&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-lTDFUn167jw/Tgc0YC46ZCI/AAAAAAAAs84/-4IQ3IVRSwQ/s1600/Illustration-by-Newsweek.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="266" src="http://3.bp.blogspot.com/-lTDFUn167jw/Tgc0YC46ZCI/AAAAAAAAs84/-4IQ3IVRSwQ/s400/Illustration-by-Newsweek.jpg" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
At this point, someone may ask: what happens if the distribution is not uniform but, say, lognormal? (A much more plausible distribution for minimum acceptable wages.) For this specific question, as you can see from the analysis above, this does not make much of a difference: The only thing that we need to know is the value of $F(z)$ for the $z$ value of interest.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Going in depth: Extreme Value Theory&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
A more general question is: What is the expected maximum (or minimum) value that we expect to find when we sample from an arbitrary distribution? This is the topic of &lt;a href="http://en.wikipedia.org/wiki/Extreme_value_theory"&gt;extreme value theory&lt;/a&gt;, a field in statistics that tries to predict the probability of extreme events (e.g., what is the biggest possible drop in the stock market? what is the biggest rainfall in this region?) Given the events in the financial markets in 2008, this theory has received significant attention in the last few years.&lt;br /&gt;
&lt;br /&gt;
What is nice about this theory is that the fundamentals can be summarized very succinctly. The &lt;a href="http://en.wikipedia.org/wiki/Fisher%E2%80%93Tippett%E2%80%93Gnedenko_theorem"&gt;Fisher–Tippett–Gnedenko theorem&lt;/a&gt; states that, if we sample from a distribution, the maximum values that we expect to find will be a random variable, belonging to one of the three distributions:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;If the distribution from which we are sampling has a &lt;b&gt;tail that decreases exponentially&lt;/b&gt; (e.g., normal distribution, exponential, Gamma, etc), then the maximum value is described by the (reversed) &lt;a href="http://en.wikipedia.org/wiki/Gumbel_distribution"&gt;Gumbel distribution&lt;/a&gt; (aka "type I extreme value distribution")&lt;/li&gt;
&lt;li&gt;If the distribution from which we are sampling has a &lt;b&gt;tail that decreases as a polynomial (i.e., has a "long tail")&lt;/b&gt; (e.g., power-laws, Cauchy, Student-t, etc), then the maximum value is described by the &lt;a href="http://en.wikipedia.org/wiki/Fr%C3%A9chet_distribution"&gt;Frechet distribution&lt;/a&gt; (aka "type II extreme value distribution")&lt;/li&gt;
&lt;li&gt;If the distribution from which we are sampling has a &lt;b&gt;tail that is finite (i.e., has a "short tail")&lt;/b&gt; (e.g., uniform, Beta, etc), then the maximum follows the  (reversed) &lt;a href="http://en.wikipedia.org/wiki/Weibull_distribution"&gt;Weibull distribution&lt;/a&gt; (aka "type III extreme value distribution")&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
The three types of the distributions are all special cases of the &lt;a href="http://en.wikipedia.org/wiki/Generalized_extreme_value_distribution"&gt;generalized extreme value distribution&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
This theory has significant applications not only when modeling risk (stock market, weather, earthquakes, etc), but also when modeling decision-making for humans: Often, we model humans as utility maximizers, who are making decisions that maximize their own well-being. This maximum-seeking behavior results often in the distributions described above. I will give a more detailed description in a later blog post.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-2169164950293332365?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=SSLObarhYKo:TTWA9sVtdvw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=SSLObarhYKo:TTWA9sVtdvw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=SSLObarhYKo:TTWA9sVtdvw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/SSLObarhYKo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/2169164950293332365/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/06/extreme-value-theory-101-or-newsweek.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2169164950293332365?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2169164950293332365?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/SSLObarhYKo/extreme-value-theory-101-or-newsweek.html" title="Extreme value theory 101, or Newsweek researching minimum wage on Mechanical Turk" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-jhK21E6rq0g/Tgcy4q1sQlI/AAAAAAAAs80/QKYLVhzW8h4/s72-c/extreme-value-theory.gif" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/06/extreme-value-theory-101-or-newsweek.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0ENQ30zcCp7ImA9WhdXFE8.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-5585467094888505184</id><published>2011-06-24T12:29:00.004-04:00</published><updated>2011-08-27T03:21:32.388-04:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-08-27T03:21:32.388-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="academia" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="human computation" /><category scheme="http://www.blogger.com/atom/ns#" term="hcomp" /><title>Accepted papers for the 3rd Human Computation Workshop (HCOMP 2011)</title><content type="html">We have &lt;a href="http://www.humancomputation.com/Program.html"&gt;posted online the schedule&lt;/a&gt; for the &lt;a href="http://www.humancomputation.com/Welcome.html"&gt;3rd Human Computation Workshop (HCOMP 2011)&lt;/a&gt;, which will be organized as part of &lt;a href="http://www.aaai.org/Conferences/AAAI/aaai11.php"&gt;AAAI 2011&lt;/a&gt;, in San Francisco, on August 8th. The &lt;a href="https://www.aaai.org/Forms/aaai-registration-form.php"&gt;registration fee&lt;/a&gt; for participating in the workshop is a pretty modest \$125 for graduate students, and \$155 for other participants. Just make sure to register before July 1st to get these rates, as afterwards the rates jump to \$165 and \$185. I should also mention that, following the tradition established in Paris in HCOMP 2009, we will have a group dinner for all the participants after the workshop to continue the discussions from the day...&lt;br /&gt;
&lt;br /&gt;
We have a strong program, with 16 long papers accepted, and 16 papers being presented as demos and posters. Below you can find the titles of the papers and their abstracts. The PDF versions of the papers &lt;s&gt;will be posted online by AAAI, after the completion of the conference&lt;/s&gt; &lt;a href="http://www.aaai.org/Library/Workshops/ws11-11.php"&gt;are available through the AAAI Digital Library&lt;/a&gt;. Until then, you can search Google, or just ask the authors for a pre-print. So, if you are interested in crowdsourcing and human computation, we hope to see you there in San Francisco in August!&lt;br /&gt;
&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
&lt;b&gt;Long Papers&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;Large-Scale Live Active Learning: Training Object Detectors with Crawled Data and Crowds&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Sudheendra Vijayanarasimhan, Kristen Grauman (UT Austin)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Active learning and crowdsourcing are promising ways to efficiently build up training sets for object recognition, but thus far techniques are tested in artificially controlled settings. Typically the vision researcher has already determined the dataset's scope, the labels ``actively" obtained are in fact already known, and/or the crowd-sourced collection process is iteratively fine-tuned. We present an approach for *live learning* of object detectors, in which the system autonomously refines its models by actively requesting crowd-sourced annotations on images crawled from the Web. To address the technical issues such a large-scale system entails, we introduce a novel part-based detector amenable to linear classifiers, and show how to identify its most uncertain instances in sub-linear time with a hashing-based solution. We demonstrate the approach with experiments of unprecedented scale and autonomy, and show it successfully improves the state-of-the-art for the most challenging objects in the PASCAL benchmark. In addition, we show our detector competes well with popular nonlinear classifiers that are much more expensive to train.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Robust Active Learning using Crowdsourced Annotations for Activity Recognition&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Liyue Zhao, Gita Sukthankar (UCF); Rahul Sukthankar (Google Research/CMU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Recognizing human activities from wearable sensor data is an important problem, particularly for health and eldercare applications. However, collecting sufficient labeled training data is challenging, especially since interpreting IMU traces is difficult for human annotators. Recently, crowdsourcing through services such as Amazon's Mechanical Turk has emerged as a promising alternative for annotating such data, with active learning serving as a natural method for affordably selecting an appropriate subset of instances to label. Unfortunately, since most active learning strategies are greedy methods that select the most uncertain sample, they are very sensitive to annotation errors (which corrupt a significant fraction of crowdsourced labels). This paper proposes methods for robust active learning under these conditions. Specifically, we make three contributions: 1) we obtain better initial labels by asking labelers to solve a related task; 2) we propose a new principled method for selecting instances in active learning that is more robust to annotation noise; 3) we estimate confidence scores for labels acquired from MTurk and ask workers to relabel samples that receive low scores under this metric. The proposed method is shown to significantly outperform existing techniques both under controlled noise conditions and in real active learning scenarios. The resulting method trains classifiers that are close in accuracy to those trained using ground-truth data.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Beat the Machine: Challenging workers to find the unknown unknowns&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Josh Attenberg, Panos Ipeirotis, Foster Provost (NYU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
This paper presents techniques for gathering data that expose errors of automatic classification models. Prior work has demonstrated the promise of having humans seek training data, as an alternative to active learning, in cases where there is extreme class imbalance. We now explore the direction where we ask humans to identify cases that will cause the classification system to fail. Such techniques are valuable in revealing problematic cases that do not reveal themselves during the normal operation of the system, and may include cases that are rare but catastrophic. We describe our approach for building a system to satisfy these requirements, trying to encourage humans to provide us with such data points. In particular, we reward a human when the provided example is difficult for the model to handle, and the reward is proportional to the magnitude of the error. In a sense, the humans are asked to ''Beat the Machine'' and find cases where the automatic model (''the machine'') is wrong. Our experimental data show that the density of the identified problems is an order of magnitude higher compared to alternative approaches, and that the proposed technique can identify quickly the ``big flaws'' that would typically remain uncovered.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Human Intelligence Needs Artificial Intelligence&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Daniel Weld, Mausam Mausam, Peng Dai (University of Washington)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing platforms, such as Amazon Mechanical Turk, have enabled the construction of scalable applications for tasks ranging from product categorization and photo tagging to audio transcription and translation. These vertical applications are typically realized with complex, self-managing workflows that guarantee quality results. But constructing such workflows is challenging, with a huge number of alternative decisions for the designer to consider. Artificial intelligence methods can greatly simplify the process of creating complex crowdsourced workflows. We argue this thesis by presenting the design of TurKontrol 2.0, which uses machine learning to continually refine models of worker performance and task difficulty. Using these models, TurKontrol 2.0 uses decision-theoretic optimization to 1) choose between alternative workflows, 2) optimize parameters for a workflow, 3) create personalized interfaces for individual workers, and 4) dynamically control the workflow. Preliminary experience suggests that these optimized workflows are significantly more economical than those generated by humans.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Worker Motivation in Crowdsourcing and Human Computation&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Nicolas Kaufmann; Thimo Schulze (University of Mannheim)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Many human computation systems use crowdsourcing markets like Amazon Mechanical Turk to recruit human workers. The payment in these markets is usually very low, and still collected demographic data shows that the participants are a very diverse group including highly skilled full time workers. Many existing studies on their motivation are rudimental and not grounded on established motivation theory. Therefore, we adapt different models from classic motivation theory, work motivation theory and Open Source Software Development to crowdsourcing markets. The model is tested with a survey of 431 workers on Mechanical Turk. We find that the extrinsic motivational categories (immediate payoffs, delayed payoffs, social motivation) have a strong effect on the time spent on the platform. For many workers, however, intrinsic motivation aspects are more important, especially the different facets of enjoyment based motivation like “task autonomy” and “skill variety”. Our contribution is a preliminary model based on established theory intended for the comparison of different crowdsourcing platforms.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Honesty in an Online Labor Market&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Winter Mason, Siddharth Suri, Daniel Goldstein (Yahoo! Research)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
The efficient functioning of markets and institutions assumes a certain degree of honesty from participants. In labor markets, for instance, employers benefit from employees who will render meaningful work, and employees benefit from employers who will pay the promised amount for services rendered. We use an established method for detecting dishonest behavior in a series of experiments conducted on Amazon Mechanical Turk, a popular online labor market. Our first experiment estimates a baseline amount of dishonesty for this task in the population sample. The second experiment tests the hypothesis that the level of dishonesty in the population will be sensitive to the relative amount that can be gained by dishonest reporting, and the third experiment manipulates the degree to which dishonest reporting can be detected at the individual level. We conclude with a demographic and cross-cultural analysis of the predictors of dishonest reporting in this market.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Building a Persistent Workforce on Mechanical Turk for Multilingual Data Collection&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;David Chen (UT Austin); William Dolan (Microsoft Research)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Traditional methods of collecting translation and paraphrase data are prohibitively expensive, making constructions of large, new corpora difficult. While crowdsourcing offers a cheap alternative, quality control and scalability can become problematic. We discuss a novel annotation task that uses videos as the stimulus which discourages cheating. It also only requires monolingual speakers, thus making it easier to scale since more workers are qualified to contribute. Finally, we employed a multi-tiered payment system that helps retain good workers over the long-term, resulting in a persistent, high-quality workforce. We present the results of one of the largest linguistic data collection efforts using Mechanical Turk, yielding 85K English sentences and more than 1k sentences for each of a dozen more languages. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;CrowdSight: Rapidly Prototyping Intelligent Visual Processing Apps&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Mario Rodriguez (UCSC); James Davis&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
We describe a framework for rapidly prototyping applications which require intelligent visual processing, but for which there does not yet exist reliable algorithms, or for which engineering those algorithms is too costly. The framework, CrowdSight, leverages the power of crowdsourcing to offload intelligent processing to humans, and enables new applications to be built quickly and cheaply, affording system builders the opportunity to validate a concept before committing significant time or capital. Our service accepts requests from users either via email or simple mobile applications, and handles all the communication with a backend human computation platform. We build redundant requests and data aggregation into the system freeing the user from managing these requirements. We validate our framework by building several test applications and verifying that prototypes can be built more easily and quickly than would be the case without the framework. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Digitalkoot: Making Old Archives Accessible Using Crowdsourcing&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Otto Chrons, Sami Sundell (Microtask)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In this paper, we present Digitalkoot, a system for fixing errors in the Optical Character Recognition (OCR) process of old texts through the use of human computation. By turning the work into simple games, we are able to attract a great number of volunteers to donate their time and cognitive capacity for the cause. Our analysis shows how untrained people can reach very high accuracy through the use of crowdsourcing. Furthermore we analyze the effect of social media and gender on participation levels and the amount of work accomplished. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Error Detection and Correction in Human Computation: Lessons from the WPA&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;David Alan Grier (GWU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Human Computation is, of course, a very old field with a forgotten literature that treats many of the key problems, especially error detection and correction. The obvious methods of error detection, duplicate calculation, have proven to be subject to Babbage's Rule: Different workers using the same methods on the same data will tend to make the same errors. To avoid the consequences of this rule, early human computers developed a disciplined regimen to identify and correct mistakes. This paper reconstructs those methods, puts them in a modern context and identifies their implications for the modern version of human computation. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Programmatic gold: targeted and scalable quality assurance in crowdsourcing&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Dave Oleson, Vaughn Hester, Alex Sorokin, Greg Laughlin, John Le, Lukas Biewald (CrowdFlower)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing is an effective tool for scalable data annotation in both research and enterprise contexts. Due to crowdsourcing's open participation model, quality assurance is critical to the success of any project. Present methods rely on EM-style post-processing or manual annotation of large gold standard sets. In this paper we present an automated quality assurance process that is inexpensive and scalable. Our novel process relies on programmatic gold creation to provide targeted training feedback to workers and to prevent common scamming scenarios. We find that it decreases the amount of manual work required to manage crowdsourced labor while improving the overall quality of the results. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;An Iterative Dual Pathway Structure for Speech-to-Text Transcription&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Beatrice Liem, Haoqi Zhang, Yiling Chen (Harvard University)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In this paper, we develop a new human computation algorithm for speech-to-text transcription that can potentially achieve the high accuracy of professional transcription using only microtasks deployed via an online task market or a game. The algorithm partitions audio clips into short 10-second segments for independent processing and joins adjacent outputs to produce the full transcription. Each segment is sent through an iterative dual pathway structure that allows participants in either path to iteratively refine the transcriptions of others in their path while being rewarded based on transcriptions in the other path, eliminating the need to check transcripts in a separate process. Initial experiments with local subjects show that produced transcripts are on average 96.6% accurate. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;An Extendable Toolkit for Managing Quality of Human-based Electronic Services&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;David Bermbach, Robert Kern, Pascal Wichmann, Sandra Rath, Christian Zirpins (KIT)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Micro-task markets like Amazon MTurk enable online workers to provide human intelligence as Web-based on demand services (so called people services). Businesses facing large amounts of knowledge work can benefit from increased flexibility and scalability of their workforce but need to cope with reduced control of result quality. While this problem is well recognized, it is so far only rudimentarily addressed by existing platforms and tools. In this paper, we present a flexible research toolkit which enables experiments with advanced quality management mechanisms for generic micro-task markets. The toolkit enables control of correctness and performance of task fulfillment by means of dynamic sampling, weighted majority voting and worker pooling. We demonstrate its application and performance for an OCR scenario building on Amazon MTurk. The toolkit however enables the development of advanced quality management mechanisms for a large variety of people service scenarios and platforms. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;What’s the Right Price? Pricing Tasks for Finishing on Time&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Siamak Faridani, Bjoern Hartmann (UC Berkeley); Panos Ipeirotis (NYU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Many practitioners currently use rules of thumb to price tasks on online labor markets. Incorrect pricing leads to task starvation or inefficient use of capital. Formal optimal pricing policies can address these challenges. In this paper we argue that an optimal pricing policy must be based on the tradeoff between price and desired completion time. We show how this duality can lead to a better pricing policy for tasks in online labor markets. This paper makes three contributions. First, we devise an algorithm for optimal job pricing using a survival analysis model. We then show that worker arrivals can be modeled as a non-homogenous Poisson Process (NHPP). Finally using NHPP for worker arrivals and discrete choice models we present an abstract mathematical model that captures the dynamics of the market when full market information is presented to the task requester. This model can be used to predict completion times and optimal pricing policies for both public and private crowds. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Pricing Mechanisms for Online Labor Market&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Yaron Singer, Manas Mittal (UC Berkeley EECS)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In online labor markets, determining the appropriate incentives is a difficult problem. In this paper, we present dynamic pricing mechanisms for determining the optimal prices for such tasks. In particular, the mechanisms are designed to handle the intricacies of markets like Mechanical Turk (workers are coming online, requesters have budgets, etc.). The mechanisms have desirable theoretical guarantees (incentive compatibility, budget feasibility, and competitive ratio performance) and perform well in practice. Experiments demonstrate the effectiveness and feasibility of using such mechanisms in practice. &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Labor Allocation in Paid Crowdsourcing: Experimental Evidence on Positioning, Nudges and Prices&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;John Horton (ODesk); Dana Chandler (MIT)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
This paper reports the results of a natural field experiment where workers from a paid crowdsourcing environment self-select into tasks and are presumed to have limited attention. In our experiment, workers labeled any of six pictures from a 2 x 3 grid of thumbnail images. In the absence of any incentives, workers exhibit a strong default bias and tend to select images from the top-left ("focal") position; the bottom-right ("non-focal") position was the least preferred. We attempted to overcome this bias and increase the rate at which workers selected the least preferred task, by using a combination of monetary and non-monetary incentives. We also varied the saliency of these incentives by placing them in either the focal or non-focal position. Although both incentive types caused workers to re-allocate their labor, monetary incentives were more effective. Most interestingly, both incentive types worked better when they were placed in the focal position and made more salient. In fact, salient non-monetary incentives worked about as well as non-salient monetary ones. Our evidence suggests that user interface and cognitive biases play an important role in online labor markets and that salience can be used by employers as a kind of "incentive multiplier". &lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;hr align="center" style="background-color: #691f01; border: 1px; color: #691f01; display: block; height: 2px;" width="50%" /&gt;&lt;br /&gt;
&lt;b&gt;Posters&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;Developing Scripts to Teach Social Skills: Can the Crowd Assist the Author?&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Fatima Boujarwah, Jennifer Kim, Gregory Abowd, Rosa Arriaga (Georgia Tech)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
The social world that most of us navigate effortlessly can prove to be a perplexing and disconcerting place for individuals with autism. Currently there are no models to assist non-expert authors as they create customized social script-based instructional modules for a particular child. We describe an approach to using human computation to develop complex models of social scripts for a plethora of complex and interesting social scenarios, possible obstacles that may arise in those scenarios, and potential solutions to those obstacles. Human input is the natural way to build these models, and in so doing create valuable assistance for those trying to navigate the intricacies of a social life.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;CrowdLang - First Steps Towards Programmable Human Computers for General Computation&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Patrick Minder, Abraham Bernstein (University of Zurich)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing markets such as Amazon’s Mechanical Turk provide an enormous potential for accomplishing work by combining human and machine computation. Today crowdsourcing is mostly used for massive parallel information processing for a variety of tasks such as image labeling. However, as we move to more sophisticated problem-solving there is little knowledge about managing dependencies between steps and a lack of tools for doing so. As the contribution of this paper, we present a concept of an executable, model-based programming language and a general purpose framework for accomplishing more sophisticated problems. Our approach is inspired by coordination theory and an analysis of emergent collective intelligence. We illustrate the applicability of our proposed language by combining machine and human computation based on existing interaction patterns for several general computation problems.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Ranking Images on Semantic Attributes using CollaboRank&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Jeroen Janssens, Eric Postma, Jaap Van den Herik (Tilburg University)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In this paper, we investigate to what extent a large group of human workers is able to produce collaboratively a global ranking of images, based on a single semantic attribute. To this end, we developed CollaboRank, which is a method that formulates and distributes tasks to human workers, and aggregates their personal rankings into a global ranking. Our results show that a relatively high consensus can be achieved, depending on the type of the semantic attribute.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Artificial Intelligence for Artificial Artificial Intelligence&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Peng Dai, Mausam, Daniel Weld (University of Washington)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing platforms such as Amazon Mechanical Turk have become popular for a wide variety of human intelligence tasks; however, quality control continues to be a significant challenge. Recently, Dai et al (2010) propose TurKontrol, a theoretical model based on POMDPs to optimize iterative, crowd-sourced workflows. However, they neither describe how to learn the model parameters, nor show its effectiveness in a real crowd-sourced setting. Learning is challenging due to the scale of the model and noisy data: there are hundreds of thousands of workers with high-variance abilities. This paper presents an end-to-end system that first learns TurKontrol's POMDP parameters from real Mechanical Turk data, and then applies the model to dynamically optimize live tasks. We validate the model and use it to control a successive-improvement process on Mechanical Turk. By modeling worker accuracy and voting patterns, our system produces significantly superior artifacts compared to those generated through static workflows using the same amount of money.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;One Step beyond Independent Agreement: A Tournament Selection Approach for Quality Assurance of Human Computation Tasks&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Yu-An Sun, Shourya Roy (Xerox); Greg Little (MIT CSAIL)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Quality assurance remains a key topic in the human computation research field. Prior work indicates that independent agreement is effective for low difficulty tasks, but has limitations. This paper addresses this problem by proposing a tournament selection based quality control process. The experimental results from this paper show that humans are better at identifying the correct answers than generating them.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Turkomatic: Automatic, Recursive Task and Workflow Design for Mechanical Turk&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Anand Kulkarni, Matthew Can, Bjoern Hartmann (UC Berkeley)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
On today’s human computation systems, designing tasks and workflows is a difficult and labor-intensive process. Can workers from the crowd be used to help plan workflows? We explore this question with Turkomatic, a new interface to microwork platforms that uses crowd workers to help plan workflows for complex tasks. Turkomatic uses a general-purpose divide-and-conquer algorithm to solve arbitrary natural-language requests posed by end users. The interface includes a novel real-time visual workflow editor that enables requesters to observe and edit workflows while the tasks are being completed. Crowd verification of work and the division of labor among members of the crowd can be handled automatically by Turkomatic, which substantially simplifies the process of using human computation systems. These features enable a novel means of interaction with crowds of online workers to support successful execution of complex work.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;MuSweeper: Collect Mutual Exclusions with Extensive Game&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Tao-Hsuan Chang, Cheng-wei Chan, Jane Yung-jen Hsu (National Taiwan University)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Mutual exclusions are important information for machine learning. Games With A Purpose (or GWAP) provide an effective way to get a large amount of data from web users. This research proposes MuSweeper, a minesweeper-like game, to collect mutual exclusions. By embedding game theory into the game mechanism, the precision is guaranteed. Experiments showed MuSweeper can efficiently collect mutual exclusions with high precision.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;MobileWorks: A Mobile Crowdsourcing Platform for Workers at the Bottom of the Pyramid&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Prayag Narula, Philipp Gutheim, David Rolnitzky, Anand Kulkarni, Bjoern Hartmann (UC Berkeley)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
We present MobileWorks, a mobile phone-based crowdsourcing platform. MobileWorks targets workers in developing countries who live at the bottom of the economic pyramid. This population does not have access to desktop computers, so existing microtask labor markets are inaccessible to them. MobileWorks offers human OCR tasks that can be accomplished on low-end mobile phones; workers access it through their mobile web browser. To address the limited screen resolution available on low-end phones, MobileWorks segments documents into many small pieces, and sends each piece to a different worker. A first pilot study with 10 users over a period of 2 months revealed that it is feasible to do simple OCR tasks using a simple mobile-web-based application. We found that, on average, the workers complete 120 tasks per hour. Using single entry, the accuracy of workers across the different documents is 89%. We propose a multiple entry solution which increases the theoretical accuracy of the OCR to more than 99%.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Towards Task Recommendation in Micro-Task Markets&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Vamsi Ambati, Stephan Vogel, Jaime Carbonell (CMU)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
As researchers embrace micro-task markets for eliciting human input, the nature of the posted tasks moves from those requiring simple mechanical labor to requiring specific cognitive skills. On the other hand, an increase is seen in the number of such tasks and the user population in micro-task market places requiring better search interfaces for productive user participation. In this paper we posit that understanding user skill sets and presenting them with suitable tasks not only maximizes the overall quality of the output, but also attempts to maximize the benefit to the user in terms of more successfully completed tasks. We also implement a recommendation engine for suggesting tasks to users based on implicit modeling of skills and interests. We present results from a preliminary evaluation of our system using publicly available data gathered from a variety of human computation experiments recently conducted on Amazon's Mechanical Turk.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;On Quality Control and Machine Learning in Crowdsourcing&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Matthew Lease (UT Austin)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
The advent of crowdsourcing has created a variety of new opportunities for improving upon traditional methods of data collection and annotation. This in turn has created intriguing new opportunities for data-driven machine learning (ML). Convenient access to crowd workers for simple data collection has further generalized to leveraging more arbitrary crowd-based human computation to supplement ML. While new potential applications of crowdsourcing continue to emerge, a variety of practical and sometimes unexpected obstacles have already limited the degree to which its promised potential can be actually realized in practice. This paper considers two particular aspects of crowdsourcing and their interplay, data quality control (QC) and ML, reflecting on where we have been, where we are, and where we might go from here.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;CollabMap: Augmenting Maps using the Wisdom of Crowds&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Ruben Stranders, Sarvapali Ramchurn, Bing Shi, Nicholas Jennings (University of Southampton)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
In this paper we develop a novel model of geospatial data creation, called CollabMap, that relies on human computation. CollabMap is a crowdsourcing tool to get users contracted via Amazon Mechanical Turk or a similar service to perform micro-tasks that involve augmenting existing maps (e.g. GoogleMaps or Ordnance Survey) by drawing evacuation routes, using satellite imagery from GoogleMaps and panoramic views from Google Street View. We use human computation to complete tasks that are hard for a computer vision algorithm to perform or to generate training data that could be used by a computer vision algorithm to automatically define evacuation routes.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Improving Consensus Accuracy via Z-score and Weighted Voting &lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Hyun Joon Jung, Matthew Lease (UT Austin)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
We describe a Z-score based outlier detection method for detection and filtering of inaccurate crowd workers. After filtering, we aggregate labels from remaining workers via simple majority voting or feature-weighted voting. Both supervised and unsupervised features are used, individually and in combination, for both outlier detection and weighted voting. We evaluate on noisy judgments collected from Amazon Mechanical Turk which assess Web search relevance of query/document pairs. We find that filtering in combination with multi-feature weighted voting achieves 8.94% relative error reduction for graded accuracy (4.25% absolute) and 5.32% for binary accuracy (3.45% absolute).&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Making Searchable Melodies: Human vs. Machine&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Mark Cartwright, Zafar Rafii, Jinyu Han, Bryan Pardo (Northwestern University)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Systems that find music recordings based on hummed or sung, melodic input are called Query-By-Humming (QBH) systems. Such systems employ search keys that are more similar to a cappella singing than the original recordings. Successfully deployed systems use human computation to create these search keys: hand-entered midi melodies or recordings of a cappella singing. Tunebot is one such system. In this paper, we compare search results using keys built from two automated melody extraction systems to those gathered using two populations of humans: local paid singers and Amazon Turk workers.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;PulaCloud: Using Human Computation to Enable Development at the Bottom of the Economic Ladder&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Andrew Schriner (University of Cincinnati); Daniel Oerther (Missouri University of Science and Technology); James Uber (University of Cincinnati)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
This research aims to explore how Human Computation can be used to aid economic development in communities experiencing extreme poverty throughout the world. Work is ongoing with a community in rural Kenya to connect them to employment opportunities through a Human Computation system. A feasibility study has been conducted in the community using the 3D protein folding game Foldit and Amazon’s Mechanical Turk. Feasibility has been confirmed and obstacles identified. Current work includes a pilot study doing image analysis for two research projects and developing a GUI that is usable by workers with little computer literacy. Future work includes developing effective incentive systems that operate both at the individual level and the group level and integrating worker accuracy evaluation, worker compensation, and result-credibility evaluation.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Towards Large-Scale Processing of Simple Tasks with Mechanical Turk&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Paul Wais, Shivaram Lingamneni, Duncan Cook, Jason Fennell, Benjamin Goldenberg, Daniel Lubarov, David Marin, Hari Simons (Yelp, inc.)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Crowdsourcing platforms such as Amazon's Mechanical Turk (AMT) provide inexpensive and scalable workforces for processing simple online tasks. Unfortunately, workers participating in crowdsourcing tend to supply work of inconsistent or low quality. We report on our experiences using AMT to verify hundreds of thousands of local business listings for the online directory Yelp.com. Using expert-verified changes, we evaluate the accuracy of our workforce and present the results of preliminary experiments that work towards filtering low-quality workers and correcting for worker bias. Our report seeks to inform the community of practical and financial constraints that are critical to understanding the problem of quality control in crowdsourcing systems.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Learning to Rank From a Noisy Crowd&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Abhimanu Kumar, Matthew Lease (UT Austin)&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
We consider how to most effectively use crowd-based relevance assessors to produce training data for learning to rank. This integrates two lines of prior work: studies of unreliable crowd-based binary annotation for binary classification, and studies for aggregating graded relevance judgments from reliable experts for ranking. To model varying performance of the crowd, we simulate annotation noise with varying magnitude and distributional properties. Evaluation on three LETOR test collections reveals a striking trend contrary to prior studies: single labeling outperforms consensus methods in maximizing learner rate (relative to annotator effort). We also see surprising consistency of learning rate across noise distributions, as well as greater challenge with the adversarial case for multi-class labeling.&lt;br /&gt;
&lt;br /&gt;
&lt;/li&gt;
&lt;/ul&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-5585467094888505184?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=2tGfg2jdcbU:a91ns58j9nk:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=2tGfg2jdcbU:a91ns58j9nk:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=2tGfg2jdcbU:a91ns58j9nk:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/2tGfg2jdcbU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/5585467094888505184/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/06/accepted-papers-for-hcomp-2011.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5585467094888505184?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5585467094888505184?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/2tGfg2jdcbU/accepted-papers-for-hcomp-2011.html" title="Accepted papers for the 3rd Human Computation Workshop (HCOMP 2011)" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/06/accepted-papers-for-hcomp-2011.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkUNRnY8fip7ImA9WhdTEEs.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-5632533162427331699</id><published>2011-06-20T11:37:00.006-04:00</published><updated>2011-07-07T13:38:17.876-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-07T13:38:17.876-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="tagasauris" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="machine learning" /><title>Crowdsourcing and the discovery of a hidden treasure</title><content type="html">&lt;div class="separator" style="clear: both; text-align: left;"&gt;A few months back, I started advising &lt;a href="http://www.tagasauris.com/"&gt;Tagasauris&lt;/a&gt;, a company that provides media annotation services, using crowdsourcing.&amp;nbsp;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;This month, Tagasauris is featured in a &lt;a href="http://www.wired.com/"&gt;Wired&lt;/a&gt; article, titled "&lt;a href="http://www.wired.com/magazine/2011/06/pl_americangraffiti/?pid=5850"&gt;Hidden Treasure&lt;/a&gt;". It is a story of rediscovering a "lost" set of photos, from the shooting of the movie "American&amp;nbsp;Graffiti". You can see the article by clicking the image:&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;a alt="Hidden Treasure

Rediscovered: Never before seen American Graffiti photos in the Magnum archive.

IN MARCH, the Magnum photo agency stumbled onto a remarkable find: Nearly two dozen lost photos from the set of American Graffiti. The images feature pre-Star Wars George Lucas as well as cast members like Richard Dreyfuss, Mackenzie Phillips, and Ron Howard, and they offer an unparalleled look at the making of the 1973 film. So where did Magnum discover these gems? In its own archive. Magnum had hired Tagasauris, a company that tags photos using Amazon Mechanical Turk workers, to add keywords to hundreds of thousands of untagged images. When those workers came across the Graffiti photos, they quickly identified the actors, scenes, and other image details. Magnum originally hoped the phototagging would improve its archive's searchability, which it has, but the agency was also thrilled that the initiative unearthed such an incredible trove - images that visually resurrect an American classic." href="http://2.bp.blogspot.com/-6HiUV_rSzig/Tfzd3Q5nRNI/AAAAAAAAs6A/BO6WhPfEn-o/s1600/wired-tagasauris-magnum-american_graffitti_july_2011.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img alt="Hidden Treasure

Rediscovered: Never before seen American Graffiti photos in the Magnum archive.

IN MARCH, the Magnum photo agency stumbled onto a remarkable find: Nearly two dozen lost photos from the set of American Graffiti. The images feature pre-Star Wars George Lucas as well as cast members like Richard Dreyfuss, Mackenzie Phillips, and Ron Howard, and they offer an unparalleled look at the making of the 1973 film. So where did Magnum discover these gems? In its own archive. Magnum had hired Tagasauris, a company that tags photos using Amazon Mechanical Turk workers, to add keywords to hundreds of thousands of untagged images. When those workers came across the Graffiti photos, they quickly identified the actors, scenes, and other image details. Magnum originally hoped the phototagging would improve its archive's searchability, which it has, but the agency was also thrilled that the initiative unearthed such an incredible trove - images that visually resurrect an American classic." border="0" height="400" src="http://2.bp.blogspot.com/-6HiUV_rSzig/Tfzd3Q5nRNI/AAAAAAAAs6A/BO6WhPfEn-o/s400/wired-tagasauris-magnum-american_graffitti_july_2011.jpg" width="298" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
Since there are some interesting aspects of the story, which go beyond the simple "tag using MTurk" story, I would like to give a few more details that I consider interesting.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Magnum Photos&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
One of the clients of Tagasauris is &lt;a href="http://www.magnumphotos.com/"&gt;Magnum Photos&lt;/a&gt;, a cooperative &lt;a href="http://www.magnumphotos.com/C.aspx?VP=XSpecific_MAG.StaticPage_VPage&amp;amp;SP=photographers_list&amp;amp;l1=0&amp;amp;XXAPXX=SubPanel10"&gt;owned by its own photographer members&lt;/a&gt;, designated to handle the commercial aspect of their own work. The list of members of Magnum Photos includes photographers such as&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Robert_Capa"&gt;Robert Capa&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/Henri_Cartier-Bresson"&gt;Henri Cartier-Bresson&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/David_Seymour"&gt;David Seymour&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/George_Rodger"&gt;George Rodger&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/Steve_McCurry"&gt;Steve McCurry&lt;/a&gt;, and &lt;a href="http://www.magnumphotos.com/C.aspx?VP=XSpecific_MAG.StaticPage_VPage&amp;amp;SP=photographers_list&amp;amp;l1=0&amp;amp;XXAPXX=SubPanel10"&gt;many others&lt;/a&gt;.&amp;nbsp;(See their &lt;a href="http://en.wikipedia.org/wiki/Magnum_Photos"&gt;Wikipedia entry&lt;/a&gt;&amp;nbsp;for further details.) A few photos in the Magnum Photos archive that you may recognize:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXLU3Q07&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2S5RYDYF53IF&amp;amp;PN=3&amp;amp;CT=Search"&gt;&lt;img border="0" height="200" src="http://2.bp.blogspot.com/-UKRmgmVg5CA/TfzDd99r67I/AAAAAAAAs54/m5yU81Q3fHA/s200/SteveMcCurry-AfghanGirl.jpg" width="132" /&gt;&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXLU3CWQ&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2K7O3R3AX6QL&amp;amp;PN=5&amp;amp;CT=Search"&gt;&lt;img border="0" height="200" src="http://1.bp.blogspot.com/-oYowUgGAAOM/TfzDcTzmbYI/AAAAAAAAs5s/JsvOtWa7dXI/s200/ElliottErwitt-MarilynMonroe.jpg" width="130" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXLOB3F9&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2S5RYD1PEVR4&amp;amp;PN=1&amp;amp;CT=Search"&gt;&lt;img border="0" height="133" src="http://1.bp.blogspot.com/-uligbQqXNJY/Tf9iwyD2xMI/AAAAAAAAs6s/Tpro3iNask0/s200/ReneBurri-CheGuevara.jpg" width="200" /&gt;&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXLU3B6X&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2S5RYDO52FJR&amp;amp;PN=5&amp;amp;CT=Search"&gt;&lt;img border="0" height="133" src="http://2.bp.blogspot.com/-fG1er5NJOp8/TfzDdWjHspI/AAAAAAAAs50/8pF7vqVK-iE/s200/RobertCapa-SpanishCivilWar.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
One of my favorite parts of the Magnum website is the &lt;a href="http://www.magnumphotos.com/C.aspx?VP=XSpecific_MAG.StaticPage_VPage&amp;amp;SP=agency_yearahead"&gt;Archival Calendar&lt;/a&gt;, where they have a set of photos showcasing various historic events. Beats Facebook browsing by a wide margin. But let's get back to the story.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The problem&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
So, what is the problem of Magnum Photos?&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; The same problem that almost &lt;u&gt;every single big media company faces&lt;/u&gt;:&amp;nbsp;a very large number of media objects without useful, descriptive metadata&lt;/span&gt;&lt;/b&gt;. No keywords, no description, nothing to aid the discovery process. Just the image file and mechanical data about film number etc. (Well, my own photo archive looks very similar...)&lt;br /&gt;
&lt;br /&gt;
This lack of metadata is the case not only for the archive but also for the new, incoming photos that arrive every day from its members. (To put it mildly, photographers are not exactly eager to sit, tag, and describe the hundreds of photos they shoot every day.)&amp;nbsp;This means that a large fraction of the Magnum Photos archive, which contains millions of photos,&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; is virtually unsearchable&lt;/span&gt;&lt;/b&gt;. The photos are effectively lost in the digital world, even though they are digitized and available on the Internet.&lt;br /&gt;
&lt;br /&gt;
An example of such a case of "lost" photos is a set of photos from the shooting of the movie "&lt;a href="http://en.wikipedia.org/wiki/American_Graffiti"&gt;American Graffiti&lt;/a&gt;".&amp;nbsp;People at Magnum Photos &lt;i&gt;knew &lt;/i&gt;that one of their photographers, &lt;a href="http://en.wikipedia.org/wiki/Dennis_Stock"&gt;Dennis Stock&lt;/a&gt;&amp;nbsp;who died in 2009, was on set during the production of the movie, and he had taken photos of the, then young and unknown, members of the team.&amp;nbsp;Magnum Photos had no idea &lt;i&gt;where &lt;/i&gt;these photos were. &lt;b&gt;They &lt;i&gt;knew &lt;/i&gt;they digitized the archive of Dennis Stock, they &lt;i&gt;knew &lt;/i&gt;that the photos are in the archive, but nobody could locate the photos &lt;/b&gt;within the millions of other, untagged photos.&lt;br /&gt;
&lt;br /&gt;
For those unfamiliar with the movie,&amp;nbsp;American Graffiti is a 1973 film by&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/George_Lucas"&gt;George Lucas&lt;/a&gt;&amp;nbsp;(pre-Star Wars), starring the then-unknown&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Richard_Dreyfuss"&gt;Richard Dreyfuss&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Ron_Howard"&gt;Ron Howard&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Paul_Le_Mat"&gt;Paul Le Mat&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Charles_Martin_Smith"&gt;Charles Martin Smith&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Cindy_Williams"&gt;Cindy Williams&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Candy_Clark"&gt;Candy Clark&lt;/a&gt;,&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Mackenzie_Phillips"&gt;Mackenzie Phillips&lt;/a&gt;&amp;nbsp;and&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Harrison_Ford"&gt;Harrison Ford&lt;/a&gt;.&amp;nbsp;The later rise to stardom of all these actors has made the movie something of a cult classic.&lt;br /&gt;
&lt;br /&gt;
The Magnum Photos archive is a trove of similar "hidden treasures". Sitting there, waiting for some accidental, serendipitous discovery.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The tagging solution and the machine support&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Magnum Photos had its own set of annotators. However, the annotators could not catch up even with the volume of incoming photos. Going back and annotating the archive was an even more daunting task. This meant lost revenue for&amp;nbsp;Magnum Photos, because if you cannot find a photo, you cannot license it, and you cannot sell it.&lt;br /&gt;
&lt;br /&gt;
Tagasauris proposed to solve the problem using crowdsourcing. With hundreds of workers working in parallel, it became possible to tame the influx of untagged incoming photos, and start going backwards and tagging the archive.&lt;br /&gt;
&lt;br /&gt;
Of course, vanilla photo tagging is not a solution. Workers type&amp;nbsp;misspelled&amp;nbsp;words (named entities are systematic offenders), try to get away with generic tags, etc. Following the lessons learned from the ESP Game and all the subsequent studies, Tagasauris built solutions for cleaning the tags, rewarding specificity, and, in general, cleaning up and ensuring high quality in the noisy tagging process.&lt;br /&gt;
&lt;br /&gt;
A key component was the ability to match the tags entered by the workers with named entities, which themselves were then connected to Freebase entities.&lt;br /&gt;
&lt;br /&gt;
The result? When workers were tagging the photos from Magnum Photos, they identified the actors in the shots, and the machine process in the background assigned "semantic tags" to the photos, such as [&lt;a href="http://www.freebase.com/view/en/george_lucas"&gt;George Lucas&lt;/a&gt;],&amp;nbsp;[&lt;a href="http://www.freebase.com/view/en/richard_dreyfuss"&gt;Richard Dreyfuss&lt;/a&gt;], [&lt;a href="http://www.freebase.com/view/en/ron_howard"&gt;Ron Howard&lt;/a&gt;], [&lt;a href="http://www.freebase.com/view/en/mackenzie_phillips"&gt;Mackenzie Phillips&lt;/a&gt;],&amp;nbsp;[&lt;a href="http://www.freebase.com/view/en/harrison_ford"&gt;Harrison Ford&lt;/a&gt;] and others.&lt;br /&gt;
&lt;br /&gt;
Yes, humans + machines generate things that are better than the sum of the parts.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The machine support, cont.&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
So, how did the workers discover the photos from American Graffiti?&amp;nbsp;As you may imagine, the workers had no idea that the photos that they were tagging were from the shooting of the film. They could identify the actors, but that was it.&lt;br /&gt;
&lt;br /&gt;
Going from actor tagging to understanding the context of the photo shooting is a task that cannot be required of lay, non-expert taggers.&amp;nbsp;You need experts that can "connect the dots". Unfortunately, subject experts are expensive. And they tend not to be interested in tedious tasks, such as assigning tags to photos.&lt;br /&gt;
&lt;br /&gt;
However, this "connecting the dots" is a task where machines are better than humans. We have recently seen how Watson, by having access to semantically connected ontologies (often generated by humans), could identify the correct answers to a wide variety of questions.&lt;br /&gt;
&lt;br /&gt;
Tagasauris employed a similar strategy. Knowing the entities that appear in a set of photos, it is then possible to identify additional metadata. For example, look at the five actors that were identified in the photos (red boxes, with white background), and the associated semantic graph that links the different entities together:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-Foyfqn00l90/Tfzd2-lm-nI/AAAAAAAAs58/_7nlnEs1CnM/s1600/semantic-annotation-discovery.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="385" src="http://4.bp.blogspot.com/-Foyfqn00l90/Tfzd2-lm-nI/AAAAAAAAs58/_7nlnEs1CnM/s400/semantic-annotation-discovery.jpg" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
Bingo! The entity that connects together the different entities is the entity "&lt;a href="http://www.freebase.com/view/en/american_graffiti"&gt;American Graffiti&lt;/a&gt;", which was not used by &lt;i&gt;any &lt;/i&gt;worker.&lt;br /&gt;
&lt;br /&gt;
At this point, you can understand how the story evolved. A graph activation/spreading algorithm suggests the tag, experts can verify it, and the rest is history.&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://blog.magnumphotos.com/meagan_young.html"&gt;Meagan Young&lt;/a&gt;&amp;nbsp;looked at the stream of incoming photos, noticed the American Graffiti tag, realized that the&amp;nbsp;"lost" photos were found, and she notified the others at Magnum Photos and Todd Carter, the CEO of Tagasauris.&amp;nbsp;The "hidden treasure" was identified, and the Wired story was underway...&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Crowdsourcing: It is not just about the humans&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
This is not a story to show how cool discovery based on linked entities is. This is old news for many people that work with such data. However, this &lt;i&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;is &lt;/span&gt;&lt;/b&gt;&lt;/i&gt;a &lt;i&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;simple &lt;/span&gt;&lt;/b&gt;&lt;/i&gt;example of using crowdsourcing in a more intelligent way than it is currently being used. Machines cannot do everything (in fact, they are especially bad at tasks that are "trivial" for humans) but when humans provide enough input, the machines can take it from there, and significantly improve the overall process.&lt;br /&gt;
&lt;br /&gt;
Someone can even see the obvious next step: Use face recognition and allow tagging to be done collaboratively with humans and machines. Google and Facebook have very advanced algorithms for face recognition. Match them intelligently with humans, and you are way ahead of solutions that rely simply on humans to tag faces.&lt;br /&gt;
&lt;br /&gt;
I think the lesson is clear: &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;Let humans do what they do best, and let machines do what they do best&lt;/span&gt;&lt;/b&gt;. (And expect the balance to change as we move forward and machines can do more.) Undoing and ignoring decades of research in computer science, just because it is easier to use cheap labor, is a disservice not only to computer science. It is a disservice to the potential of crowdsourcing as well.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-5632533162427331699?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=A5VftwLf91Y:Hg8LoLU13cg:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=A5VftwLf91Y:Hg8LoLU13cg:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=A5VftwLf91Y:Hg8LoLU13cg:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/A5VftwLf91Y" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/5632533162427331699/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/06/crowdsourcing-and-discovery-of-hidden.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5632533162427331699?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5632533162427331699?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/A5VftwLf91Y/crowdsourcing-and-discovery-of-hidden.html" title="Crowdsourcing and the discovery of a hidden treasure" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-6HiUV_rSzig/Tfzd3Q5nRNI/AAAAAAAAs6A/BO6WhPfEn-o/s72-c/wired-tagasauris-magnum-american_graffitti_july_2011.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/06/crowdsourcing-and-discovery-of-hidden.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE8ASXY9eyp7ImA9WhZVFk0.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-3256121701426974704</id><published>2011-05-28T13:14:00.000-04:00</published><updated>2011-05-28T13:14:08.863-04:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-05-28T13:14:08.863-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="education" /><title>Crowdsourcing Education</title><content type="html">&lt;i&gt;This is a guest blog post by Lindsey Wright.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;hr /&gt;&lt;br /&gt;
&lt;b&gt;Crowdsourcing Education  &lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Technology and the popularity of online classes are radically transforming the educational process on both national and international scales. Society sees an increased failure of traditional schools in meeting goals to fully educate student populations to produce future generations of innovative, intellectual community leaders and workers. It is time to look at online learning and other alternative choices in education as possible solutions to complex obstacles and challenges of traditional learning routes.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Home Schooling Expands&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
As elementary and secondary schools continue to lag behind academically, home schooling movements continue to expand. &lt;a href="http://nces.ed.gov/fastfacts/display.asp?id=91"&gt; The National Household Education Survey Program &lt;/a&gt; found a 36 percent increase in home-schooled populations for reasons other than health from 2003 to 2007 and a 74 percent increase from 1999 to 2007. Parents cite a lack of moral education and safety fears as primary reasons to abandon the traditional school model. However, if parents enrolled their children in an &lt;a href="http://www.onlineschools.org/"&gt;online school&lt;/a&gt; these problems could be eliminated. For instance, overworked, tired parents would not have the additional responsibility of planning lessons and students would be held accountable for their learning. Students would also have fewer distractions to concentrate on their studies, thus eliminating discipline problems.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Who's Watching the Kids?&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Additionally, in 2002, only 7 percent of U.S. families survived on one spouse’s income, according to the Population Reference Bureau. As such, parents are spending a more substantial amount of time at work, leaving little time for parental school involvement. Overwhelmed teachers routinely contact parents primarily for discipline problems and not for building academic progress. As the primary years lay the foundation for future academic success, the educational and business community must look for new ways to transform education methods to ensure students will have the keys to higher learning opportunities and a lifetime of success. With the wealth of information found on the Internet, online classes could utilize educational Web sites and software that engage students in ways beyond the traditional classroom. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;High School Dropouts Increasing&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
According to the &lt;a href="http://www.all4ed.org/files/GraduationRates_FactSheet.pdf"&gt; Alliance for Excellent Education &lt;/a&gt;, 1.2 million students annually fail to graduate from school. Yet due to the rapid growth of knowledge, businesses and industry are demanding innovative workers equipped with advanced skills in communication, mathematics and technology. Students who fail to gain these skills are less likely to earn decent salaries, forcing the government to spend more on poverty fighting programs such as Medicaid. Online classes would aid in dropout prevention by providing students with individualized instruction, allowing them to focus on gaining the practical, innovative working skills they need. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Teacher Attainment Issues&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
A study by the &lt;a href="http://www.nova.edu/ssss/QR/QR13-1/gonzalez.pdf"&gt; National Education Association &lt;/a&gt; found that 50 percent of new teachers quit teaching within a five year period, due to low salaries, heavy pressures of administrators and poor working conditions. Though teaching can be a fulfilling career, new teachers often lack the support of veteran co-workers and supervisors, leaving the new teacher lost in a maze of bureaucracy and hopelessness. This task can become overwhelming for teachers lacking support from parents, administration and the community at large. By streamlining the curriculum through online classes, teachers would be freed to devote more time to the art of teaching and have more time to spend learning from peers and gaining the support they need, reducing teacher attrition rates.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The New Economy&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
The global economy is radically different from what it was even fifty years ago, when America was a leading global power of industry and commerce. Today, international consumer and industry markets are the norm, as seen with technological leaders of Google and Microsoft, whose headquarters are not located in the United States, but in Ireland. American industries, forced by heavy government regulations, are relocating facilities to countries with friendlier business climates, such as India. A young person without a solid education will have severe difficulties in this new highly competitive job market. However the use of Web chats, videos and other learning projects will transition students into this global economy, and provide engaging activities to keep students interested. For example, instead of learning foreign languages through the traditional methods of worksheets and repetitive instruction, students could learn through interactive distance learning, eventually conversing with foreign students. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Popularity of Online classes&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Colleges are already embracing online classes, with the use of popular programs such as Blackboard. Flexibility, availability of classes anywhere in the country and streamlined instruction are benefits of this new technology. Instead of being lazily spoon-fed direct instruction by teachers and faculty members, students can take control of their education through individualized instruction with unlimited access to the Internet from the convenience of their own home. This also allows students to save time, money, and gas while pursuing a bachelor’s or master’s degree. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Online Classes - The Solution?&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
The future of education depends on the radical transformation of attitudes and traditional methods associated with it. If these are not changed, it is likely that education in the U.S. will be doomed to miserable failure. With online and distance learning taking a strong hold in the education community, expansion of these services could decrease dropout rates, eliminate disciplinary problems and reduce new teacher attrition and instability. Likewise, schools would no longer have funding problems, as much could be saved by the elimination of transportation and utility costs. Parents would also be more aware of their child's academic progress and would have the added peace of mind of knowing that their child was in a safe environment. School uniforms and other required classroom supplies would also be eliminated from the family budget. &lt;br /&gt;
&lt;br /&gt;
In order for the United States to again become recognized as a global leader, we must look to online classes and distance learning as viable options to solving the issues that plague our education system. The traditional learning model is no longer functional in achieving educational goals for this country. Technology is making this revolution possible, if we believe in its potential. We cannot continue to use traditions and methods of the past if we are to educate the students of tomorrow.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-3256121701426974704?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Uq4Oh9kAhb0:dKRncoCO6CU:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=Uq4Oh9kAhb0:dKRncoCO6CU:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=Uq4Oh9kAhb0:dKRncoCO6CU:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/Uq4Oh9kAhb0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/3256121701426974704/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/05/crowdsourcing-education.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3256121701426974704?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3256121701426974704?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/Uq4Oh9kAhb0/crowdsourcing-education.html" title="Crowdsourcing Education" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/05/crowdsourcing-education.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08CRHk4cCp7ImA9WhZWFEw.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-5502252624067659260</id><published>2011-05-13T18:02:00.020-04:00</published><updated>2011-05-14T20:37:45.738-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-05-14T20:37:45.738-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category 
scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="incentives" /><category scheme="http://www.blogger.com/atom/ns#" term="payment" /><title>Pay Enough or Don't Pay at All</title><content type="html">&lt;div style="text-align: justify;"&gt;&lt;b&gt;No good deed goes unpunished&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;A while back, we were working with Dahn Tamir on identifying spam tasks and requesters on the Mechanical Turk platform. Dahn took the lead and built a&amp;nbsp;task on MTurk&amp;nbsp;in which Turkers could see the (other) newly posted tasks on MTurk, and flag the obvious spam ones. Since this was not a task from which he could benefit, he asked workers to rate as many tasks as possible without submitting the task as "completed", to keep the costs down. Workers were happy to collaborate, and effectively work for free, in order to clear the market. We were collecting data nicely. &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;And then, I received some minimal funding for the project ($1,000 to be exact). At that point, I thought that it would be a nice gesture to actually start paying the workers. So, we created a new task, we calibrated for time to pay around 7 dollars an hour, and we posted the task. We were expecting workers to be happy. They were doing the work for free before; now they would not only help clean the market, but they would also get paid for this!&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;The result? A few positive messages with a thank-you note. But also a big backlash: "You, fat cat academic, with all the grants, you want us to work for peanuts?". "Hey, big prof, would you like to be paid minimum wage for your work?". "Yeah, we should be the slaves doing all the grunge work for your research, so that you can get the fame." &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;I was shocked. What happened? I tried to remind the workers that they were doing the same task for free before, but it did not really make a difference. Actually, it made things worse.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Market norms vs. social norms&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Then, I remembered. Dan Ariely, in his book "Predictably Irrational", has warned about this. There are the &lt;b&gt;social norms&lt;/b&gt; and the &lt;b&gt;market norms&lt;/b&gt;. When no money is involved, the exchanges operate using social norms.  Once you put a price on a task, it becomes part of a market norm. It can be measured and compared. &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;When the workers were not getting paid, they were working towards a noble goal: Clean the market from the spammers. By putting a price on the task of classifying spam tasks, we essentially told the workers how much we value their work: minimum wage. Instead of offering their priceless help, they were being valued as unskilled workers, like every other worker in the market. &lt;b&gt;Money and altruism do not mix.&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Somebody must have studied that before&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Needless to say, examining the influence of money on performance and motivation is not a new topic. A wonderful paper that deals with this topic is "&lt;a href="http://www.jstor.org/stable/2586896"&gt;Pay Enough or Don't Pay at All&lt;/a&gt;" by Gneezy and Rustichini, published back in 2000 (625 citations so far, according to Google Scholar). Instead of trying to describe the paper myself, I will just list here the succinct abstract:&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;blockquote&gt;&lt;div style="text-align: justify;"&gt;Economists usually assume that monetary incentives improve performance, and psychologists claim that the opposite may happen. We present and discuss a set of experiments designed to test these contrasting claims. We found that the effect of monetary compensation on performance was not monotonic. In the treatments in which money was offered, a larger amount yielded a higher performance. However, offering money did not always produce an improvement: subjects who were offered monetary incentives performed more poorly than those who were offered no compensation. Several possible interpretations of the results are discussed.&lt;/div&gt;&lt;/blockquote&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;I would encourage anyone to read the paper, as it contains extensive discussion of various models and explanations. I would definitely not do it justice if I claimed to have fully covered the content of the paper here. However, I would like to highlight some parts below.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Gneezy and Rustichini extended research in psychology from the 1970's, which examined the difference between intrinsic and extrinsic motivation.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;blockquote&gt;&lt;div style="text-align: justify;"&gt;Psychologists study behavior modification through conditioning (in the  case  of the behaviorist school)  or learning (for the cognitive school). We do not. To illustrate the difference, we may consider the classic experiment reported in &lt;a href="http://www.psych.wdev.rochester.edu/SDT/documents/1971_Deci.pdf"&gt;Deci [1971]&lt;/a&gt;. He  had college  students play with a puzzle in three successive sessions. In the  first session participants were left to play freely. In the second  session subjects in one group received payment if they solved the  puzzle, while the control group did not. In a  third session the  subjects were again left to play freely. The amount of time spent on free activity in the first and  third session was  taken as  a measure  of intrinsic motivation.&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt; &lt;/span&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;Deci  found that in the third  session the experimental group spent less time than the control  group playing with the puzzle, and he concluded that the reward offered had decreased the intrinsic motivation of subjects in the  first group over the three sessions.&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;/blockquote&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;That was a result from research in the 1970's. Gneezy and Rustichini wanted to also examine the effect of money in non-sequential environments. So, they conducted the following experiments:&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Effect of &lt;i&gt;additional&lt;/i&gt; payment on a paid task&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;blockquote&gt;&lt;div style="text-align: justify;"&gt;At the beginning of the experiment, &lt;b&gt;each student was promised a fixed payment of NIS  60 for participation&lt;/b&gt;. (NIS = New Israeli Shekel, at the time of the experiment, 3.5 NIS = $1.) They were then told that the experiment would take 45 minutes, and they would be asked to answer a quiz consisting of 50 problems taken out of a psychometric test used to scan applicants to the university. [...] In  the four different treatments subjects were  promised different additional payments for each correct answer.&lt;b&gt; &lt;br /&gt;
&lt;/b&gt;&lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the first  group no mention was made of any additional payment. &lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the second group subjects were promised an additional payment of 0.1 NIS per question answered correctly.&lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the third group subjects were promised an additional payment of 1 NIS per question answered correctly and&lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the fourth group subjects were promised an additional payment of 3 NIS per question answered correctly&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt; [...]  The average number of correct answers was: &lt;/span&gt;&lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;  28.4 in the first group (no additional payment)&lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; 23.1 in the second group (additional&amp;nbsp;0.1 NIS per  correct answer).&lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; 34.7 in the third group,&amp;nbsp;(additional 1 NIS per correct answer).&amp;nbsp; &lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt; 34.1 in the fourth group,&amp;nbsp;(additional&amp;nbsp;3 NIS per correct answer).&amp;nbsp;&lt;/span&gt; &lt;/li&gt;
&lt;/ul&gt;&lt;/blockquote&gt;&lt;div style="text-align: justify;"&gt;In other words, performance-based payment improved performance. But offering just a small additional financial incentive actually decreased performance compared to the case of providing no additional financial incentive.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Effect of payment on &lt;i&gt;unpaid&lt;/i&gt; tasks&lt;/b&gt;  &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;/div&gt;&lt;blockquote&gt;&lt;div style="text-align: justify;"&gt;We  had  180  high-school students around  the  age  of 16 participating with three treatment levels [collecting money for a charitable purpose]....&lt;/div&gt;&lt;ul&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the first treatment, the students were told about the importance of collecting money for the society, that the results of the collection would be published, so that the amount collected by each pair would become public knowledge.&amp;nbsp;&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the second treatment, after the same speech, each pair was promised 1 percent of the amount that the two of them collected.&amp;nbsp;&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;In the third treatment, each pair was promised 10 percent of the amount they collected.&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;div style="text-align: justify;"&gt;In the second and third treatments it was made clear that the  payment was made from funds additional to the donation, provided by the researchers. The average amount collected was: &lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;238.67 for groups in the first treatment (with no payment). &lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;153.67 in the second group (1 percent of the collected amount). &lt;br /&gt;
&lt;/span&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;219.33 in the third group (10 percent of the collected amount). &lt;/span&gt;&lt;br /&gt;
&lt;/li&gt;
&lt;/ul&gt;&lt;/blockquote&gt;&lt;div style="text-align: justify;"&gt;In this case, where there was no initial payment and the task had an altruistic purpose, &lt;b&gt;&lt;span class="Apple-style-span" style="color: #990000;"&gt;providing financial incentives actually reduced performance.  &lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;b&gt;Additional literature&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;div&gt;&lt;div style="text-align: justify;"&gt;There is significant literature for anyone interested (thanks &lt;a href="http://pages.stern.nyu.edu/~ptambe/"&gt;Sonny&lt;/a&gt;!). A few pointers to start:&lt;/div&gt;&lt;/div&gt;&lt;div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;a href="http://www.amazon.com/Gift-Relationship-Human-Social-Policy/dp/1565844033"&gt;The Gift Relationship: From Human Blood to Social Policy&lt;/a&gt;&amp;nbsp;(or why paying for blood donations decreases&amp;nbsp;willingness&amp;nbsp;to donate)&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;a href="http://www.jstor.org/stable/10.1086/468061"&gt;A Fine is a Price&lt;/a&gt;&amp;nbsp;(or how a day-care, by penalizing late-coming parents, increased significantly the number of late-coming parents)&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;a href="http://www.jstor.org/stable/2951373"&gt;The Cost of Price Incentives: An Empirical Analysis of Motivation Crowding-Out&lt;/a&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;a href="http://papers.ssrn.com/sol3/papers.cfm?abstract_id=229047"&gt;Do Incentive Contracts Crowd Out Voluntary Cooperation?&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;/div&gt;&lt;div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Conclusions&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Essentially, Gneezy and Rustichini  found that:  &lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;Paying more indeed increases performance, compared to paying less.&amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;However, paying nothing may actually be better than paying!&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;div style="text-align: justify;"&gt;Section IV of the paper has a very nice discussion on how to interpret and model the process. Here are a few explanations, in increasing order of explanatory power for the observed phenomenon:  &lt;/div&gt;&lt;ul&gt;&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;Paying something removes the intrinsic motivation for a task, and&amp;nbsp;&lt;u&gt;replaces&lt;/u&gt;&amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;it with the external motivation for money.&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;Incomplete contract: the piece-wise or performance-based payment &lt;u&gt;changes the original meaning of the contract&lt;/u&gt;, which implied  that high-performance is part of the task.&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;Paying small amounts compared to the originally implied value of the task &lt;u&gt;devalues the task&lt;/u&gt; (e.g., taking back a glass bottle to help recycling vs. to get 5 cents back)&lt;br /&gt;
&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;hr style="background-color: #691f01; border-bottom-width: 1px; border-color: initial; border-left-width: 1px; border-right-width: 1px; border-style: initial; border-top-width: 1px; color: #691f01; display: block; height: 2px; text-align: center;" width="50%" /&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Relevance to crowdsourcing&lt;/b&gt;&amp;nbsp; &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;I found the results pretty interesting, with significant implications for micro-crowdsourcing. While volunteers may be great for various tasks (e.g., in &lt;a href="http://en.wikipedia.org/wiki/Citizen_science"&gt;citizen science&lt;/a&gt; applications, such as the &lt;a href="http://en.wikipedia.org/wiki/Galaxy_Zoo"&gt;Galaxy Zoo&lt;/a&gt;), migrating such applications to a paid crowdsourcing application may have a significant downside. Paying small rewards to workers will be counterproductive. The work of volunteers is, indeed, priceless.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Furthermore, with the low level of payments on Mechanical Turk, we are stuck at the worst possible status. &lt;b&gt;We pay, and we do not pay enough.&amp;nbsp;&lt;/b&gt;But how can we pay more, when every attempt to increase the price to reasonable levels is followed by attempts of scammers to game the system and get paid for doing nothing?&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-5502252624067659260?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=fyN4i8fXNIg:BuFC0Y-ubyw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=fyN4i8fXNIg:BuFC0Y-ubyw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=fyN4i8fXNIg:BuFC0Y-ubyw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/fyN4i8fXNIg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/5502252624067659260/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/05/pay-enough-or-dont-pay-at-all.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5502252624067659260?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/5502252624067659260?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/fyN4i8fXNIg/pay-enough-or-dont-pay-at-all.html" title="Pay Enough or Don't Pay at All" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/05/pay-enough-or-dont-pay-at-all.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ak4MRXg7fSp7ImA9WhZWEEs.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-1847137548481255471</id><published>2011-05-10T19:08:00.001-04:00</published><updated>2011-05-10T19:09:44.605-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-05-10T19:09:44.605-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="www2011" /><category 
scheme="http://www.blogger.com/atom/ns#" term="slides" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="reputation" /><category scheme="http://www.blogger.com/atom/ns#" term="online labor" /><category scheme="http://www.blogger.com/atom/ns#" term="assembly line" /><title>Crowdsourcing: Lessons from Henry Ford</title><content type="html">Last month, during the WWW2011 conference, I participated in the panel "&lt;a href="http://portal.acm.org/citation.cfm?id=1963335"&gt;The computer is the new sewing machine: benefits and perils of crowdsourcing&lt;/a&gt;"&lt;br /&gt;
&lt;br /&gt;
Below you can find my slides:&lt;br /&gt;
&lt;br /&gt;
&lt;center&gt;&lt;div style="width:425px" id="__ss_7884697"&gt;&lt;strong style="display:block;margin:12px 0 4px"&gt;&lt;a href="http://www.slideshare.net/ipeirotis/crowdsourcing-lessons-from-henry-ford" title="Crowdsourcing: Lessons from Henry Ford"&gt;Crowdsourcing: Lessons from Henry Ford&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/7884697" width="425" height="355" frameborder="0" marginwidth="0" marginheight="0" scrolling="no"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;/center&gt;&lt;br /&gt;
&lt;br /&gt;
The main points that I wanted to make:&lt;br /&gt;
&lt;div&gt;&lt;ul&gt;&lt;li&gt;It is common to consider crowdsourcing as the "assembly line for knowledge work" and think of the workers as simple cogs in a big machine.&amp;nbsp;It is almost a knee-jerk reaction to think negatively about the concept. However, it was the proper use of the assembly line (together with the proper automation) by Henry Ford that&lt;b&gt; led to the first significant improvement in the standard of living for the masses&lt;/b&gt;.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Crowdsourcing suffers a lot due to significant worker turnover: Everyone who experimented with large tasks on MTurk knows that the participation distribution is very skewed. A few workers contribute the majority of the work, while a large number of workers contribute only minimally. &lt;b&gt;Dealing with these hit-and-run workers is a pain, as we cannot apply any statistically meaningful mechanism for quality control.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;We ignore the fact that workers give back what they are given. Pay peanuts, get monkeys. &lt;b&gt;Pay well, and get good workers&lt;/b&gt;. Needless to say, reputation and other quality signaling mechanisms are of fundamental importance for this task.&lt;/li&gt;
&lt;li&gt;Keeping the same workers around can give significant improvements in quality. Today on MTurk we have a tremendous turnover of workers, wasting significant effort and efficiencies. &lt;b&gt;Whoever builds a strong base of a few good workers can pay the workers much better and, at the same time, generate a better product for lower cost&lt;/b&gt; than relying on an army of inexperienced, noisy workers.&lt;/li&gt;
&lt;/ul&gt;&lt;div&gt;Yes, in the end, crowdsourcing is not about the crowd. It is about the individuals in the crowd. And we can now search for these valuable individuals very effectively. Crowd&lt;span class="Apple-style-span" style="color: #660000;"&gt;&lt;b&gt;sourcing&lt;/b&gt;&lt;/span&gt; is crowd&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;searching&lt;/span&gt;&lt;/b&gt;.&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-1847137548481255471?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=jaSVlG-j8fI:PzSy2eBbpDw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=jaSVlG-j8fI:PzSy2eBbpDw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=jaSVlG-j8fI:PzSy2eBbpDw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/jaSVlG-j8fI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/1847137548481255471/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/05/crowdsourcing-lessons-from-henry-ford.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1847137548481255471?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1847137548481255471?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/jaSVlG-j8fI/crowdsourcing-lessons-from-henry-ford.html" title="Crowdsourcing: Lessons from Henry Ford" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/05/crowdsourcing-lessons-from-henry-ford.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEUNRHc9fCp7ImA9WhZRGUw.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-3771063621543204461</id><published>2011-04-15T20:04:00.000-04:00</published><updated>2011-04-15T20:04:55.964-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-15T20:04:55.964-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="human 
computation" /><category scheme="http://www.blogger.com/atom/ns#" term="hcomp" /><title>Deadline for HCOMP 2011 extended: Submission due on April 29th</title><content type="html">Due to a significant number of requests, and a number of conflicts with other conferences and workshops, we decided to extend the submission deadline for &lt;a href="http://www.humancomputation.com/"&gt;HCOMP 2011&lt;/a&gt;. The &lt;b&gt;new deadline is April 29th&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
If you want to know more, you can see the &lt;a href="http://behind-the-enemy-lines.blogspot.com/2011/02/3rd-human-computation-workshop-hcomp.html"&gt;call for papers and workshop announcement&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-3771063621543204461?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=kB_fYsNcSbI:HWayrRtrU4g:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=kB_fYsNcSbI:HWayrRtrU4g:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=kB_fYsNcSbI:HWayrRtrU4g:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/kB_fYsNcSbI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/3771063621543204461/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/deadline-for-hcomp-2011-extended.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3771063621543204461?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/3771063621543204461?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/kB_fYsNcSbI/deadline-for-hcomp-2011-extended.html" title="Deadline for HCOMP 2011 extended: Submission due on April 29th" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/deadline-for-hcomp-2011-extended.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkANRnY8cCp7ImA9WhZQEkg.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-2822764788019891983</id><published>2011-04-15T11:29:00.003-04:00</published><updated>2011-04-19T20:19:57.878-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-19T20:19:57.878-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="wisdom of the crowds" /><title>Video from NYC Crowdsourcing Meetup</title><content type="html">On April 13th, we hosted at NYU Stern the &lt;a href="http://behind-the-enemy-lines.blogspot.com/2011/04/nyc-crowdsourcing-meetup-april-13th.html"&gt;NYC Crowdsourcing Meetup&lt;/a&gt;.&amp;nbsp;For those who missed it, you can now download an&amp;nbsp;&lt;a href="http://echo360.stern.nyu.edu:8080/ess/echo/presentation/3cca744c-8dc7-4c0d-9564-d8868fe3f177/media.mp3"&gt;audio-only podcast version&lt;/a&gt;, see &lt;a href="http://sterntv.stern.nyu.edu:8080/ramgen/faculty/panos/398259/041311-panos-398259.rm"&gt;the online video&lt;/a&gt;,&amp;nbsp;or watch the&amp;nbsp;&lt;a href="http://echo360.stern.nyu.edu:8080/ess/echo/presentation/3cca744c-8dc7-4c0d-9564-d8868fe3f177"&gt;video from the event together with the slide presentations&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://echo360.stern.nyu.edu:8080/ess/echo/presentation/3cca744c-8dc7-4c0d-9564-d8868fe3f177"&gt;&lt;img border="0" height="313" src="http://4.bp.blogspot.com/-jFmqv6pObxM/TahjhbKIWBI/AAAAAAAAlu0/Y9GNVKlqIZM/s320/crowdsourcing-meetup-video-snapshot.PNG" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
The speakers at the event:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;John Horton&lt;/b&gt;, Staff Economist of oDesk. John talked on issues of matching employers with contractors in an online marketplace. Specifically he described mechanisms for forcing contractors to give an accurate description of their skills, avoiding issues of over-tagging a profile with irrelevant keywords or over-claiming qualifications.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Amanda Michel&lt;/b&gt;, Director of Distributed Reporting at ProPublica. Amanda talked about the crowdsourcing efforts of ProPublica, and how they use the crowd to enable better journalistic investigation of topics they are researching. At some point during the presentation, Amanda quoted from &lt;a href="http://www.propublica.org/article/our-stimulus-spot-check-summer-wave-of-projects-nears-crest-817"&gt;one of their studies&lt;/a&gt;: "&lt;i&gt;ProPublica pulled a random sample of 520 of the roughly 6,000 approved projects to examine stimulus progress around the country. That sample is large enough to estimate national patterns with a margin of error of plus or minus 4.5 percentage points&lt;/i&gt;." Honestly, a tear came down my eye when I compared that with the corresponding practices of Greek newsrooms that typically operate with samples of n=1 or n=0.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Todd Carter&lt;/b&gt;, CEO and Co-Founder of Tagasauris. Todd described Tagasauris, a system for annotating and tagging media files. Todd described the annotation effort for &lt;a href="http://www.magnumphotos.com/"&gt;Magnum Photos&lt;/a&gt; (sample photos in their collection include the &lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXHAFXW3&amp;amp;IT=ZoomImage01_VForm&amp;amp;IID=2S5RYDYF53IF&amp;amp;PN=8&amp;amp;CT=Search"&gt;Afghan refugee girl&lt;/a&gt;, &lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXHAQ5TH&amp;amp;CT=Search&amp;amp;DT=Image"&gt;Marilyn Monroe on top of the vent&lt;/a&gt;, and many other iconic photos). A highlight was the discovery of a "lost" set of images from the shooting of the movie "American Graffiti". These images, shot by Dennis Stock, were in the Magnum archive but were not possible to find as they were lacking any tags and description. After the annotation effort from Tagasauris, the lost set of photos &lt;a href="http://www.magnumphotos.com/C.aspx?VP3=ViewBox_VPage&amp;amp;VBID=2K1HZOXHARHQF&amp;amp;CT=Search&amp;amp;DT=Image"&gt;was re-discovered&lt;/a&gt;.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Panos Ipeirotis&lt;/b&gt;, representing AdSafe Media. I talked about our efforts in AdSafe, on using crowdsourcing in order to create machine learning systems for classifying web pages.&lt;/li&gt;
&lt;/ul&gt;&lt;div&gt;It was a lively and&amp;nbsp;successful&amp;nbsp;event. If there is enough interest and participants, I think this is an event that can be repeated periodically.&lt;/div&gt;&lt;a href="http://www.blogger.com/"&gt;&lt;/a&gt;&lt;span id="goog_1773302289"&gt;&lt;/span&gt;&lt;span id="goog_1773302290"&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-2822764788019891983?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=VedNdDFx-Mo:c-I0qJ6OG14:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=VedNdDFx-Mo:c-I0qJ6OG14:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=VedNdDFx-Mo:c-I0qJ6OG14:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/VedNdDFx-Mo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/2822764788019891983/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/video-from-nyc-crowdsourcing-meetup.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2822764788019891983?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/2822764788019891983?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/VedNdDFx-Mo/video-from-nyc-crowdsourcing-meetup.html" title="Video from NYC Crowdsourcing Meetup" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-jFmqv6pObxM/TahjhbKIWBI/AAAAAAAAlu0/Y9GNVKlqIZM/s72-c/crowdsourcing-meetup-video-snapshot.PNG" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/video-from-nyc-crowdsourcing-meetup.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkINR38-fCp7ImA9WhZRFEk.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-1156336514512612042</id><published>2011-04-10T11:16:00.000-04:00</published><updated>2011-04-10T11:16:36.154-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-10T11:16:36.154-04:00</app:edited><category 
scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" term="meetup" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="online labor" /><title>NYC Crowdsourcing Meetup: April 13th, 6.30pm</title><content type="html">Join us for the first-ever &lt;a href="http://www.meetup.com/Distributed-Work/events/17149053/"&gt;New York City Crowdsourcing meetup&lt;/a&gt; hosted by NYU and sponsored by &lt;a href="http://crowdflower.com/"&gt;CrowdFlower&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;When&lt;/b&gt;: Wednesday, April 13,&amp;nbsp;6:30-9pm&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Where&lt;/b&gt;: NYU Stern School of Business,&amp;nbsp;Room M3-110,&amp;nbsp;&lt;a href="http://goo.gl/maps/rVxm"&gt;44 West 4th Street, New York, NY 10012&lt;/a&gt;&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
Pizza, beer, and thought-provoking conversation about the future of work.&amp;nbsp;Come listen, ask, and debate how crowdsourcing is changing everything from philanthropy and urban planning to creative design and enterprise solutions.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Confirmed Speakers:&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Lukas Biewald, CEO and Co-Founder of CrowdFlower&lt;/li&gt;
&lt;li&gt;Todd Carter, CEO and Co-Founder of Tagasauris&lt;/li&gt;
&lt;li&gt;John Horton, Chief Economist of oDesk&lt;/li&gt;
&lt;li&gt;Panos Ipeirotis, Associate Professor at Stern School of Business, NYU&lt;/li&gt;
&lt;li&gt;Amanda Michel, Director of Distributed Reporting at ProPublica&lt;/li&gt;
&lt;li&gt;Bartek Ringwelski, CEO and Co-Founder of SkillSlate&lt;/li&gt;
&lt;li&gt;Trebor Scholz, Associate Professor in Media &amp;amp; Culture at The New School University&lt;/li&gt;
&lt;/ul&gt;&lt;div&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-1156336514512612042?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=OGw0IhDLU7M:WmJWQttM9XE:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=OGw0IhDLU7M:WmJWQttM9XE:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=OGw0IhDLU7M:WmJWQttM9XE:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/OGw0IhDLU7M" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/1156336514512612042/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/nyc-crowdsourcing-meetup-april-13th.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1156336514512612042?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/1156336514512612042?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/OGw0IhDLU7M/nyc-crowdsourcing-meetup-april-13th.html" title="NYC Crowdsourcing Meetup: April 13th, 6.30pm" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/nyc-crowdsourcing-meetup-april-13th.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0cBQXg9cCp7ImA9WhZREEU.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-9006676185953191494</id><published>2011-04-05T16:08:00.003-04:00</published><updated>2011-04-06T07:24:10.668-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-06T07:24:10.668-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="crowdsourcing" /><category scheme="http://www.blogger.com/atom/ns#" 
term="surveys" /><category scheme="http://www.blogger.com/atom/ns#" term="wisdom of the crowds" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><category scheme="http://www.blogger.com/atom/ns#" term="tutorial" /><category scheme="http://www.blogger.com/atom/ns#" term="computer science" /><category scheme="http://www.blogger.com/atom/ns#" term="human computation" /><title>Tutorial on Crowdsourcing and Human Computation</title><content type="html">Last week, together with Praveen Paritosh from Google, we presented a 6-hour tutorial at the WWW 2011 conference, on crowdsourcing and human computation. The title of the tutorial was "&lt;a href="http://www.www2011india.com/tutorialstr26.html"&gt;Managing Crowdsourced Human Computation&lt;/a&gt;".&lt;br /&gt;
&lt;br /&gt;
My slides from the tutorial are &lt;a href="http://www.slideshare.net/ipeirotis/managing-crowdsourced-human-computation"&gt;available now on Slideshare&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&lt;center&gt;&lt;br /&gt;
&lt;div style="text-align: center;"&gt;&lt;div id="__ss_7526103" style="width: 425px;"&gt;&lt;iframe frameborder="0" height="355" marginheight="0" marginwidth="0" scrolling="no" src="http://www.slideshare.net/slideshow/embed_code/7526103" width="425"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;/div&gt;&lt;/center&gt;&lt;br /&gt;
&lt;br /&gt;
Once Praveen gets clearance from Google, we will post his slides as well. &lt;br /&gt;
&lt;br /&gt;
Judging from all the crap that I get to review lately, I was getting pessimistic about the quality of research on crowdsourcing. However, while preparing the tutorial, I realized the massive amount of high-quality research that is being published. We had 6 hours for the tutorial, and we did not have enough time to cover many really interesting papers. I had to refer people to other, more "specialized" tutorials (e.g., on linguistic annotation, on search relevance, etc.), which I mention at the end of the slides.&lt;br /&gt;
&lt;br /&gt;
Special thanks go to my PhD student, &lt;a href="http://pages.stern.nyu.edu/~jwang5/"&gt;Jing Wang&lt;/a&gt;, for her slides on market design, &lt;a href="http://www.ischool.utexas.edu/~ml/"&gt;Matt Lease&lt;/a&gt; for his excellent &lt;a href="http://ir.ischool.utexas.edu/crowd/"&gt;list of pointers for crowdsourcing resources&lt;/a&gt;, &lt;a href="http://wwwcsif.cs.ucdavis.edu/~alonsoom/"&gt;Omar Alonso&lt;/a&gt; for his tutorial slides on crowdsourcing for search relevance, &lt;a href="http://alexquinn.org/"&gt;Alex Quinn&lt;/a&gt; and &lt;a href="http://www.cs.umd.edu/~bederson/"&gt;Ben Bederson&lt;/a&gt; for their survey on human computation, and Winter Mason for sharing his slides from his CSDM keynote. And all the other researchers for making crowdsourcing and human computation an exciting field for research!&lt;br /&gt;
&lt;br /&gt;
Last but not least: Luis von Ahn with Edith Law will be presenting another tutorial on human computation during AAAI, in San Francisco on August 8th. We will be organizing the &lt;a href="http://humancomputation.com/"&gt;HCOMP 2011 workshop&lt;/a&gt; in conjunction with AAAI as well! The submission deadline is April 22nd! Do not forget to submit!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-9006676185953191494?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=7g3ltXQm4YI:Zt38x7lnCAw:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=7g3ltXQm4YI:Zt38x7lnCAw:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=7g3ltXQm4YI:Zt38x7lnCAw:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/7g3ltXQm4YI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/9006676185953191494/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/tutorial-on-crowdsourcing-and-human.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/9006676185953191494?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/9006676185953191494?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/7g3ltXQm4YI/tutorial-on-crowdsourcing-and-human.html" title="Tutorial on Crowdsourcing and Human Computation" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/tutorial-on-crowdsourcing-and-human.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEQASXo4fSp7ImA9WhRRFUw.&quot;"><id>tag:blogger.com,1999:blog-7118563403027467631.post-6882121483395717753</id><published>2011-04-05T08:41:00.011-04:00</published><updated>2011-11-28T16:39:08.435-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-28T16:39:08.435-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mechanical turk" /><category scheme="http://www.blogger.com/atom/ns#" term="research" /><title>An ingenious application of crowdsourcing: Fix reviews' 
grammar, improve sales</title><content type="html">&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;
I have been doing &lt;a href="http://pages.stern.nyu.edu/~panos/publications/kdd2007.pdf"&gt;research&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/icis2010.pdf"&gt;on&lt;/a&gt; the &lt;a href="http://pages.stern.nyu.edu/~panos/publications/jrpm2009.pdf"&gt;economic&lt;/a&gt; &lt;a href="http://papers.ssrn.com/sol3/papers.cfm?abstract_id=1024903"&gt;impact&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/tkde2010-usefulness.pdf"&gt;of&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/icec2007.pdf"&gt;product&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/acl2007.pdf"&gt;reviews&lt;/a&gt; &lt;a href="http://pages.stern.nyu.edu/~panos/publications/www2011.pdf"&gt;for a&lt;/a&gt; &lt;a href="http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&amp;amp;Sect2=HITOFF&amp;amp;p=1&amp;amp;u=/netahtml/PTO/search-bool.html&amp;amp;r=1&amp;amp;f=G&amp;amp;l=50&amp;amp;co1=AND&amp;amp;d=PTXT&amp;amp;s1=7848979.PN.&amp;amp;OS=PN/7848979&amp;amp;RS=PN/7848979"&gt;while&lt;/a&gt;. One thing that we have noticed is that the quality of the reviews can have an impact on product sales,&lt;i&gt; independently of the polarity of the review&lt;/i&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;i&gt;High-quality reviews improve product sales&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
A well-written review tends to inspire confidence about the product, even if the review is negative. Typically, such reviews are perceived as objective and thorough. If we have a high-quality &lt;i&gt;negative &lt;/i&gt;review this may serve as a guarantee that the negative aspects of the product are not that bad after all. For example, a negative review, such as "&lt;i&gt;horrible battery life... in my tests battery lasts barely longer than 24 hours,&lt;/i&gt;" may be perceived as positive  by other customers that consider a 24-hour battery life to be more than sufficient.&lt;br /&gt;
&lt;br /&gt;
In our recent (&lt;a href="http://www.www2011india.com/"&gt;award-winning&lt;/a&gt;) &lt;a href="http://pages.stern.nyu.edu/~panos/publications/www2011.pdf"&gt;WWW2011 paper "Towards a Theory Model for Product Search"&lt;/a&gt;&amp;nbsp;(with &lt;a href="http://pages.stern.nyu.edu/~bli/"&gt;Beibei Li&lt;/a&gt; and &lt;a href="http://people.stern.nyu.edu/aghose/"&gt;Anindya Ghose&lt;/a&gt;), we noticed that demand for a hotel increases if the reviews on TripAdvisor and Travelocity are well-written, without spelling errors; this holds no matter if the review is positive or negative. In our &lt;a href="http://pages.stern.nyu.edu/~panos/publications/tkde2010-usefulness.pdf"&gt;TKDE paper "Estimating the Helpfulness and Economic Impact of Product Reviews: Mining Text and Reviewer Characteristics"&lt;/a&gt;&amp;nbsp;&amp;nbsp;(with&amp;nbsp;&lt;a href="http://people.stern.nyu.edu/aghose/"&gt;Anindya Ghose&lt;/a&gt;), we observed similar trends for products sold and reviewed on Amazon.com.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;i&gt;And what can we do knowing this?&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Being in a business school, these findings were considered informative but not deeply interesting. Do not forget, the focus of researchers in business schools is &lt;a href="http://behind-the-enemy-lines.blogspot.com/2009/09/different-attitudes-of-computer.html"&gt;centered on causality and on policy-making&lt;/a&gt;. Yes, we now know that it is important for the reviews to be well-written and informative, if we want the product to sell well. But if we cannot do anything about this, it is not deeply interesting. It is almost like knowing that during the cold months the demand for summer resorts drops!&lt;br /&gt;
&lt;br /&gt;
But here comes the twist...&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;i&gt;The crowdsourcing solution&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
Last week, over drinks during the WWW conference, I learned about a fascinating application of crowdsourcing that attacked exactly this issue.&lt;br /&gt;
&lt;br /&gt;
An online retailer noticed that, indeed, products with high-quality reviews are selling well. So, they decided to take action. The retailer used Amazon Mechanical Turk to improve the quality of the reviews posted on its own website. Using the &lt;a href="http://groups.csail.mit.edu/uid/other-pubs/soylent.pdf"&gt;Find-Fix-Verify pattern&lt;/a&gt;, the retailer used Mechanical Turk to examine &lt;b&gt;&lt;i&gt;millions &lt;/i&gt;&lt;/b&gt;of product reviews. (Here are the archived versions of the HITs: &lt;a href="http://mturk-tracker.com/hit/79f44798e8c296e29bddbd3a3aa8f60a/"&gt;Find&lt;/a&gt;, &lt;a href="http://mturk-tracker.com/hit/160L5FUZB7DDN03NIAB2D4J8HRZ022/"&gt;Fix&lt;/a&gt;, &lt;a href="http://mturk-tracker.com/hit/1QPDXYWLI7K28IZ6MAP7WEO0N6V03B/"&gt;Verify&lt;/a&gt;. And if you have not figured out the firm name by now, the retailer is Zappos.) For the reviews with mistakes, &lt;b&gt;&lt;i&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;they fixed the spelling and grammar errors! Thus they effectively improved the quality of the reviews on their website. And, correspondingly, they improved the demand for their products.&lt;/span&gt;&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;i&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;&lt;br /&gt;
&lt;/span&gt;&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;i&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;&lt;span class="Apple-style-span" style="color: black; font-style: normal; font-weight: normal;"&gt;For the curious readers, Zappos has been doing this&amp;nbsp;&lt;a href="http://mturk-tracker.com/hit/e39ec137599fd6adbdf44a18ab53054a/"&gt;at least since April of 2009&lt;/a&gt;, which means that they were doing it even before being bought by Amazon.&lt;/span&gt;&lt;/span&gt;&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
While I do not know the exact revenue improvement, I was told that it was substantial. Given that Zappos spent at least 10 cents per review, and that they examined approximately 5 million reviews, this is an expense of a few hundred thousand dollars. (My archive on MTurk-Tracker kind of confirms these numbers.) So, the expected revenue improvement should have been at least a few million dollars for this exercise to make sense.&lt;br /&gt;
&lt;br /&gt;
Ethical? Notice that they are not fixing the polarity or the content of the reviews. They just change the language to be correct and error-free. I can see the counter-argument that the writing style allows us to judge if the review is serious or not. So, artificially improving the writing style may be considered as interference with the perceived objectivity of the user-generated reviews. I still consider it fine to change the grammar, from the ethics point of view.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;span class="Apple-style-span" style="color: #660000;"&gt;But is it ingenious? A resounding yes!&lt;/span&gt;&lt;/b&gt; It is one of these solutions that is sitting in front of you but you just cannot see it. And this is what makes it ingenious.&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7118563403027467631-6882121483395717753?l=www.behind-the-enemy-lines.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:BZkkm1Y4jn8"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=suZr4PVOpB0:deP2qE04G8E:BZkkm1Y4jn8" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?i=suZr4PVOpB0:deP2qE04G8E:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?a=suZr4PVOpB0:deP2qE04G8E:I9og5sOYxJI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/AComputerScientistInABusinessSchool?d=I9og5sOYxJI" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/AComputerScientistInABusinessSchool/~4/suZr4PVOpB0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://www.behind-the-enemy-lines.com/feeds/6882121483395717753/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.behind-the-enemy-lines.com/2011/04/want-to-improve-sales-fix-grammar-and.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6882121483395717753?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/7118563403027467631/posts/default/6882121483395717753?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/AComputerScientistInABusinessSchool/~3/suZr4PVOpB0/want-to-improve-sales-fix-grammar-and.html" title="An ingenious application of crowdsourcing: Fix reviews' grammar, improve sales" /><author><name>Panos Ipeirotis</name><uri>https://profiles.google.com/103666871486129948108</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//lh4.googleusercontent.com/-dIWj8iHQSKU/AAAAAAAAAAI/AAAAAAAAuds/a0nL5vYf2FI/s512-c/photo.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://www.behind-the-enemy-lines.com/2011/04/want-to-improve-sales-fix-grammar-and.html</feedburner:origLink></entry></feed>

