<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:blogger="http://schemas.google.com/blogger/2008" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;AkUGQ3gzcSp7ImA9WhBaEUQ.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059</id><updated>2013-05-21T21:50:22.689-07:00</updated><category term="k-nn" /><category term="extract transform load" /><category term="data mining" /><category term="concept mining" /><category term="robots.txt" /><category term="knn" /><category term="importxml" /><category term="etl" /><category term="ajax web scraping scraper" /><category term="tutorial" /><category term="web crawling" /><category term="r" /><category term="text mining" /><category term="text analysis" /><category term="web scraping" /><category term="business intelligence" /><category term="web scraping rapidminer xpath web scrape rapid miner x-path" /><category term="google spreadsheets" /><category term="crawling rules" /><category term="rapid miner" /><category term="extjs ext js tutorial learn help" /><category term="google docs spreadsheets" /><category term="web crawl" /><category term="x-path" /><category term="xpath" /><category term="rapidminer" /><category term="how to scrape ajax web pages" /><category term="rapidminer data mining etl" /><category term="naive bayes" /><title>Vancouver Data Blog by Neil McGuigan</title><subtitle type="html">Some RapidMiner, some JMP, some Google Docs</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/posts/default" /><link rel="alternate" 
type="text/html" href="http://vancouverdata.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>61</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/VancouverData" /><feedburner:info uri="vancouverdata" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><feedburner:emailServiceId>VancouverData</feedburner:emailServiceId><feedburner:feedburnerHostname>http://feedburner.google.com</feedburner:feedburnerHostname><entry gd:etag="W/&quot;CUUDSHgyfyp7ImA9WhBbF04.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-2130792509713501426</id><published>2013-05-16T12:41:00.001-07:00</published><updated>2013-05-16T12:41:19.697-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-16T12:41:19.697-07:00</app:edited><title>AWS Redshift: How Amazon Changed The Game</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/2130792509713501426/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2013/05/aws-redshift-how-amazon-changed-game.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" 
href="http://www.blogger.com/feeds/2523819181563716059/posts/default/2130792509713501426?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/2130792509713501426?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/mJPamJeE7mw/aws-redshift-how-amazon-changed-game.html" title="AWS Redshift: How Amazon Changed The Game" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">A good blog post on Amazon RedShift - their Postgres-based massive data warehouse. Some good analysis on performance and costs:  

http://blog.aggregateknowledge.com/2013/05/16/aws-redshift-how-amazon-changed-the-game/&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=mJPamJeE7mw:GNeybgxxsq8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=mJPamJeE7mw:GNeybgxxsq8:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=mJPamJeE7mw:GNeybgxxsq8:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=mJPamJeE7mw:GNeybgxxsq8:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=mJPamJeE7mw:GNeybgxxsq8:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/mJPamJeE7mw" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2013/05/aws-redshift-how-amazon-changed-game.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ck8GSXYzfip7ImA9WhBVE0g.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-7440313607929380140</id><published>2013-04-18T22:53:00.001-07:00</published><updated>2013-04-18T22:53:48.886-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-18T22:53:48.886-07:00</app:edited><title>Vancouver Training: Introduction to Data Mining and Predictive Analytics with RapidMiner - Save $500</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/7440313607929380140/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2013/04/vancouver-training-introduction-to-data.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/7440313607929380140?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/7440313607929380140?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/MJl_My9dmE0/vancouver-training-introduction-to-data.html" title="Vancouver Training: Introduction to Data Mining and Predictive Analytics with RapidMiner - Save $500" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">I'll be teaching a RapidMiner course here in Vancouver next week:Tuesday, April 23, 2013 at 8:30 AM - Wednesday, 
April 24, 2013 at 5:00 PM (PDT) Details here: http://rapid-i_us_20130423-eorg.eventbrite.com/ Save $500 with the coupon VAN_BLOG!&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=MJl_My9dmE0:D_K6lpVx7DI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=MJl_My9dmE0:D_K6lpVx7DI:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=MJl_My9dmE0:D_K6lpVx7DI:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=MJl_My9dmE0:D_K6lpVx7DI:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=MJl_My9dmE0:D_K6lpVx7DI:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/MJl_My9dmE0" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2013/04/vancouver-training-introduction-to-data.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkICQXcycCp7ImA9WhBTF08.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-8783676131394604283</id><published>2013-02-12T17:56:00.000-08:00</published><updated>2013-02-12T17:56:00.998-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-02-12T17:56:00.998-08:00</app:edited><title>Google's Data Mining Research Papers</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/8783676131394604283/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2013/02/googles-data-mining-research-papers.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/8783676131394604283?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/8783676131394604283?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/kqkNQaBcDSU/googles-data-mining-research-papers.html" title="Google's Data Mining Research Papers" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><content type="html">In case you missed it, here are Google's 104 data mining research papers:

http://research.google.com/pubs/DataMining.html

&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=kqkNQaBcDSU:VrUc2pWBf64:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=kqkNQaBcDSU:VrUc2pWBf64:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=kqkNQaBcDSU:VrUc2pWBf64:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=kqkNQaBcDSU:VrUc2pWBf64:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=kqkNQaBcDSU:VrUc2pWBf64:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/kqkNQaBcDSU" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2013/02/googles-data-mining-research-papers.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUQBQHc4eip7ImA9WhNVEEs.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-5468419230103865951</id><published>2012-12-20T20:15:00.000-08:00</published><updated>2012-12-20T20:15:51.932-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-12-20T20:15:51.932-08:00</app:edited><title>The Google F1 slides</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/5468419230103865951/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/12/the-google-f1-slides.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/5468419230103865951?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/5468419230103865951?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/oiapA_HD7kY/the-google-f1-slides.html" title="The Google F1 slides" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">Google F1 is a relational database query engine that works on top of Google Spanner, which is a distributed storage system that sits on top of Google File System. Got it? :)

Basically, it's a really big, distributed relational database, and Google is using F1 to replace MySQL for AdWords.

http://www.stanford.edu/class/cs347/slides/f1.pdf

&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=oiapA_HD7kY:RTq4OpxMQuw:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=oiapA_HD7kY:RTq4OpxMQuw:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=oiapA_HD7kY:RTq4OpxMQuw:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=oiapA_HD7kY:RTq4OpxMQuw:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=oiapA_HD7kY:RTq4OpxMQuw:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/oiapA_HD7kY" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/12/the-google-f1-slides.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D08ER3w8fip7ImA9WhNSGE4.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-4801634930985173130</id><published>2012-11-01T22:56:00.001-07:00</published><updated>2012-11-01T22:56:46.276-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-11-01T22:56:46.276-07:00</app:edited><title>Chomsky on Where AI Went Wrong</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/4801634930985173130/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/11/chomsky-on-where-ai-went-wrong.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4801634930985173130?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4801634930985173130?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/tQZgpxTAcF8/chomsky-on-where-ai-went-wrong.html" title="Chomsky on Where AI Went Wrong" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>1</thr:total><content type="html">If one were to rank a list of civilization's greatest and most elusive intellectual challenges, the problem of "decoding" ourselves -- understanding the inner workings of our minds and our brains, and how the architecture of these elements is encoded in our genome -- would surely be at the 
top. Yet the diverse fields that took on this challenge, from philosophy and psychology to computer science &lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=tQZgpxTAcF8:GhdLaIgTFCs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=tQZgpxTAcF8:GhdLaIgTFCs:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=tQZgpxTAcF8:GhdLaIgTFCs:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=tQZgpxTAcF8:GhdLaIgTFCs:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=tQZgpxTAcF8:GhdLaIgTFCs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/tQZgpxTAcF8" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/11/chomsky-on-where-ai-went-wrong.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0EMSHY8eip7ImA9WhNSGE4.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-6417931691516940891</id><published>2012-11-01T22:54:00.002-07:00</published><updated>2012-11-01T22:54:49.872-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-11-01T22:54:49.872-07:00</app:edited><title>The father of fractals</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/6417931691516940891/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/11/the-father-of-fractals.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6417931691516940891?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6417931691516940891?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/RtQgDfoKuw0/the-father-of-fractals.html" title="The father of fractals" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">A nice little piece on Mandelbrot in the Economist:

http://www.economist.com/node/2246127&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=RtQgDfoKuw0:MQLBslx_CU0:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=RtQgDfoKuw0:MQLBslx_CU0:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=RtQgDfoKuw0:MQLBslx_CU0:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=RtQgDfoKuw0:MQLBslx_CU0:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=RtQgDfoKuw0:MQLBslx_CU0:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/RtQgDfoKuw0" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/11/the-father-of-fractals.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE8FQ3ozcSp7ImA9WhJbFkQ.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-7588855649941313006</id><published>2012-09-26T13:53:00.002-07:00</published><updated>2012-09-26T13:53:32.489-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-09-26T13:53:32.489-07:00</app:edited><title>As I predicted, Self-driving cars a reality for 'ordinary people' within 5 years, says Google's Sergey Brin</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/7588855649941313006/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/09/self-driving-cars-reality-for-ordinary.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/7588855649941313006?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/7588855649941313006?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/5Jcj9lJMbYg/self-driving-cars-reality-for-ordinary.html" title="As I predicted, Self-driving cars a reality for 'ordinary people' within 5 years, says Google's Sergey Brin" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>1</thr:total><content type="html">Link here:

http://www.computerworld.com/s/article/9231707/Self_driving_cars_a_reality_for_39_ordinary_people_39_within_5_years_says_Google_39_s_Sergey_Brin&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=5Jcj9lJMbYg:UeCc-4HLQCs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=5Jcj9lJMbYg:UeCc-4HLQCs:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=5Jcj9lJMbYg:UeCc-4HLQCs:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=5Jcj9lJMbYg:UeCc-4HLQCs:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=5Jcj9lJMbYg:UeCc-4HLQCs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/5Jcj9lJMbYg" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/09/self-driving-cars-reality-for-ordinary.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkIFR305eSp7ImA9WhJbFkQ.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-4755800189553045656</id><published>2012-09-26T13:13:00.001-07:00</published><updated>2012-09-26T13:15:16.321-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-09-26T13:15:16.321-07:00</app:edited><title>The Google Spanner Paper</title><link rel="related" href="http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en//archive/spanner-osdi2012.pdf" title="The Google Spanner Paper" /><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/4755800189553045656/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/09/the-google-spanner-paper.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4755800189553045656?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4755800189553045656?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/iobaRSsFGpo/the-google-spanner-paper.html" title="The Google Spanner Paper" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">Google spanner is a massively distributed database. It needs atomic clocks on each machine to work though...

http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en//archive/spanner-osdi2012.pdf&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=iobaRSsFGpo:MzU7qwA6pvY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=iobaRSsFGpo:MzU7qwA6pvY:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=iobaRSsFGpo:MzU7qwA6pvY:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=iobaRSsFGpo:MzU7qwA6pvY:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=iobaRSsFGpo:MzU7qwA6pvY:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/iobaRSsFGpo" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/09/the-google-spanner-paper.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUUDQHk-eip7ImA9WhNSE0s.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-4425466551555268857</id><published>2012-09-07T20:30:00.002-07:00</published><updated>2012-10-27T12:47:51.752-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-10-27T12:47:51.752-07:00</app:edited><title>The Google Dremel Paper</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/4425466551555268857/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/09/the-google-dremel-paper.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4425466551555268857?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4425466551555268857?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/k-D1bGtRf-s/the-google-dremel-paper.html" title="The Google Dremel Paper" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>3</thr:total><content type="html">Here is the paper describing Google Dremel, which may replace Hive one day. There does not seem to be anyone working on an open-source version though

Link (PDF)

Update: Apache Drill is the open source version of Dremel (hat tip to Zoltan).

Also, Cloudera's Impala looks similar.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=k-D1bGtRf-s:IL67skcEmL4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=k-D1bGtRf-s:IL67skcEmL4:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=k-D1bGtRf-s:IL67skcEmL4:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=k-D1bGtRf-s:IL67skcEmL4:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=k-D1bGtRf-s:IL67skcEmL4:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/k-D1bGtRf-s" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/09/the-google-dremel-paper.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkIMRnw8fCp7ImA9WhJUEEo.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-5744926602260808931</id><published>2012-09-07T20:22:00.001-07:00</published><updated>2012-09-07T20:23:07.274-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-09-07T20:23:07.274-07:00</app:edited><title>Self-driving cars:  The next revolution</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/5744926602260808931/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/09/self-driving-cars-next-revolution.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/5744926602260808931?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/5744926602260808931?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/Ib1m-e2XLc8/self-driving-cars-next-revolution.html" title="Self-driving cars:  The next revolution" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">Here is a recent report from KPMG about self-driving cars:

http://www.kpmg.com/US/en/IssuesAndInsights/ArticlesPublications/Documents/self-driving-cars-next-revolution.pdf&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=Ib1m-e2XLc8:rb0hPpmQeTg:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=Ib1m-e2XLc8:rb0hPpmQeTg:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=Ib1m-e2XLc8:rb0hPpmQeTg:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=Ib1m-e2XLc8:rb0hPpmQeTg:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=Ib1m-e2XLc8:rb0hPpmQeTg:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/Ib1m-e2XLc8" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/09/self-driving-cars-next-revolution.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0QNRXw7eCp7ImA9WhNSEUk.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-994590053096386889</id><published>2012-08-07T17:36:00.000-07:00</published><updated>2012-10-24T23:09:54.200-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-10-24T23:09:54.200-07:00</app:edited><title>Google’s Self-Driving Cars Are Going to Change Everything</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/994590053096386889/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/08/googles-self-driving-cars-are-going-to.html#comment-form" title="54 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/994590053096386889?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/994590053096386889?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/aw9SfCesVjg/googles-self-driving-cars-are-going-to.html" title="Google’s Self-Driving Cars Are Going to Change Everything" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>54</thr:total><content type="html">Recent News:

Google’s Self-Driving Cars Complete 300K Miles Without Accident, Deemed Ready For Commuting
http://techcrunch.com/2012/08/07/google-cars-300000-miles-without-accident/



Here's what is going to happen in the next 5-10 years. It won't all happen right away.



The car insurance industry will cease to exist. These cars aren't going to crash. Even if there are hold-outs that drive &lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=aw9SfCesVjg:TIyzZi8Hu3U:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=aw9SfCesVjg:TIyzZi8Hu3U:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=aw9SfCesVjg:TIyzZi8Hu3U:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=aw9SfCesVjg:TIyzZi8Hu3U:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=aw9SfCesVjg:TIyzZi8Hu3U:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/aw9SfCesVjg" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/08/googles-self-driving-cars-are-going-to.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUEMRX0-fyp7ImA9WhRaEU0.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-6460311686727702164</id><published>2012-02-11T20:36:00.000-08:00</published><updated>2012-02-12T20:34:44.357-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-12T20:34:44.357-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="ajax web scraping scraper" /><title>Less Painful AJAX / Javascript Web Scraping</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/6460311686727702164/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/02/less-painful-ajax-javascript-web.html#comment-form" title="7 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6460311686727702164?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6460311686727702164?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/80uYthcCyyM/less-painful-ajax-javascript-web.html" title="Less Painful AJAX / Javascript Web Scraping" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://img.youtube.com/vi/wB9-rRmjT2E/default.jpg" height="72" width="72" /><thr:total>7</thr:total><content type="html">If you read my 
previous post, you'll see that scraping ajax pages can be a pain. So I wrote a little Java program to make it easier. It takes a list of URLs to scrape, and will render them in a browser, and save the (normal and ajax) rendered HTML and screenshots to a folder. 

Here's the how-to video:



You need Firefox 3+ installed, as well as Java 1.6. This is a beta project, and no warranty &lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=80uYthcCyyM:i_dAyYxzP80:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=80uYthcCyyM:i_dAyYxzP80:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=80uYthcCyyM:i_dAyYxzP80:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=80uYthcCyyM:i_dAyYxzP80:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=80uYthcCyyM:i_dAyYxzP80:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/80uYthcCyyM" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/02/less-painful-ajax-javascript-web.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQAQnY9fSp7ImA9WhVaFEo.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-3679647273351338746</id><published>2012-02-09T16:01:00.000-08:00</published><updated>2012-06-11T21:12:23.865-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-06-11T21:12:23.865-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="how to scrape ajax web pages" /><title>Web Scraping AJAX Pages</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/3679647273351338746/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/02/web-scraping-ajax-pages.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/3679647273351338746?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/3679647273351338746?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/I3gpmcckeEg/web-scraping-ajax-pages.html" title="Web Scraping AJAX Pages" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><content type="html">This is part four of a series of video tutorials on web scraping and web crawling.

You can probably skip this one, and go to the easy version.


Part 1: Web scraping with Google Spreadsheets and XPath

Part 2: Web Crawling with RapidMiner

Part 3: Web Scraping with RapidMiner and Xpath

This post explains how to capture HTML from Ajax / Javascript generated pages.

Here is the accompanying video&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=I3gpmcckeEg:qvxVZbTYzkM:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=I3gpmcckeEg:qvxVZbTYzkM:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=I3gpmcckeEg:qvxVZbTYzkM:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=I3gpmcckeEg:qvxVZbTYzkM:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=I3gpmcckeEg:qvxVZbTYzkM:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/I3gpmcckeEg" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/02/web-scraping-ajax-pages.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkAAQnk6fCp7ImA9WhRUGEU.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-8179698928628077273</id><published>2012-01-29T17:59:00.000-08:00</published><updated>2012-01-29T17:59:03.714-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-29T17:59:03.714-08:00</app:edited><title>On Making Videos</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/8179698928628077273/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2012/01/on-making-videos.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/8179698928628077273?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/8179698928628077273?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/wxX3buLBOrs/on-making-videos.html" title="On Making Videos" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">Here is what i use to make my videos:


1. CamStudio. This is a nice free and open-source desktop video capture program. Make sure to use their Lossless Codec, and go with these settings:

Set Keyframes Every 30 frames
Capture Frames Every = 50 milliseconds
Playback Rate = 20 frames per second
Video codec: CamStudio Lossless Codec 
Quality: 70%


2. Handbrake Video Transcoder. This will help you &lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=wxX3buLBOrs:3MemtnIR_iA:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=wxX3buLBOrs:3MemtnIR_iA:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=wxX3buLBOrs:3MemtnIR_iA:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=wxX3buLBOrs:3MemtnIR_iA:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=wxX3buLBOrs:3MemtnIR_iA:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/wxX3buLBOrs" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2012/01/on-making-videos.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0YGSHw7eyp7ImA9WhRWE0U.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-3878839214082603433</id><published>2011-12-31T19:38:00.003-08:00</published><updated>2011-12-31T19:38:49.203-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-31T19:38:49.203-08:00</app:edited><title>Happy New Year</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/3878839214082603433/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/12/happy-new-year.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/3878839214082603433?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/3878839214082603433?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/o6utg6or2X0/happy-new-year.html" title="Happy New Year" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">75,000 pageviews this year! Thanks to everyone for visiting. I will post some new material in the new year.

Have a safe and fun 2012

Neil&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=o6utg6or2X0:5pzvPX7VarM:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=o6utg6or2X0:5pzvPX7VarM:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=o6utg6or2X0:5pzvPX7VarM:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=o6utg6or2X0:5pzvPX7VarM:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=o6utg6or2X0:5pzvPX7VarM:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/o6utg6or2X0" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/12/happy-new-year.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEcHQXg-eSp7ImA9WhJbEE4.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-5679397571750587883</id><published>2011-11-04T14:39:00.001-07:00</published><updated>2012-09-18T23:27:10.651-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-09-18T23:27:10.651-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="extjs ext js tutorial learn help" /><title>My new blog about learning ExtJS</title><link rel="related" href="http://extjs-tutorials.blogspot.com/" title="My new blog about learning ExtJS" /><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/5679397571750587883/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/11/extjs-ext-js-learn-tutorial-help.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/5679397571750587883?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/5679397571750587883?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/o0hLNys3zPU/extjs-ext-js-learn-tutorial-help.html" title="My new blog about learning ExtJS" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">I have a new blog. 
It's about learning to use ExtJS, a great rich internet application library in javascript. Here it is:

http://extjs-tutorials.blogspot.com/

Check it out. Thanks
Don't worry, I'll keep posting here too&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=o0hLNys3zPU:QJa7uBoBwEU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=o0hLNys3zPU:QJa7uBoBwEU:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=o0hLNys3zPU:QJa7uBoBwEU:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=o0hLNys3zPU:QJa7uBoBwEU:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=o0hLNys3zPU:QJa7uBoBwEU:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/o0hLNys3zPU" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/11/extjs-ext-js-learn-tutorial-help.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQNQHk_cCp7ImA9WhdbEk8.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-6633676995435803511</id><published>2011-10-09T22:26:00.000-07:00</published><updated>2011-10-09T22:26:31.748-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-09T22:26:31.748-07:00</app:edited><title>How Obama's data-crunching prowess may get him re-elected</title><link rel="related" href="http://www.cnn.com/2011/10/09/tech/innovation/obama-data-crunching-election/index.html?hpt=hp_c1" title="How Obama's data-crunching prowess may get him re-elected" /><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/6633676995435803511/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/10/how-obamas-data-crunching-prowess-may.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6633676995435803511?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6633676995435803511?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/kqE_a71PiTI/how-obamas-data-crunching-prowess-may.html" title="How Obama's data-crunching prowess may get him re-elected" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><content type="html">An article on CNN 
about how the Obama 2012 campaign has hired many data miners and statisticians to help boost fundraising and support.

http://www.cnn.com/2011/10/09/tech/innovation/obama-data-crunching-election/index.html?hpt=hp_c1&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=kqE_a71PiTI:X7AUsDUdHzs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=kqE_a71PiTI:X7AUsDUdHzs:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=kqE_a71PiTI:X7AUsDUdHzs:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=kqE_a71PiTI:X7AUsDUdHzs:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=kqE_a71PiTI:X7AUsDUdHzs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/kqE_a71PiTI" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/10/how-obamas-data-crunching-prowess-may.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE4FSXg8cSp7ImA9WhNREk4.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-1939800431561533890</id><published>2011-10-08T15:31:00.001-07:00</published><updated>2012-11-06T12:15:18.679-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-11-06T12:15:18.679-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="text mining" /><category scheme="http://www.blogger.com/atom/ns#" term="text analysis" /><category scheme="http://www.blogger.com/atom/ns#" term="data mining" /><category scheme="http://www.blogger.com/atom/ns#" term="rapidminer" /><category scheme="http://www.blogger.com/atom/ns#" term="r" /><title>Text Analytics with RapidMiner Part 6 of 6 - Applying the Model to New Documents</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/1939800431561533890/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/10/rapidminer-text-mining-r-analytics.html#comment-form" title="16 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/1939800431561533890?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/1939800431561533890?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/OuOOsbQBKFI/rapidminer-text-mining-r-analytics.html" title="Text Analytics with RapidMiner Part 6 of 6 - Applying the Model to New Documents" /><author><name>Neil 
McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://img.youtube.com/vi/9I0BcMuhPe8/default.jpg" height="72" width="72" /><thr:total>16</thr:total><content type="html">After my last series, I got a lot of questions about how to apply a model to new data, so here is the real final installment in the series.

I show how to save a wordlist and model to the repository. I use them later to read the wordlist and model and apply them to new documents that RapidMiner hasn't seen before. It correctly labels 11 of the 12 documents.



Files from the video.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=OuOOsbQBKFI:QvK7TQFRJC0:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=OuOOsbQBKFI:QvK7TQFRJC0:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=OuOOsbQBKFI:QvK7TQFRJC0:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=OuOOsbQBKFI:QvK7TQFRJC0:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=OuOOsbQBKFI:QvK7TQFRJC0:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/OuOOsbQBKFI" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/10/rapidminer-text-mining-r-analytics.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEABRXg_cCp7ImA9WhdWEE8.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-6512368803019856563</id><published>2011-09-02T21:06:00.000-07:00</published><updated>2011-09-02T21:05:54.648-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-02T21:05:54.648-07:00</app:edited><title>September sunset</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/6512368803019856563/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/09/september-sunset.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6512368803019856563?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6512368803019856563?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/jg-_58jae-A/september-sunset.html" title="September sunset" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-Pbmmxtf1_nc/TmGnorFeRbI/AAAAAAAAAOI/ALClZf1ryCc/s72-c/%253D%253Futf-8%253FB%253FVmFuY291dmVyLTIwMTEwOTAyLTAwMDY3LmpwZw%253D%253D%253F%253D-754650" height="72" width="72" /><thr:total>1</thr:total><content type="html">&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=jg-_58jae-A:Ze9Sf_oAmCc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=jg-_58jae-A:Ze9Sf_oAmCc:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=jg-_58jae-A:Ze9Sf_oAmCc:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=jg-_58jae-A:Ze9Sf_oAmCc:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=jg-_58jae-A:Ze9Sf_oAmCc:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/jg-_58jae-A" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/09/september-sunset.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0cFQn47eyp7ImA9WhdXFUg.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-1128744730928246656</id><published>2011-08-27T20:01:00.002-07:00</published><updated>2011-08-28T11:10:13.003-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-28T11:10:13.003-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="extract transform load" /><category scheme="http://www.blogger.com/atom/ns#" term="data mining" /><category scheme="http://www.blogger.com/atom/ns#" term="rapidminer" /><title>RapidMiner ETL - Transforming Attributes with Functions</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/1128744730928246656/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-transforming-attributes.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/1128744730928246656?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/1128744730928246656?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/YoQcHcT5JEg/rapidminer-etl-transforming-attributes.html" title="RapidMiner ETL - Transforming Attributes with Functions" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail 
xmlns:media="http://search.yahoo.com/mrss/" url="http://img.youtube.com/vi/6uBKg9-EMRk/default.jpg" height="72" width="72" /><thr:total>4</thr:total><content type="html">In this video I show how to transform features in RapidMiner using operators such as log, sqrt, absolute value, and multiplying columns.

&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=YoQcHcT5JEg:2_XwZ6Jo19w:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=YoQcHcT5JEg:2_XwZ6Jo19w:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=YoQcHcT5JEg:2_XwZ6Jo19w:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=YoQcHcT5JEg:2_XwZ6Jo19w:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=YoQcHcT5JEg:2_XwZ6Jo19w:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/YoQcHcT5JEg" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-transforming-attributes.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkAMRH88cCp7ImA9WhdXFUg.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-6208340498780104596</id><published>2011-08-27T20:01:00.000-07:00</published><updated>2011-08-28T11:06:25.178-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-28T11:06:25.178-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="extract transform load" /><category scheme="http://www.blogger.com/atom/ns#" term="data mining" /><title>RapidMiner ETL - Normalizing, Discretizing, Recoding</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/6208340498780104596/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-normalizing-discretizing.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6208340498780104596?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/6208340498780104596?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/_zvLLT_WfUQ/rapidminer-etl-normalizing-discretizing.html" title="RapidMiner ETL - Normalizing, Discretizing, Recoding" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" 
url="http://img.youtube.com/vi/XfvSIgcTDZs/default.jpg" height="72" width="72" /><thr:total>0</thr:total><content type="html">In this video I show how to normalize an attribute, including z-normalization, how to discretize a column, and how to recode values


&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=_zvLLT_WfUQ:2nLI5Ll2EUk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=_zvLLT_WfUQ:2nLI5Ll2EUk:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=_zvLLT_WfUQ:2nLI5Ll2EUk:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=_zvLLT_WfUQ:2nLI5Ll2EUk:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=_zvLLT_WfUQ:2nLI5Ll2EUk:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/_zvLLT_WfUQ" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-normalizing-discretizing.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkEEQnY-eip7ImA9WhdXE0U.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-4366878170088799710</id><published>2011-08-25T18:18:00.000-07:00</published><updated>2011-08-26T10:43:23.852-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-26T10:43:23.852-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="rapidminer data mining etl" /><title>RapidMiner ETL - Sampling, Selecting Rows, Attributes</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/4366878170088799710/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-sampling-selecting-rows.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4366878170088799710?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4366878170088799710?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/aN44lmPekPA/rapidminer-etl-sampling-selecting-rows.html" title="RapidMiner ETL - Sampling, Selecting Rows, Attributes" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://img.youtube.com/vi/DtKE2aaRhAU/default.jpg" height="72" width="72" 
/><thr:total>2</thr:total><content type="html">In this video I show how to sample rows, including balancing class labels, bootstrap sampling. I also show how to filter rows by value, and select a subset of attributes.



You can get the dataset here&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=aN44lmPekPA:OzlVnw3AItc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=aN44lmPekPA:OzlVnw3AItc:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=aN44lmPekPA:OzlVnw3AItc:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=aN44lmPekPA:OzlVnw3AItc:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=aN44lmPekPA:OzlVnw3AItc:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/aN44lmPekPA" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-sampling-selecting-rows.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkUMSHcycSp7ImA9WhdXE08.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-566762827856756137</id><published>2011-08-25T17:58:00.000-07:00</published><updated>2011-08-25T17:58:09.999-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-25T17:58:09.999-07:00</app:edited><title>RapidMiner ETL - Combining Datasets</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/566762827856756137/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-combining-datasets.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/566762827856756137?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/566762827856756137?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/ZXBS9x_IpUk/rapidminer-etl-combining-datasets.html" title="RapidMiner ETL - Combining Datasets" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://img.youtube.com/vi/RioT2Z1QB9s/default.jpg" height="72" width="72" /><thr:total>1</thr:total><content type="html">In this video, I show how to combine multiple datasets into one, and join columns and append rows.


&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=ZXBS9x_IpUk:lM11LuAVid4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=ZXBS9x_IpUk:lM11LuAVid4:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=ZXBS9x_IpUk:lM11LuAVid4:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=ZXBS9x_IpUk:lM11LuAVid4:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=ZXBS9x_IpUk:lM11LuAVid4:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/ZXBS9x_IpUk" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-combining-datasets.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C04MRno_cCp7ImA9WhdVE0U.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-4666301277375180679</id><published>2011-08-25T17:33:00.000-07:00</published><updated>2011-09-18T14:39:47.448-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-18T14:39:47.448-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="etl" /><category scheme="http://www.blogger.com/atom/ns#" term="data mining" /><category scheme="http://www.blogger.com/atom/ns#" term="rapidminer" /><category scheme="http://www.blogger.com/atom/ns#" term="business intelligence" /><title>And We're Back. A video series on ETL with RapidMiner</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/4666301277375180679/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-extract-transform-load.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4666301277375180679?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/4666301277375180679?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/UnfEGc210hI/rapidminer-etl-extract-transform-load.html" title="And We're Back. 
A video series on ETL with RapidMiner" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>2</thr:total><content type="html">Back with some more videos! Sorry for the long wait, and thanks for your patience.

This series is on ETL: Extract, Transform, Load with Rapidminer.

The first video shows how to combine multiple datasets into one, by joining columns and appending rows.

The second video is on sampling and selecting rows and attributes.

More videos coming soon.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=UnfEGc210hI:WEIFrolQFXI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=UnfEGc210hI:WEIFrolQFXI:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=UnfEGc210hI:WEIFrolQFXI:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=UnfEGc210hI:WEIFrolQFXI:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=UnfEGc210hI:WEIFrolQFXI:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/UnfEGc210hI" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/08/rapidminer-etl-extract-transform-load.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUQMRXc9fyp7ImA9WhZRFUs.&quot;"><id>tag:blogger.com,1999:blog-2523819181563716059.post-1960434316324224700</id><published>2011-04-10T18:13:00.001-07:00</published><updated>2011-04-11T17:16:24.967-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-11T17:16:24.967-07:00</app:edited><title>A rainy sunday in downtown Vancouver</title><link rel="replies" type="application/atom+xml" href="http://vancouverdata.blogspot.com/feeds/1960434316324224700/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://vancouverdata.blogspot.com/2011/04/rainy-sunday-in-downtown-vancouver.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/1960434316324224700?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2523819181563716059/posts/default/1960434316324224700?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/VancouverData/~3/0hasLYHMpsQ/rainy-sunday-in-downtown-vancouver.html" title="A rainy sunday in downtown Vancouver" /><author><name>Neil McGuigan</name><uri>http://www.blogger.com/profile/14122981831780837323</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-2KIu8bmB7e4/TaJV1-JAk7I/AAAAAAAAAMw/pow9K1Km70s/s72-c/%253D%253Futf-8%253FB%253FVmFuY291dmVyLTIwMTEwNDEwLTAwMDM2LmpwZw%253D%253D%253F%253D-739348" height="72" width="72" /><thr:total>2</thr:total><content 
type="html">My blog should look better on mobile devices now.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=0hasLYHMpsQ:n2zSbCFTCY4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=0hasLYHMpsQ:n2zSbCFTCY4:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=0hasLYHMpsQ:n2zSbCFTCY4:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?i=0hasLYHMpsQ:n2zSbCFTCY4:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/VancouverData?a=0hasLYHMpsQ:n2zSbCFTCY4:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/VancouverData?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/VancouverData/~4/0hasLYHMpsQ" height="1" width="1"/&gt;</content><feedburner:origLink>http://vancouverdata.blogspot.com/2011/04/rainy-sunday-in-downtown-vancouver.html</feedburner:origLink></entry></feed>
