<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:blogger='http://schemas.google.com/blogger/2008' xmlns:georss='http://www.georss.org/georss' xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-21831384</id><updated>2025-04-22T15:58:04.335+05:30</updated><category term="data mining"/><category term="Explaining vs. Predicting"/><category term="business analytics"/><category term="data visualization"/><category term="software"/><category term="data"/><category term="teaching business data mining"/><category term="competition"/><category term="big data"/><category term="predictive analytics"/><category term="time series"/><category term="regression"/><category term="Book"/><category term="Excel"/><category term="p-value"/><category term="teaching"/><category term="data analytics"/><category term="forecasting"/><category term="Course"/><category term="Google"/><category term="Large samples"/><category term="graphs"/><category term="predictive accuracy"/><category term="privacy"/><category term="spotfire"/><category term="technology"/><category term="visualization"/><category term="data collection"/><category term="prediction"/><category term="social sciences"/><category term="textbook"/><category term="Analytics"/><category term="Business intelligence"/><category term="Netflix"/><category term="SAS"/><category term="classification"/><category term="data source"/><category term="design of experiments"/><category term="health"/><category term="CART"/><category term="INFORMS"/><category term="bhutan"/><category term="causality"/><category term="classification trees"/><category term="correlation"/><category term="data liberation"/><category term="data usage"/><category term="education"/><category term="experiment"/><category 
term="infoQ"/><category term="interactive visualization"/><category term="k-NN"/><category term="newspaper"/><category term="nytimes"/><category term="performance metrics"/><category term="pivot table"/><category term="projects"/><category term="psychology"/><category term="scatterplot"/><category term="surveys"/><category term="tableau"/><category term="Clickers"/><category term="Factor Analysis"/><category term="KDD cup"/><category term="MAPE"/><category term="PCA"/><category term="WSJ"/><category term="Webcast"/><category term="behavioral big data"/><category term="conditional probability"/><category term="data compression"/><category term="econometric model"/><category term="forum"/><category term="machine learning"/><category term="mobile learning"/><category term="multiple testing"/><category term="overfitting"/><category term="predicting"/><category term="regresson tree"/><category term="river plot"/><category term="sampling"/><category term="seasonality"/><category term="t-test"/><category term="text mining"/><category term="treemap"/><category term="trend"/><category term="visual analytics"/><category term="A/B testing"/><category term="ASA"/><category term="Bayes Rule"/><category term="C4.5"/><category term="CEO mansions"/><category term="CHAID"/><category term="DHS"/><category term="FDR"/><category term="FICO"/><category term="Google Insights for Search"/><category term="India"/><category term="Israel"/><category term="Microsoft"/><category term="Monty Hall Problem"/><category term="OR"/><category term="PointMaven"/><category term="R-squared"/><category term="RMSE"/><category term="Simpson&#39;s paradox"/><category term="The American Statistician"/><category term="Yahoo"/><category term="aggregation"/><category term="astronomy"/><category term="bar chart"/><category term="bonferroni"/><category term="cancer screening"/><category term="cartoon"/><category term="chrysler"/><category term="clustering"/><category term="coincident indicators"/><category 
term="credit score"/><category term="cycle plots"/><category term="data democratization"/><category term="data science"/><category term="descriptive modeling"/><category term="eBay"/><category term="elections"/><category term="epidemiology"/><category term="equivalence"/><category term="ethics"/><category term="goodness-of-fit"/><category term="hidden fields"/><category term="interaction"/><category term="jobs"/><category term="leading indicators"/><category term="library"/><category term="mathematics"/><category term="microstrategy"/><category term="missing values"/><category term="moderator"/><category term="moving average"/><category term="musical"/><category term="network"/><category term="observational data"/><category term="online auctions"/><category term="operationalization"/><category term="outliers"/><category term="portfolio approach"/><category term="proving theory"/><category term="quantified self"/><category term="recommender systems"/><category term="residuals"/><category term="security"/><category term="selection bias"/><category term="social media"/><category term="social network"/><category term="spatial data"/><category term="spreadsheets"/><category term="statistical test"/><category term="statisticians"/><category term="students"/><category term="summary stats"/><category term="surplus"/><category term="surveillance"/><category term="tive analytics"/><category term="videos"/><category term="voting"/><category term="weights"/><title type='text'>BzST | Business Analytics, Statistics, Teaching</title><subtitle type='html'>A blog by Prof. 
Galit Shmueli</subtitle><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://www.bzst.com/feeds/posts/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default?alt=atom&amp;redirect=false'/><link rel='alternate' type='text/html' href='http://www.bzst.com/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><link rel='next' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default?alt=atom&amp;start-index=26&amp;max-results=25&amp;redirect=false'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>187</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-21831384.post-1031931959312417936</id><published>2020-12-03T15:05:00.004+05:30</published><updated>2020-12-03T15:05:47.208+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="prediction"/><category scheme="http://www.blogger.com/atom/ns#" term="predictive analytics"/><title type='text'>Machine learning algorithms surprises at deployment? (article on Medium)</title><summary type="text">Machine
 learning (ML) algorithms are being used to generate predictions in 
every corner of our decision-making life. Methods range from “simple” 
algorithms such as trees, forests, naive Bayes, linear and logistic 
regression models, and nearest-neighbor methods, through improvements 
such as boosting, bagging, regularization, and ensembling, to 
computationally-intensive, blackbox deep </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/1031931959312417936/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/1031931959312417936' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/1031931959312417936'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/1031931959312417936'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2020/12/machine-learning-algorithms-surprises.html' title='Machine learning algorithms surprises at deployment? (article on Medium)'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-4550192490180009397</id><published>2018-12-10T16:28:00.001+05:30</published><updated>2018-12-10T17:51:25.644+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="big data"/><category scheme="http://www.blogger.com/atom/ns#" term="business analytics"/><category scheme="http://www.blogger.com/atom/ns#" term="forecasting"/><category scheme="http://www.blogger.com/atom/ns#" term="time series"/><title type='text'>Forecasting large collections of time series</title><summary type="text">With the recent launch of&#160;Amazon Forecast, I can no longer procrastinate writing about forecasting &quot;at scale&quot;!

Quantitative forecasting of time series has been used (and taught) for decades, with applications in many areas of business such as demand forecasting, sales forecasting, and financial forecasting. The types of methods taught in forecasting courses tend to be discipline-specific:
</summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/4550192490180009397/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/4550192490180009397' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4550192490180009397'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4550192490180009397'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2018/12/forecasting-large-collections-of-time.html' title='Forecasting large collections of time series'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEh_8IYJ-sxhP1BOby_fUkJ5JUZldIobUphlX5zI1lwRJX9XnrAvAbfwX2lHxK3OypEKlFFnCMpVFKegliSKQna8hfErAESOUe2kFp5-0LEq9ITi1nH1iHWl11mKStwUEmTicaxCKQ/s72-c/PTSF-R-MU-LG.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-7922818449447297191</id><published>2018-02-04T19:42:00.000+05:30</published><updated>2018-02-04T19:42:00.383+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="data"/><category scheme="http://www.blogger.com/atom/ns#" term="data science"/><category scheme="http://www.blogger.com/atom/ns#" term="data usage"/><category scheme="http://www.blogger.com/atom/ns#" term="ethics"/><category scheme="http://www.blogger.com/atom/ns#" term="privacy"/><category scheme="http://www.blogger.com/atom/ns#" term="social sciences"/><title type='text'>Data Ethics Regulation: Two key updates in 2018</title><summary 
type="text">This year, two important new regulations will be impacting research with human subjects: the EU&#39;s General Data Protection Regulation (GDPR), which kicks in May 2018, and the USA&#39;s updated Common Rule, called the Final Rule, is in effect from Jan 2018. Both changes relate to protecting individuals&#39; private information and will affect researchers using behavioral data in terms of data collection, </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/7922818449447297191/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/7922818449447297191' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/7922818449447297191'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/7922818449447297191'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2018/02/data-ethics-regulation-two-key-updates.html' title='Data Ethics Regulation: Two key updates in 2018'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhG4dA_yogCf7ZmuCCvJ6hahxP77LzrSuT-R1TIv75TDeHEw9Q6v8B1-y_uC3RNCz_GCAehMYd4j-Z85fXUQcJygKkCqXiXrBJYWAglQuF3cDzMTvWUaORsi9iuuGUCs-9Ug85B9g/s72-c/General-Data-Protection-Regulation-GDPR-2018.jpeg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-9154681573518447896</id><published>2017-12-25T12:27:00.000+05:30</published><updated>2017-12-25T16:42:23.491+05:30</updated><category 
scheme="http://www.blogger.com/atom/ns#" term="big data"/><category scheme="http://www.blogger.com/atom/ns#" term="data"/><category scheme="http://www.blogger.com/atom/ns#" term="descriptive modeling"/><category scheme="http://www.blogger.com/atom/ns#" term="elections"/><category scheme="http://www.blogger.com/atom/ns#" term="Explaining vs. Predicting"/><category scheme="http://www.blogger.com/atom/ns#" term="Google"/><category scheme="http://www.blogger.com/atom/ns#" term="infoQ"/><category scheme="http://www.blogger.com/atom/ns#" term="social sciences"/><title type='text'>Election polls: description vs. prediction</title><summary type="text">My papers To Explain or To Predict&#160;and Predictive Analytics in Information Systems Research contrast the process and uses of predictive modeling and causal-explanatory modeling. I briefly mentioned there a third type of modeling: descriptive. However, I haven&#39;t expanded on how descriptive modeling differs from the other two types (causal explanation and prediction). While descriptive and </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/9154681573518447896/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/9154681573518447896' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/9154681573518447896'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/9154681573518447896'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2017/12/election-polls-description-vs-prediction.html' title='Election polls: description vs. 
prediction'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgVmH4pEX_pbA2NB3E3hdliOtFyF18iGWREiGzAhEmiF6iHIzgbZs06zbMCh6OcabBpY0Ij7GZxoFMzhz-oNrzrTpp-0rE0M8PDWYgpohIYo5Nf2f436C9c_TwSzTJW7wbiSLduUQ/s72-c/everybodylies.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-4265978712541303692</id><published>2017-11-06T18:19:00.000+05:30</published><updated>2017-11-06T18:19:10.448+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="equivalence"/><category scheme="http://www.blogger.com/atom/ns#" term="Large samples"/><category scheme="http://www.blogger.com/atom/ns#" term="p-value"/><category scheme="http://www.blogger.com/atom/ns#" term="statistical test"/><title type='text'>Statistical test for &quot;no difference&quot;</title><summary type="text">To most researchers and practitioners using statistical inference, the popular hypothesis testing universe consists of two hypotheses:

H0 is the null hypothesis of &quot;zero effect&quot;

H1 is the alternative hypothesis of &quot;a non-zero effect&quot;


The alternative hypothesis (H1) is typically what the researcher is trying to find: a different outcome for a treatment and control group in an experiment, a </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/4265978712541303692/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/4265978712541303692' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4265978712541303692'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4265978712541303692'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2017/11/statistical-test-for-no-difference.html' title='Statistical test for &quot;no difference&quot;'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhSYrcuCQqwyOBL6POEmwKXbuWctOScXXqVcDD6Gi-O8IpRkLTisYfwDXTr1NGoVvt-UTDL2fn0XnEIzg5KZ9KBV9G7mTL6a0eQthQWZor_whxSBGc5AetM5sCXrKeb_VeIjmw5kw/s72-c/2smple_equiv_image_w640.jpeg" height="72" width="72"/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-326583015495788736</id><published>2017-09-05T19:50:00.001+05:30</published><updated>2017-09-05T19:50:23.297+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="business analytics"/><category scheme="http://www.blogger.com/atom/ns#" term="data mining"/><category scheme="http://www.blogger.com/atom/ns#" term="teaching"/><category scheme="http://www.blogger.com/atom/ns#" term="teaching business data mining"/><category 
scheme="http://www.blogger.com/atom/ns#" term="textbook"/><category scheme="http://www.blogger.com/atom/ns#" term="videos"/><title type='text'>My videos for “Business Analytics using Data Mining” now publicly available!</title><summary type="text">
Five years ago, in 2012, I decided to experiment in improving my teaching by creating a flipped classroom (and semi-MOOC) for my course “Business Analytics Using Data Mining” (BADM) at the Indian School of Business. I initially designed the course at University of Maryland’s Smith School of Business in 2005 and taught it until 2010. When I joined ISB in 2011 I started teaching multiple sections </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/326583015495788736/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/326583015495788736' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/326583015495788736'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/326583015495788736'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2017/09/my-videos-for-business-analytics-using.html' title='My videos for “Business Analytics using Data Mining” now publicly available!'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiM1ZGy0xJ4BfBkCRTBrrv60dtxrZsu4y0z_vJIQk_TDdo3VrY87Z4LZ8gnVPHHFzgjKiPLOXZ29GaND-AY0mLNaBnmYOBew97C3ZblIts6u_xa0BbmDzFAZhG3pokSAEE0SJYC5g/s72-c/DMBA-books.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-286679499218463020</id><published>2017-03-14T06:45:00.002+05:30</published><updated>2017-03-14T06:45:41.567+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" 
term="classification"/><category scheme="http://www.blogger.com/atom/ns#" term="clustering"/><category scheme="http://www.blogger.com/atom/ns#" term="data"/><category scheme="http://www.blogger.com/atom/ns#" term="data mining"/><category scheme="http://www.blogger.com/atom/ns#" term="k-NN"/><category scheme="http://www.blogger.com/atom/ns#" term="regression"/><title type='text'>Data mining algorithms: how many dummies?</title><summary type="text">There&#39;s lots of posts on &quot;k-NN for Dummies&quot;. This one is about &quot;Dummies for k-NN&quot;

Categorical predictor variables are very common. Those who&#39;ve taken a Statistics course covering linear (or logistic) regression, know the procedure to include a categorical predictor into a regression model requires the following steps:


Convert the categorical variable that has m categories, into m binary dummy </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/286679499218463020/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/286679499218463020' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/286679499218463020'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/286679499218463020'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2017/03/data-mining-algorithms-how-many-dummies.html' title='Data mining algorithms: how many dummies?'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgWW0q08TnxZyV1E3VVbXBYCADlvLntGCwZL1kC1rZyXewTvB7bhZZ9Mxrqnb8kQXTM-49VIssd0vmmuYdO4gGRY6ImDM9qYn3wDriIsLJQy2_ajf1Wau0G_kAgmmKKJ9p5iFD4lA/s72-c/dummy.jpeg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-5011153059355071009</id><published>2016-12-22T13:48:00.001+05:30</published><updated>2016-12-27T19:07:51.263+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="A/B testing"/><category scheme="http://www.blogger.com/atom/ns#" term="behavioral big data"/><category scheme="http://www.blogger.com/atom/ns#" term="big data"/><category scheme="http://www.blogger.com/atom/ns#" term="design of experiments"/><category scheme="http://www.blogger.com/atom/ns#" term="social sciences"/><category scheme="http://www.blogger.com/atom/ns#" 
term="statisticians"/><title type='text'>Key challenges in online experiments: where are the statisticians?</title><summary type="text">

Randomized experiments (or randomized controlled trials, RCT) are a powerful tool for testing causal relationships. Their main principle is random assignment, where subjects or items are assigned randomly to one of the experimental conditions. A classic example is a clinical trial with one or more treatment groups and a no-treatment (control) group, where individuals are assigned at random to </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/5011153059355071009/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/5011153059355071009' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/5011153059355071009'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/5011153059355071009'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2016/12/key-challenges-in-online-experiments.html' title='Key challenges in online experiments: where are the statisticians?'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEi0pj2S-4M3IbBMRYCElCD0cr7Un1AiElFX4glOpuD0gZZS5s9Np4S2bVcH4cqT5uO-zYkO5jSFT6JfCHH9e-P2TocyACyRr61Wb3d9wf50V9YMDljwjGWDU2du9SCF94Q6Gzn9XQ/s72-c/social-experiment-advertsing-tugboat-group-vancouver.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-2460099439751309392</id><published>2016-10-24T14:32:00.000+05:30</published><updated>2016-10-24T18:12:38.019+05:30</updated><category 
scheme="http://www.blogger.com/atom/ns#" term="behavioral big data"/><category scheme="http://www.blogger.com/atom/ns#" term="big data"/><category scheme="http://www.blogger.com/atom/ns#" term="health"/><category scheme="http://www.blogger.com/atom/ns#" term="privacy"/><category scheme="http://www.blogger.com/atom/ns#" term="quantified self"/><title type='text'>Experimenting with quantified self: two months hooked up to a fitness band</title><summary type="text">It&#39;s one thing to collect and analyze behavioral big data (BBD) and another to understand what it means to be the subject of that data. To really understand. Yes, we&#39;re all aware that our social network accounts and IoT devices share our private information with large and small companies and other organizations. And although we complain about our privacy, we are forgiving about sharing it, most </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/2460099439751309392/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/2460099439751309392' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/2460099439751309392'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/2460099439751309392'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2016/10/experimenting-with-quantitative-self.html' title='Experimenting with quantified self: two months hooked up to a fitness band'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" 
url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiJt7xmR3S87JLGO0koJhhPHN7D5PRbQB09D6L6_cgUXj0KLE-An_DU73aYAYg-AXYheBsLUfzf1Rd-Uqz1vDqGVSLcOgxt5RiTpEOuwVsbBfBfIMHj3ZIv7os3Ch5nVQYt0Di90g/s72-c/Mi-Band-Pulse.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-4441736114671243506</id><published>2016-04-26T12:36:00.001+05:30</published><updated>2016-04-26T17:45:37.186+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="data analytics"/><category scheme="http://www.blogger.com/atom/ns#" term="p-value"/><category scheme="http://www.blogger.com/atom/ns#" term="software"/><title type='text'>Statistical software should remove *** notation for statistical significance</title><summary type="text">Now that the emotional storm following the American Statistical Association&#39;s statement on p-values is slowing down (is it? was there even a storm outside of the statistics area?), let&#39;s think about a practical issue. One that greatly influences data analysis in most fields: statistical software. Statistical software influences which methods are used and how they are reported. 
Software companies </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/4441736114671243506/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/4441736114671243506' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4441736114671243506'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4441736114671243506'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2016/04/statistical-software-should-remove.html' title='Statistical software should remove *** notation for statistical significance'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjMD5VzKTppOynHmjrKny0_GdwimxuWKszb8IwbRXPO4X9skn31Ft569l9_TG_ahx_D08EVWbaWip78eXHuJy9Ziu8BTF_KqtCEkhXM-S8a_LpLafeyRoXAxxpolJ0DdS3JO8QS0Q/s72-c/software-p-values.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-929664294926029913</id><published>2016-03-24T13:33:00.000+05:30</published><updated>2016-03-25T05:37:07.719+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="big data"/><title type='text'>A non-traditional definition of Big Data: Big is Relative</title><summary type="text">I&#39;ve noticed that in almost every talk or discussion that involves the term Big Data, one of the first slides by the presenter or the first questions to be asked by the audience is &quot;what is Big Data?&quot; The typical answer has to do with some digits, many 
V&#39;s, terms that end with &quot;bytes&quot;, or statements about software or hardware capacity.

I beg to differ.

&quot;Big&quot; is relative. It is relative to a </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/929664294926029913/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/929664294926029913' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/929664294926029913'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/929664294926029913'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2016/03/a-non-traditional-definition-of-big.html' title='A non-traditional definition of Big Data: Big is Relative'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgVIuFhUI4ZwvxD18jwJNE9J5L9xSCOwnu2LA0QLNcC2LtcWdS8UBaoCCkd5WYkz54Ru2Kt3zCkCNxiWoxr5pjvk5CrmhamUlmd-nOmKdPjy0pXdBOjiau02T1y-rO2IKjaW_D9pg/s72-c/bigdata.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-2287860247775202480</id><published>2015-12-07T15:19:00.001+05:30</published><updated>2015-12-07T15:19:50.824+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="Explaining vs. Predicting"/><category scheme="http://www.blogger.com/atom/ns#" term="prediction"/><category scheme="http://www.blogger.com/atom/ns#" term="predictive analytics"/><title type='text'>Predictive analytics in the long term</title><summary type="text">Ten years ago, micro-level prediction the way we know it today, was nearly absent in companies. 
MBAs learned about data analysis mostly in a required statistics course, which covered mostly statistical inference and descriptive modeling. At the time, I myself was learning my way into the predictive world, and designed the first Data Mining course at University of Maryland&#39;s Smith School of </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/2287860247775202480/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/2287860247775202480' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/2287860247775202480'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/2287860247775202480'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2015/12/predictive-analytics-in-long-term.html' title='Predictive analytics in the long term'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEglbRTFmIAc6seF_S6ubF5JWEOG2c1EvJU0SuddPl-JeVtSFzBgt3OEMYhN3Z2Aifhsp4Z9CaCU5x_ZesWLJjTpQVehB6sculJv2lLhnt4WfJkxty5AabbSomM4001TNRfEHduZrg/s72-c/correlation.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-8369800958350983945</id><published>2015-08-19T06:40:00.000+05:30</published><updated>2015-08-19T06:40:48.148+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="CART"/><category scheme="http://www.blogger.com/atom/ns#" term="data analytics"/><category scheme="http://www.blogger.com/atom/ns#" term="data 
mining"/><category scheme="http://www.blogger.com/atom/ns#" term="k-NN"/><category scheme="http://www.blogger.com/atom/ns#" term="regression"/><category scheme="http://www.blogger.com/atom/ns#" term="teaching business data mining"/><title type='text'>Categorical predictors: how many dummies to use in regression vs. k-nearest neighbors </title><summary type="text">Recently I&#39;ve had discussions with several instructors of data mining courses about a fact that is often left out of many books, but is quite important: different treatment of dummy variables in different data mining methods.



From&amp;nbsp;http://blog.excelmasterseries.com

Statistics courses that cover linear or logistic regression teach us to be careful when including a categorical predictor </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/8369800958350983945/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/8369800958350983945' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/8369800958350983945'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/8369800958350983945'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2015/08/categorical-predictors-how-many-dummies.html' title='Categorical predictors: how many dummies to use in regression vs. k-nearest neighbors '/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEi98PuVrh2NF-YmLeBUoUj-0Ma4XjS7uIHXBjhDuEvqCUM3LOyaDDUXT9I76LRzd4Ss-j_U4nLPhawycKCkIGZiN5PktGfluH4-fbgllT1IQLWXhUC7_obh1mbByRCMzdoHSw9-Kw/s72-c/dummies.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-2983902123210510245</id><published>2015-03-02T19:15:00.000+05:30</published><updated>2015-03-02T19:15:01.123+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="p-value"/><category scheme="http://www.blogger.com/atom/ns#" term="psychology"/><category scheme="http://www.blogger.com/atom/ns#" term="t-test"/><title type='text'>Psychology journal bans statistical inference; knocks down server</title><summary type="text">




In its recent editorial, the journal Basic and Applied Social Psychology&amp;nbsp;announced that it will no longer accept papers that use classical statistical inference. No more p-values, t-tests, or even... confidence intervals!&amp;nbsp;


&quot;prior to publication, authors will have to remove all vestiges of the NHSTP (p-values, t-values, F-values, statements about ‘‘significant’’ differences or </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/2983902123210510245/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/2983902123210510245' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/2983902123210510245'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/2983902123210510245'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2015/03/psychology-journal-bans-statistical.html' title='Psychology journal bans statistical inference; knocks down server'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhOCtR5olYB3NpyCr-6WPraDWJgYHdr3zF81J2Dvr4QgFEeXTp3RgKEiVy83Lz1qlK8UatMN4uVrGqEN825_YvzZJNOmEKNlPeSML5ZEmeBokar46mIu6-z7-BhPdyvdvABJsLWJg/s72-c/p.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-9055607906159925772</id><published>2015-02-07T11:56:00.000+05:30</published><updated>2015-02-07T11:56:09.334+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="data analytics"/><category scheme="http://www.blogger.com/atom/ns#" term="teaching"/><category scheme="http://www.blogger.com/atom/ns#" term="teaching business data mining"/><title type='text'>Teaching spaces: &quot;Analytics in a Studio&quot;</title><summary 
type="text">My first semester at NTHU has been a great learning experience. I introduced and taught two new courses in our new Business Analytics concentration&amp;nbsp;(data mining and forecasting).&amp;nbsp;Both courses met once a week for a 3-hour session for a full semester (18 weeks). Although I&#39;ve taught these courses in different forms, in different countries, and to different audiences, I had a special </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/9055607906159925772/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/9055607906159925772' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/9055607906159925772'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/9055607906159925772'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2015/02/teaching-spaces-analytics-in-studio.html' title='Teaching spaces: &quot;Analytics in a Studio&quot;'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhvfr_wMG1iG32hqSxc45K6rZ7UWI_Dq2WprxmLL9kULfKWYNEXx8kMCo9FwtZqANVzw41QTqLyAMX33DvljMcSdzHZkhtQ-XvXFwkZXAy8RpT3h7NUIOJ5Qy35RYxjzExRsYWXGQ/s72-c/caseroom2.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-7411589150685147797</id><published>2014-12-19T10:39:00.001+05:30</published><updated>2014-12-19T10:44:42.082+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="Explaining vs. 
Predicting"/><category scheme="http://www.blogger.com/atom/ns#" term="teaching"/><category scheme="http://www.blogger.com/atom/ns#" term="visualization"/><title type='text'>New curriculum design guidelines by American Statistical Association: Who will teach?</title><summary type="text">



The American Statistical Association published new &quot;Curriculum Guidelines for Undergraduate Programs in Statistical Science&quot;. This is the first update to the guidelines since 2000.
The executive summary lists the key points:

Increased importance of data science
Real applications
More diverse models and approaches
Ability to communicate


This set sounds right on target with what is expected </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/7411589150685147797/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/7411589150685147797' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/7411589150685147797'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/7411589150685147797'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2014/12/new-curriculum-design-guidelines-by.html' title='New curriculum design guidelines by American Statistical Association: Who will teach?'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhpxt3OqIVYRuVvKBs20uUYsS0KKyx36XIeYKIxqJXwM757USSb9bPqCLtw9kr1PA1IGZZuUR5Z0etuh7EFbhHgT97fvRu2r6vdwKWQD74xWR812chcjXSEOTcETgC_X7b5mVh2tA/s72-c/statflask.gif" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-7729841670850915915</id><published>2014-10-16T20:25:00.002+05:30</published><updated>2014-10-16T20:31:00.952+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="big data"/><category scheme="http://www.blogger.com/atom/ns#" term="data"/><category scheme="http://www.blogger.com/atom/ns#" term="infoQ"/><title type='text'>What&#39;s in a name? &quot;Data&quot; in Mandarin Chinese</title><summary type="text">The term &quot;data&quot;, now popularly used in many languages, is not as innocent as it seems. 
The biggest controversy that I&#39;ve been aware of is whether the English term &quot;data&quot; is singular or plural. The tone of an entire article would be different based on the author&#39;s decision.

In Hebrew, the word is in plural (Netunim, with the final &quot;im&quot; signifying plural), so no question arises.

Today I </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/7729841670850915915/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/7729841670850915915' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/7729841670850915915'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/7729841670850915915'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2014/10/whats-in-name-data-in-mandarin-chinese.html' title='What&#39;s in a name? &quot;Data&quot; in Mandarin Chinese'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjm3wTjMBJ4D03IegAoqB6AlzlURL_tYPq9vQ5xizE4DNnH5HWdbTBCe3abkHzdzs7f4_WHGe5mabQpyaEFpZeL5u3EgMBH75o9M_p77rIFuN9eXZ1NsevDWqEUh1G_qCe3GeLe5A/s72-c/data-google.PNG" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-3841656836113375300</id><published>2014-09-26T15:34:00.001+05:30</published><updated>2014-09-26T20:36:03.322+05:30</updated><title type='text'>Humane and Socially Responsible Analytics: A new concentration at National Tsing Hua University</title><summary type="text">This Fall, I&#39;m introducing two new elective courses at NTHU&#39;s Institute of Service Science: Business Analytics using Data Mining and Business Analytics using Forecasting&amp;nbsp;(if you&#39;re wondering about the difference, see an earlier post). 
The two new courses join three other elective courses to form the new concentration in Business Analytics. Courses in this concentration are aimed at getting </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/3841656836113375300/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/3841656836113375300' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/3841656836113375300'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/3841656836113375300'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2014/09/humane-and-socially-responsible.html' title='Humane and Socially Responsible Analytics: A new concentration at National Tsing Hua University'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgrbEhzazaFE4su6axAaaS0V32lBbjw4kqy8GNlzkwOwyfPQZkmVRdWwm5jZADUtBTiY-BjF1_3v8_Si1RsSJPrGOrjayLZ_2W03jTfxFCsEtM-zr4K0Q25T03M6QzdLLfe1tnaQA/s72-c/holistic_analytics.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-8525629653428993997</id><published>2014-09-19T14:22:00.000+05:30</published><updated>2014-09-19T14:40:58.848+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="data visualization"/><category scheme="http://www.blogger.com/atom/ns#" term="India"/><category scheme="http://www.blogger.com/atom/ns#" term="Israel"/><title type='text'>India redefines &quot;reciprocity&quot;; Israeli 
professionals pay the price</title><summary type="text">After a few years of employment at the Indian School of Business (in 2010 as a visitor and later as a tenured SRITNE Chaired Professor of Data Analytics), the time has come for me to get a new Employment Visa. As an Israeli-American, I decided to apply for the visa using my Israeli passport. I was almost on my way to the Indian embassy when I discovered, to my horror, that the fee is over USD $</summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/8525629653428993997/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/8525629653428993997' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/8525629653428993997'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/8525629653428993997'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2014/09/india-redefines-reciprocity-israeli.html' title='India redefines &quot;reciprocity&quot;; Israeli professionals pay the price'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhu_G2ewxdQwtbcA50JCg1KWXDrrkmGmRoUe3MhP7bxDEa52VEP5NRmtccmpUZ-j4AA0codIChUiTYBNzaoyjGChOHihi3F2SQ6p8alVxel6adXWaahAaurYAqL7E4enzphlY0HmQ/s72-c/Israel-visa-fee.PNG" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-3681561033233513282</id><published>2014-04-02T21:15:00.001+05:30</published><updated>2014-09-19T14:41:20.129+05:30</updated><category 
scheme="http://www.blogger.com/atom/ns#" term="data visualization"/><category scheme="http://www.blogger.com/atom/ns#" term="Excel"/><category scheme="http://www.blogger.com/atom/ns#" term="software"/><category scheme="http://www.blogger.com/atom/ns#" term="spotfire"/><category scheme="http://www.blogger.com/atom/ns#" term="tableau"/><title type='text'>Parallel coordinate plot in Tableau: a workaround</title><summary type="text">The parallel coordinate plot is useful for visualizing multivariate data in a dis-aggregated way, where we have multiple numerical measurements for each record. A scatter plot displays two measurements for each record by using the two axes. A parallel coordinate plot can display many measurements for each record, by using many (parallel) axes - one for each measurement.


While not as popular as </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/3681561033233513282/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/3681561033233513282' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/3681561033233513282'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/3681561033233513282'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2014/04/parallel-coordinate-plot-in-tableau.html' title='Parallel coordinate plot in Tableau: a workaround'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiljhoaVPGSn3XCX9V__OadhPo6cd6Q6WpH3KUOrg50SeYlOP3dUdAy-NUy0OYwDJXOg26hlCjUKNyFCa510S2PI4OXOvLil9Q_E6QBz8sVNh_1LRvliep7WQ2m7IL2XyDs_0nIdA/s72-c/TableauCalculated.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-2719187359314112856</id><published>2014-03-15T14:00:00.000+05:30</published><updated>2014-03-15T14:15:11.958+05:30</updated><title type='text'>Can women be professors or doctors? Not according to Jet Airways</title><summary type="text">

I am already used to the comical scene at airports in Asia, where a sign-holder with &quot;Professor Galit Shmueli&quot; sees us walk in his/her direction and right away rushes to my husband. Whether or not the stereotype is based on actual gender statistics of professors in Asia is a good question.

What I don&#39;t find amusing is when a corporate like Jet Airways, under the guise of &quot;celebrating </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/2719187359314112856/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/2719187359314112856' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/2719187359314112856'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/2719187359314112856'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2014/03/can-women-be-professors-or-doctors-not.html' title='Can women be professors or doctors? Not according to Jet Airways'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgRUdGrqnX4CmB5QLGwBCvjQ_vismlziNBrkZWzjiGWoHt30h1NhtxYKIw0tGCa5ZfjhupNmMqUKeSuSXLZPcuinrrX8ijI9MINEs5bgvaovn1AUgX_Yil-4MT_ibgKOkOUklOfsw/s72-c/Jet-Women.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-4007166338888457878</id><published>2014-03-06T10:06:00.001+05:30</published><updated>2014-03-06T10:06:20.679+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="data mining"/><category scheme="http://www.blogger.com/atom/ns#" term="predictive analytics"/><category scheme="http://www.blogger.com/atom/ns#" term="regression"/><category scheme="http://www.blogger.com/atom/ns#" term="regresson tree"/><title type='text'>The use of dummy variables in predictive 
algorithms</title><summary type="text">

Anyone who has taken a course in statistics that covers linear regression has heard some version of the rule regarding pre-processing categorical predictors with more than two categories and the need to factor them into binary dummy/indicator variables:

&quot;If a variable has k levels, you can create only k-1 indicators. You have to choose one of the k categories as a &quot;baseline&quot; and leave out its </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/4007166338888457878/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/4007166338888457878' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4007166338888457878'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4007166338888457878'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2014/03/the-use-of-dummy-variables-in.html' title='The use of dummy variables in predictive algorithms'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiECfUDSqsgtkpBQpAMqkvRN9-w3zj2HqiaHMo1qnmMntFSqInKEqyzJCqVfb19KgItAv8xyIeNcJJqbgdj8A6kayJ07-Lw73NKOXnJzFI9TiXa9ZehSypRi7Aby4auXTaJIRkgQw/s72-c/dummy.gif" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-4111588502273135377</id><published>2013-11-28T01:12:00.001+05:30</published><updated>2013-11-28T09:01:59.465+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="competition"/><category scheme="http://www.blogger.com/atom/ns#" term="data liberation"/><category scheme="http://www.blogger.com/atom/ns#" term="data mining"/><category scheme="http://www.blogger.com/atom/ns#" term="teaching business data mining"/><title type='text'>Running a data mining 
contest on Kaggle</title><summary type="text">

Following the success last year, I&#39;ve decided once again to introduce a data mining contest in my Business Analytics using Data Mining&amp;nbsp;course at the Indian School of Business. Last year, I used two platforms: CrowdAnalytix and Kaggle. This year I am again using Kaggle. They offer free competition hosting for university instructors, called InClass Kaggle.

Setting up a competition on Kaggle</summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/4111588502273135377/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/4111588502273135377' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4111588502273135377'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/4111588502273135377'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2013/11/running-data-mining-contest-on-kaggle.html' title='Running a data mining contest on Kaggle'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEi-Ahq9Rejs0WCdDJsdGrgem22Oe_U3hNc_taociHinH_LoY-sryFA3GMrx-k6BOkVpkSuILSD1ImAMXdtiL3TJSimd1Ny3vUG17MlQ2WSBFDDK101_Qsf246V5VBpv-02JFt5uiQ/s72-c/kaggle.PNG" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-7386194122393594464</id><published>2013-11-21T14:42:00.001+05:30</published><updated>2013-11-21T14:54:14.355+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="prediction"/><category scheme="http://www.blogger.com/atom/ns#" term="predictive accuracy"/><title type='text'>The Scientific Value of Testing Predictive Performance</title><summary type="text">

This week&#39;s NY Times article&amp;nbsp;Risk Calculator for Cholesterol Appears Flawed&amp;nbsp;and CNN article Does calculator overstate heart attack risk? illustrate the power of evaluating the predictive performance of a model for purposes of validating the underlying theory.

The NYT article describes findings by two Harvard Medical School professors, Ridker and Cook, about extreme over-estimation of</summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/7386194122393594464/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/7386194122393594464' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/7386194122393594464'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/7386194122393594464'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2013/11/the-value-of-testing-predictive.html' title='The Scientific Value of Testing Predictive Performance'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjvJMcKBacPtdJz1XMYdV76W9nayC_oYreqz57CjpHC7YCk0jNIImvd7e2Lx6hdG11u8NEEtrDvvEPYNVaZT648JlNFaHUUfOoU5lIGFT6kqc2pY3djoSYCOtd3i3tKpGWF3WchdQ/s72-c/training.jpg" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-21831384.post-6688403081481010857</id><published>2013-11-05T11:54:00.003+05:30</published><updated>2013-11-06T17:15:02.978+05:30</updated><category scheme="http://www.blogger.com/atom/ns#" term="forecasting"/><category scheme="http://www.blogger.com/atom/ns#" term="teaching business data mining"/><title type='text'>A Tale of Two (Business Analytics) Courses</title><summary type="text">I have been teaching two business analytics elective MBA-level courses at ISB. 
One is called &quot;Business Analytics Using Data Mining&quot; (BADM) and the other, &quot;Forecasting Analytics&quot; (FCAS). Although we share the syllabi for both courses, I often receive the following question, in this variant or the other:


What is the difference between the two courses?

The short answer is: BADM is focused on </summary><link rel='replies' type='application/atom+xml' href='http://www.bzst.com/feeds/6688403081481010857/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/21831384/6688403081481010857' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/6688403081481010857'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/21831384/posts/default/6688403081481010857'/><link rel='alternate' type='text/html' href='http://www.bzst.com/2013/11/a-tale-of-two-business-analytics-courses.html' title='A Tale of Two (Business Analytics) Courses'/><author><name>Galit Shmueli</name><uri>http://www.blogger.com/profile/06119270323184007583</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='https://img1.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhj1tmJ3H0Ekt-dKgyWrY-YJddFRoqGaQOffz34YxYG2Lr9b5Hzl1JP1cwWIfoXnVx69GGTmFDYtRtiigDnkz83SMU2PGqapG_mY3etxYr61K-lV7AB7T37hWCD0TvFKDbkJdjsdA/s72-c/Spot+the+difference+1.jpg" height="72" width="72"/><thr:total>0</thr:total></entry></feed>