<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:blogger="http://schemas.google.com/blogger/2008" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0"><channel><atom:id>tag:blogger.com,1999:blog-6439806875881311576</atom:id><lastBuildDate>Mon, 29 Apr 2013 03:21:10 +0000</lastBuildDate><category>biomarkers</category><category>ENHANCE</category><category>adaptive trials</category><category>data mining</category><category>Taylor series</category><category>books</category><category>free</category><category>PD</category><category>privacy</category><category>relationships</category><category>Inference for R</category><category>FDA</category><category>safety</category><category>multiple sclerosis</category><category>big data</category><category>statistical significance</category><category>Phase 2</category><category>loess</category><category>randomization</category><category>polls</category><category>learning from data</category><category>ANOVA</category><category>postmarketing</category><category>classes</category><category>natural language processing</category><category>email</category><category>echinacaea</category><category>probability</category><category>bias</category><category>humor</category><category>data collection</category><category>simulation</category><category>blinding</category><category>pharmacogenomics</category><category>visualization</category><category>group sequential</category><category>waste</category><category>autism</category><category>Microsoft Excel</category><category>graphics</category><category>data 
cleaning</category><category>alternative medicine</category><category>central nervous system</category><category>Hal Varian</category><category>links</category><category>mooc</category><category>Presentations</category><category>Phase I</category><category>networking</category><category>joint statistical meetings</category><category>practice of statistics</category><category>noninferiority</category><category>alzheimer's disease</category><category>drug development</category><category>causal inference</category><category>statistics without borders</category><category>marketing</category><category>epidemiology</category><category>RTF</category><category>clinical trials</category><category>modeling</category><category>PK</category><category>crf</category><category>statistics</category><category>social network analysis</category><category>correlation</category><category>blogging</category><category>Other blogs</category><category>statistics forum</category><category>CETP inhibitors</category><category>critical path initiative</category><category>Python</category><category>education</category><category>t test</category><category>eli lilly</category><category>observational</category><category>random number generation</category><category>John Tukey</category><category>contests</category><category>professionalism</category><category>critical thinking</category><category>RStudio</category><category>smoothing</category><category>risk</category><category>leadership</category><category>o'brien-fleming</category><category>tables</category><category>JSM</category><category>SAS</category><category>analysis</category><category>modern regression</category><category>consulting</category><category>coursera</category><category>Blogkeeping</category><category>Lan-DeMets</category><category>pharmaceutical industry</category><category>antibiotics</category><category>statins</category><category>lying with statistics</category><category>Peter Rost</category><category>statistical 
leadership</category><category>learning</category><category>science</category><category>neurology</category><category>Bayesian statistics</category><category>vaccination</category><category>simple statistics</category><category>politics</category><category>Microsoft Word</category><category>graduate school</category><category>statistical programming</category><category>careers</category><category>SAS programmer</category><category>Pharmalot</category><category>JUPITER</category><category>interpretation</category><category>multiple comparisons</category><category>Avandia</category><category>MedDRA</category><category>Google</category><category>proof</category><category>publishing</category><category>databases</category><category>biostatistics</category><category>phase 3</category><category>TGN1412</category><category>comparative effectiveness</category><category>matrix</category><category>Mantel-Haenszel</category><category>odds</category><category>metaanalysis</category><category>Type I error</category><category>twitter</category><category>surveys</category><category>p-values</category><category>skepticism</category><category>NNT</category><category>compliance</category><category>mathematics</category><category>torcetrapib</category><category>EMEA</category><category>missing data</category><category>chaos</category><category>Ultraedit</category><category>statistical analysis plan</category><category>intention to treat</category><category>delta method</category><category>machine learning</category><category>Ed Silverman</category><category>writing</category><category>drugs</category><category>R</category><category>Ask Cato</category><title>Realizations in Biostatistics</title><description>Biostatistics, clinical trial design, critical thinking about drugs and healthcare, skepticism, the scientific process.</description><link>http://realizationsinbiostatistics.blogspot.com/</link><managingEditor>noreply@blogger.com (John 
Johnson)</managingEditor><generator>Blogger</generator><openSearch:totalResults>224</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/rss+xml" href="http://feeds.feedburner.com/RealizationsInBiostatistics" /><feedburner:info uri="realizationsinbiostatistics" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-6632382336747906441</guid><pubDate>Mon, 29 Apr 2013 03:21:00 +0000</pubDate><atom:updated>2013-04-28T23:21:10.452-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">mooc</category><title>MOOCs–a low-risk way to explore outside your field</title><description>&lt;p&gt;One of the things I'm realizing from Massively Open Online Courses (MOOCs) -- those online free classes from universities that seem to have sprung up from almost nowhere in the last year and a half -- is that they offer a perfect opportunity to explore outside my field. At first (and this was even before the term MOOC was coined), I took classes that were just outside my field. For instance, I've been in clinical and postmarketing pharmaceutical statistics for over 10 years, and my first two classes were in databases and machine learning. I did this because I was aching to learn something new, but I figured that with a class in databases I could make our database guys in IT sweat a bit just by dropping some terms and showing some understanding of the basics. It worked. In addition, I wanted to understand what this machine learning field was all about, and how it was different from statistics. 
I accomplished that goal, too.&lt;/p&gt; &lt;p&gt;Since then, I have taken courses in the area of artificial intelligence/machine learning, sociology and networks, scientific computing (separately from statistical computing), and even entrepreneurship. I have also encouraged others to take part in MOOCs, though I don't know the result of that. Finally, I have come back to some classes I've already taken as a community TA, or former student who actively takes part in discussions to help new students take the class.&lt;/p&gt; &lt;p&gt;This is all valuable experience, and I could write several blog entries on the benefits. The main one I'm feeling right now is the feeling that I'm coming up for air, and taking a sampling of other points of view in a low-risk way. For example, though I don't actively use Fourier analysis in my own work, one recent class and one current class both use it to do different things (solve differential equations and process signals). Because these classes involve programming assignments, I've now deepened my understanding of the spectral theorem, which I only studied from a theoretical point of view in graduate school. I'm also thinking about this work from the point of view of time series analysis, which is helping me think about some problems involving longitudinal data at work.&lt;/p&gt; &lt;p&gt;From a completely different standpoint, another class helped me think about salary negotiations in terms of expected payoff (i.e. combination of probability of an offer being accepted vs. salary). This sort of analysis invited further analysis of the value of that job vs. what I would be paid and the insecurity of moving to a different job. In the end, I turned down what would have been a pretty good offer, because I decided it did not compensate for the risks I was incurring. 
The cool thing is that these were all applying concepts I already understood (expected value, expected payoff), but applied in a different way from what I was already doing.&lt;/p&gt; &lt;p&gt;The best thing about MOOCs is that the risk is low. All that is required is an internet connection and a decent computer. Some math courses may require a better computer to do high-powered math, but I've seen few that require expensive textbooks or expensive software. Even Mathworks is now offering Matlab at student pricing to people who are taking some classes, and Octave remains a free option for people unable to take advantage of it. And, if you are unable to keep up the work, there is no downside. You can simply unenroll.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hDLZkdT89mE:FC1TGuTUTmQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hDLZkdT89mE:FC1TGuTUTmQ:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=hDLZkdT89mE:FC1TGuTUTmQ:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hDLZkdT89mE:FC1TGuTUTmQ:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hDLZkdT89mE:FC1TGuTUTmQ:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=hDLZkdT89mE:FC1TGuTUTmQ:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hDLZkdT89mE:FC1TGuTUTmQ:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=hDLZkdT89mE:FC1TGuTUTmQ:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hDLZkdT89mE:FC1TGuTUTmQ:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=hDLZkdT89mE:FC1TGuTUTmQ:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/hDLZkdT89mE" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/hDLZkdT89mE/moocsa-low-risk-way-to-explore-outside.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/04/moocsa-low-risk-way-to-explore-outside.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-2975134576502185668</guid><pubDate>Tue, 16 Apr 2013 02:33:00 +0000</pubDate><atom:updated>2013-04-15T22:33:14.288-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">RStudio</category><category domain="http://www.blogger.com/atom/ns#">R</category><title>RStudio is reminding me of the older Macs</title><description>&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-GXVEbjGxkbU/UWy4EbDlC5I/AAAAAAAANT4/aJFR1M6dw0M/s1600/rbomb.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="212" src="http://2.bp.blogspot.com/-GXVEbjGxkbU/UWy4EbDlC5I/AAAAAAAANT4/aJFR1M6dw0M/s320/rbomb.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;
The only thing missing is the cryptic ID number.&lt;br /&gt;
&lt;br /&gt;
Well, the only bad thing is that I am trying to run a probabilistic graphical model on some real data, and having a crash like this will definitely slow things down.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9HbkQoU2q0I:FaCPuEoVqIU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9HbkQoU2q0I:FaCPuEoVqIU:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9HbkQoU2q0I:FaCPuEoVqIU:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9HbkQoU2q0I:FaCPuEoVqIU:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9HbkQoU2q0I:FaCPuEoVqIU:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9HbkQoU2q0I:FaCPuEoVqIU:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9HbkQoU2q0I:FaCPuEoVqIU:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9HbkQoU2q0I:FaCPuEoVqIU:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9HbkQoU2q0I:FaCPuEoVqIU:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9HbkQoU2q0I:FaCPuEoVqIU:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/9HbkQoU2q0I" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/9HbkQoU2q0I/rstudio-is-reminding-me-of-older-macs.html</link><author>noreply@blogger.com (John Johnson)</author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-GXVEbjGxkbU/UWy4EbDlC5I/AAAAAAAANT4/aJFR1M6dw0M/s72-c/rbomb.png" height="72" width="72" /><thr:total>5</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/04/rstudio-is-reminding-me-of-older-macs.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-8285768638362311259</guid><pubDate>Sat, 30 Mar 2013 18:21:00 +0000</pubDate><atom:updated>2013-03-30T14:21:11.312-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">Presentations</category><title>Presenting without slides</title><description>&lt;p&gt;Tired of slides, I’ve been experimenting with different ways of presenting. At the recent Conference on Statistical Practice, I decided only to use slides for an outline and references. As it turns out, the most critical feedback I got had to do with the fact that the audience couldn’t follow the organization because I had no slides.&lt;/p&gt; &lt;p&gt;I tried presenting without slides because, well, I started to use them as a crutch. I also saw a lot of people presenting essentially by putting together slides and reading from them. So I figured I would expand my horizons.&lt;/p&gt; &lt;p&gt;Next time I present, I’ll do slides, I guess, but I may try something a bit different.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=zXp6LwoW1Is:8OvVcgSqVfs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=zXp6LwoW1Is:8OvVcgSqVfs:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=zXp6LwoW1Is:8OvVcgSqVfs:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=zXp6LwoW1Is:8OvVcgSqVfs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=zXp6LwoW1Is:8OvVcgSqVfs:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=zXp6LwoW1Is:8OvVcgSqVfs:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=zXp6LwoW1Is:8OvVcgSqVfs:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=zXp6LwoW1Is:8OvVcgSqVfs:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=zXp6LwoW1Is:8OvVcgSqVfs:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=zXp6LwoW1Is:8OvVcgSqVfs:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/zXp6LwoW1Is" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/zXp6LwoW1Is/presenting-without-slides.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>1</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/03/presenting-without-slides.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-3708967860700789665</guid><pubDate>Wed, 27 Mar 2013 20:49:00 +0000</pubDate><atom:updated>2013-03-27T16:49:52.131-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">mooc</category><category domain="http://www.blogger.com/atom/ns#">machine learning</category><category domain="http://www.blogger.com/atom/ns#">learning from data</category><title>Last session of Caltech's Learning from Data course starts April 2</title><description>&lt;div class="tr_bq"&gt;
I just received this email:&lt;/div&gt;
&lt;br /&gt;
&lt;blockquote&gt;
&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;Caltech's Machine Learning MOOC is coming to an end this spring, with the final session starting on April 2. There will be no future sessions. The course has attracted more than 200,000 participants since its launch last year, and has gained wide acclaim. This is the last chance for anyone who wishes to take the course (&lt;/span&gt;&lt;a href="http://work.caltech.edu/telecourse" style="background-color: white; color: #1155cc; font-family: arial, sans-serif; font-size: 13px;" target="_blank"&gt;http://work.caltech.edu/&lt;wbr&gt;&lt;/wbr&gt;telecourse&lt;/a&gt;&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;).&lt;/span&gt;&lt;br style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;" /&gt;&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;Best.&lt;/span&gt;&lt;br style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;" /&gt;&lt;span style="background-color: white; color: #222222; font-family: arial, sans-serif; font-size: 13px;"&gt;The Caltech Team&lt;/span&gt;&lt;/blockquote&gt;
I strongly &lt;a href="http://realizationsinbiostatistics.blogspot.com/2013/03/review-of-caltech-learning-from-data-e.html" target="_blank"&gt;recommend this course&lt;/a&gt; if you can take it, even if you have taken other machine learning classes. It lays a great theoretical foundation for machine learning, sets it off nicely from classical statistics, and gives you some experience working with data as well.&lt;br /&gt;
&lt;br /&gt;
If you were for some reason waiting for the right time, it looks to be now or never.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=OCkQiDRmua8:sN3mayepxjc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=OCkQiDRmua8:sN3mayepxjc:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=OCkQiDRmua8:sN3mayepxjc:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=OCkQiDRmua8:sN3mayepxjc:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=OCkQiDRmua8:sN3mayepxjc:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=OCkQiDRmua8:sN3mayepxjc:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=OCkQiDRmua8:sN3mayepxjc:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=OCkQiDRmua8:sN3mayepxjc:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=OCkQiDRmua8:sN3mayepxjc:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=OCkQiDRmua8:sN3mayepxjc:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/OCkQiDRmua8" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/OCkQiDRmua8/last-session-of-caltechs-learning-from.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/03/last-session-of-caltechs-learning-from.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-6794886113024887262</guid><pubDate>Thu, 21 Mar 2013 02:47:00 +0000</pubDate><atom:updated>2013-03-20T22:47:32.637-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">mooc</category><category domain="http://www.blogger.com/atom/ns#">machine learning</category><title>Review of Caltech's Learning from Data e-course</title><description>&lt;p dir="ltr"&gt;Caltech has an online course &lt;a href="http://work.caltech.edu/telecourse.html"&gt;Learning from Data&lt;/a&gt;, taught by Professor Yaser Abu-Mostafa, that seeks to make the course material accessible to everybody. Unlike most of the online courses I've taken, this one is independently offered through a platform created just for the class. I took the course for its second offering in Jan-March 2013.&lt;/p&gt;
&lt;p dir="ltr"&gt;The platform on which the course is offered isn't as slick as Coursera. The lectures are offered through a Youtube playlist, and the homeworks are graded through multiple choice. That's perhaps a weakness of the class, but somehow the course faculty made it work.&lt;/p&gt;
&lt;p dir="ltr"&gt;The class's content was its strong point. Abu-Mostafa weaved theory and pragmatic concerns throughout the class, and invited students to write code in just about any platform (I, of course, chose &lt;a href="http://www.r-project.org"&gt;R&lt;/a&gt;) to explore the theoretical ideas in a practical setting. Between this class and Andrew Ng's Machine Learning class on the Coursera platform, a student will have a very strong foundation to apply these techniques to a real-world setting.&lt;/p&gt;
&lt;p dir="ltr"&gt;I have only one objection to the content, which came in the last lecture. In his description of Bayesian techniques, he claimed that in most circumstances you could only model a parameter with a delta function. This, of course, falls in line with the frequentist notion that you have a constant, but unknowable "state of nature." I felt this way for a long time, but don't really believe it any more in a variety of contexts. I think he played up the Bayesian v. frequentist squabble a bit much, which may have been appropriate 20 years ago but is not so much an issue now.&lt;/p&gt;
&lt;p dir="ltr"&gt;Otherwise, I found the perspective from the course extremely valuable, especially in the context of supervised learning.&lt;/p&gt;
&lt;p dir="ltr"&gt;If you plan on taking the course, I recommend leaving a lot of time for it or having a very strong statistical background.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZWcb5vuDqVw:_8fcPW49TdQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZWcb5vuDqVw:_8fcPW49TdQ:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=ZWcb5vuDqVw:_8fcPW49TdQ:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZWcb5vuDqVw:_8fcPW49TdQ:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZWcb5vuDqVw:_8fcPW49TdQ:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=ZWcb5vuDqVw:_8fcPW49TdQ:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZWcb5vuDqVw:_8fcPW49TdQ:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=ZWcb5vuDqVw:_8fcPW49TdQ:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZWcb5vuDqVw:_8fcPW49TdQ:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=ZWcb5vuDqVw:_8fcPW49TdQ:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/ZWcb5vuDqVw" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/ZWcb5vuDqVw/review-of-caltech-learning-from-data-e.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>3</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/03/review-of-caltech-learning-from-data-e.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-4601230599587732472</guid><pubDate>Tue, 12 Mar 2013 23:12:00 +0000</pubDate><atom:updated>2013-03-12T19:12:06.753-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">practice of statistics</category><category domain="http://www.blogger.com/atom/ns#">R</category><title>Distrust of R</title><description>&lt;p dir="ltr"&gt;I guess I've been living in a bubble for a bit, but apparently there are a lot of people who still mistrust R. I got asked this week why I used R (and, specifically, the package rpart) to generate classification and regression trees instead of SAS Enterprise Miner. Never mind the fact that rpart code has been around a very long time, and probably has been subject to more scrutiny than any other decision tree code. (And never mind the fact that I really don't like classification and regression trees in general because of their limitations.)&lt;/p&gt;
&lt;p dir="ltr"&gt;At any rate, if someone wants to pay the big bucks for me to use SAS Enterprise Miner just on their project, they can go right ahead. Otherwise, I have got a bit of convincing to do.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Yi2lYNUh5jI:qI9ZedMVmL8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Yi2lYNUh5jI:qI9ZedMVmL8:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Yi2lYNUh5jI:qI9ZedMVmL8:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Yi2lYNUh5jI:qI9ZedMVmL8:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Yi2lYNUh5jI:qI9ZedMVmL8:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Yi2lYNUh5jI:qI9ZedMVmL8:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Yi2lYNUh5jI:qI9ZedMVmL8:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Yi2lYNUh5jI:qI9ZedMVmL8:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Yi2lYNUh5jI:qI9ZedMVmL8:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Yi2lYNUh5jI:qI9ZedMVmL8:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/Yi2lYNUh5jI" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/Yi2lYNUh5jI/distrust-of-r.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>5</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/03/distrust-of-r.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-6111459403042573251</guid><pubDate>Fri, 01 Mar 2013 01:49:00 +0000</pubDate><atom:updated>2013-02-28T20:49:55.430-05:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">statistical leadership</category><category domain="http://www.blogger.com/atom/ns#">science</category><category domain="http://www.blogger.com/atom/ns#">publishing</category><title>Bad statistics in high impact journals</title><description>&lt;p dir="ltr"&gt;&lt;a href="http://blogs.discovermagazine.com/neuroskeptic/2013/02/19/better-journals-worse-statistics/#.UTAHJ1Mo6aw"&gt;Better Journals&amp;#8230; Worse Statistics? : &lt;/a&gt;&lt;a href="http://blogs.discovermagazine.com/neuroskeptic/2013/02/19/better-journals-worse-statistics/#.UTAHJ1Mo6aw"&gt;Neuroskeptic&lt;/a&gt;&lt;/p&gt;
&lt;p dir="ltr"&gt;In the linked blog entry, Neuroskeptic notes that high impact journals often have fewer statistical details than other journals. The research reported in these journals is often heavily amended, if not outright contradicted, by later research. I don't think this is nefarious, though, nor is it worthless. The kind of work reported in Science and Nature, for instance, generates interest and, therefore, more scrutiny (funding, studies, theses, etc.).&lt;/p&gt;
&lt;p dir="ltr"&gt;But as with all other research, if statistical details are included it might direct subsequent research in these topics a bit better.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=FVCv71o_X80:R_a2hKbWsFE:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=FVCv71o_X80:R_a2hKbWsFE:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=FVCv71o_X80:R_a2hKbWsFE:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=FVCv71o_X80:R_a2hKbWsFE:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=FVCv71o_X80:R_a2hKbWsFE:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=FVCv71o_X80:R_a2hKbWsFE:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=FVCv71o_X80:R_a2hKbWsFE:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=FVCv71o_X80:R_a2hKbWsFE:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=FVCv71o_X80:R_a2hKbWsFE:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=FVCv71o_X80:R_a2hKbWsFE:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/FVCv71o_X80" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/FVCv71o_X80/bad-statistics-in-high-impact-journals.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/02/bad-statistics-in-high-impact-journals.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-5249806782975027238</guid><pubDate>Thu, 21 Feb 2013 01:34:00 +0000</pubDate><atom:updated>2013-02-20T20:34:42.231-05:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">big data</category><title>The burst of the Big Data bubble, and do we need the hype, anyway?</title><description>&lt;p dir="ltr"&gt;So, now I'm seeing some buzz over Twitter that the Big Data disillusionment is starting now. Frankly, I've been wondering when this would happen. Of course, the next stage involves making strategic investments in Big Data resources, and having these resources quietly being used effectively, at least if Big Data follows technologies such as neural networks, Java, etc. So the theory goes, all surviving technologies follow a pattern of hype, disillusionment, and then quiet acceptance.&lt;/p&gt;
&lt;p dir="ltr"&gt;Did we really need this period of hype? I can understand why companies hype up a technology to maintain interest while they try to make their offerings mature, and overhyping usually leads to disillusionment, but I wonder if there is a different path. R, Python, and some other open projects seem to have flattened the hype hill and disillusionment valley, probably because the larger number of people hacking the inside generates its own interest and maturity mechanism.&lt;/p&gt;
&lt;p dir="ltr"&gt;Anyway, I look forward to the maturing of big data at least until the privacy concerns generate widespread panic.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZCrV5kb2OoE:POywtz5cq9Y:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZCrV5kb2OoE:POywtz5cq9Y:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=ZCrV5kb2OoE:POywtz5cq9Y:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZCrV5kb2OoE:POywtz5cq9Y:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZCrV5kb2OoE:POywtz5cq9Y:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=ZCrV5kb2OoE:POywtz5cq9Y:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZCrV5kb2OoE:POywtz5cq9Y:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=ZCrV5kb2OoE:POywtz5cq9Y:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=ZCrV5kb2OoE:POywtz5cq9Y:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=ZCrV5kb2OoE:POywtz5cq9Y:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/ZCrV5kb2OoE" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/ZCrV5kb2OoE/the-burst-of-big-data-bubble-and-do-we.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/02/the-burst-of-big-data-bubble-and-do-we.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-1735643460819870106</guid><pubDate>Fri, 15 Feb 2013 13:30:00 +0000</pubDate><atom:updated>2013-02-15T08:30:01.191-05:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">interpretation</category><category domain="http://www.blogger.com/atom/ns#">visualization</category><category domain="http://www.blogger.com/atom/ns#">natural language processing</category><category domain="http://www.blogger.com/atom/ns#">politics</category><title>Sloppy journalism with interactive graphics is still sloppy journalism</title><description>The Guardian recently &lt;a href="http://www.guardian.co.uk/world/interactive/2013/feb/12/state-of-the-union-reading-level" target="_blank"&gt;discussed the "declining linguistic standards"&lt;/a&gt; in State of the Union addresses. I thought &amp;nbsp;this was an interesting exercise, but something seemed wrong about the article, and it turns out this is one case where the data do not really speak for themselves. There's a lot of interpretation and understanding behind cultural trends in the use of the English language in America, as well as the evolution of the presidents' intentions behind the address. There are a few important points:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;The author correctly points out that Woodrow Wilson essentially changed the format of the address through precedent from written document to speech. Right after Wilson's first speech there is a huge drop in the "education level" (hang on for a discussion of this terminology) of these addresses. As I recall, Wilson is the only American president with a Ph.D.&lt;/li&gt;
&lt;li&gt;The index used, &lt;a href="http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_test" target="_blank"&gt;Flesch-Kincaid&lt;/a&gt;&amp;nbsp;(FK), is questionable. Good on The Guardian to use a single measure for all speeches, but I have to wonder if it is wise to use the same measure for speeches and written addresses. Furthermore, FK is very sensitive to the placement of punctuation (it weights sentence length heavily). For instance, as a friend pointed out, one of Wilson's speeches has an FK grade level of over 17, but if you replace one of the semi-colons in the speech with a period, the FK grade drops to 12. This subtlety is lost in speech format, giving FK an extremely high uncertainty (this same friend calls FK "utterly useless" for speeches).&lt;/li&gt;
&lt;li&gt;The audience of the SOTU address has changed. Though it's a constitutional duty of the president, the delivery as a speech is not, and it only has to be delivered to Congress. However, most modern addresses have been in the form of televised speeches, and have to be understood by a wider and less politically savvy audience.&lt;/li&gt;
&lt;li&gt;Cultural trends in the use of spoken and written English in America involve shorter sentences over time in general.&lt;/li&gt;
&lt;li&gt;In this case, a more sophisticated natural language processing analysis might reveal some interesting trends. For instance, how do wartime speeches compare to times of peace? Are there any natural categories of speeches that fall out? What are the outliers? How does this compare to polls?&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
In short, we have some interesting data that needs heavy qualification and critical analysis, that is just presented on a page and capped with a headline that gives an overly simplistic interpretation.&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vooac_iNLEU:IYwiFvvOHLo:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vooac_iNLEU:IYwiFvvOHLo:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=vooac_iNLEU:IYwiFvvOHLo:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vooac_iNLEU:IYwiFvvOHLo:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vooac_iNLEU:IYwiFvvOHLo:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=vooac_iNLEU:IYwiFvvOHLo:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vooac_iNLEU:IYwiFvvOHLo:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=vooac_iNLEU:IYwiFvvOHLo:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vooac_iNLEU:IYwiFvvOHLo:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=vooac_iNLEU:IYwiFvvOHLo:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/vooac_iNLEU" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/vooac_iNLEU/sloppy-journalism-with-interactive.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/02/sloppy-journalism-with-interactive.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-2170269279656637595</guid><pubDate>Mon, 11 Feb 2013 22:49:00 +0000</pubDate><atom:updated>2013-02-11T17:49:51.138-05:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">clinical trials</category><title>Operational details can be pesky</title><description>Recently, I was working with a team to finalize a clinical trial protocol. I raised some concerns about their strategic matters, and my concerns were dismissed as "operational details."&lt;br /&gt;
&lt;br /&gt;
The thing about those pesky operational details is that, if something doesn't work due to an operational detail, you might have to modify your strategy. And if enough of these pesky operational details get in the way, &amp;nbsp;you may have to rethink your strategy.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=6cRLLBRMLsc:u03jGTBcCyk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=6cRLLBRMLsc:u03jGTBcCyk:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=6cRLLBRMLsc:u03jGTBcCyk:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=6cRLLBRMLsc:u03jGTBcCyk:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=6cRLLBRMLsc:u03jGTBcCyk:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=6cRLLBRMLsc:u03jGTBcCyk:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=6cRLLBRMLsc:u03jGTBcCyk:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=6cRLLBRMLsc:u03jGTBcCyk:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=6cRLLBRMLsc:u03jGTBcCyk:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=6cRLLBRMLsc:u03jGTBcCyk:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/6cRLLBRMLsc" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/6cRLLBRMLsc/operational-details-can-be-pesky.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2013/02/operational-details-can-be-pesky.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-4049388008276217245</guid><pubDate>Sun, 09 Dec 2012 15:15:00 +0000</pubDate><atom:updated>2012-12-09T10:15:52.785-05:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">education</category><category domain="http://www.blogger.com/atom/ns#">mooc</category><title>MOOCs have exploded!</title><description>&lt;p&gt;About a year and two months ago, Stanford University taught three classes online: Intro to Databases, Machine Learning, and Artificial Intelligence. I took two of those classes (I did not feel I had time to take Artificial Intelligence), and found them very valuable. The success of those programs led to the development of at least two companies in a new area of online education: &lt;a href="http://www.coursera.org" target="_blank"&gt;Coursera&lt;/a&gt; and &lt;a href="http://www.udacity.com" target="_blank"&gt;Udacity&lt;/a&gt;. In the meantime, other efforts have been started (I’m thinking mainly edX, but there are others as well), and now many universities are scrambling to take advantage of either the framework of these companies or other platforms.&lt;/p&gt; &lt;p&gt;Put simply, if you have not already, then you need to make the time to do some of these classes. 
Education is the most important investment you can make in yourself, and at this point there are hundreds of free online university-level classes in everything from the arts to statistics. If ever you wanted to expand your horizons, now’s the time.&lt;/p&gt; &lt;p&gt;I’ve personally taken 7 online classes now, and earned certificates in all of them. I use the material in many of these classes in my work, and I even have used two (Machine Learning and Probabilistic Graphical Models) to expand my company’s capabilities. I am far more secure in my job because of what I’ve learned. In addition, I had the honor of trying out the Probabilistic Graphical Models Community TA program, and my only regret is that I couldn’t put more time into it. To the extent that I took advantage of it, I got a lot out of the experience.&lt;/p&gt; &lt;p&gt;Now, the hard part. These classes require self-discipline. Like universities, there are some duds as well. At least you can add and drop at will, not worrying about prerequisites. You have to take responsibility for your own education and your own motivation.&lt;/p&gt; &lt;p&gt;In all, I’m very grateful that there are these pioneers (Andrew Ng, Daphne Koller, Sebastian Thrun, and others) who saw this need and had the knowledge and motivation to fill it. They are now moving in the direction of accreditation, and both free and premium models (probably for some kind of licensing or degree, which I don’t care about right now). For now, you can sign up and take classes at will.&lt;/p&gt; &lt;p&gt;Happy MOOCing!&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=-Hqd67uj2JE:EjLtcdFQOnQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=-Hqd67uj2JE:EjLtcdFQOnQ:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=-Hqd67uj2JE:EjLtcdFQOnQ:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=-Hqd67uj2JE:EjLtcdFQOnQ:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=-Hqd67uj2JE:EjLtcdFQOnQ:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=-Hqd67uj2JE:EjLtcdFQOnQ:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=-Hqd67uj2JE:EjLtcdFQOnQ:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=-Hqd67uj2JE:EjLtcdFQOnQ:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=-Hqd67uj2JE:EjLtcdFQOnQ:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=-Hqd67uj2JE:EjLtcdFQOnQ:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/-Hqd67uj2JE" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/-Hqd67uj2JE/moocs-have-exploded.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/12/moocs-have-exploded.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-2540435153503058184</guid><pubDate>Wed, 14 Nov 2012 17:25:00 +0000</pubDate><atom:updated>2012-11-14T12:25:37.031-05:00</atom:updated><title>Rare things happen all the time</title><description>John Cook reports on the &lt;a href="http://www.johndcook.com/blog/2012/11/14/probability-of-long-runs/" target="_blank"&gt;probability of long runs&lt;/a&gt;. This is a very useful reality check.&lt;br /&gt;
&lt;br /&gt;
I think there is a larger principle here, though, that &lt;a href="http://rationalwiki.org/wiki/Improbable_things_happen" target="_blank"&gt;rare things happen all the time&lt;/a&gt;.&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Jtd3D5U388I:PBDZijr9Ybk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Jtd3D5U388I:PBDZijr9Ybk:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Jtd3D5U388I:PBDZijr9Ybk:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Jtd3D5U388I:PBDZijr9Ybk:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Jtd3D5U388I:PBDZijr9Ybk:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Jtd3D5U388I:PBDZijr9Ybk:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Jtd3D5U388I:PBDZijr9Ybk:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Jtd3D5U388I:PBDZijr9Ybk:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Jtd3D5U388I:PBDZijr9Ybk:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Jtd3D5U388I:PBDZijr9Ybk:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/Jtd3D5U388I" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/Jtd3D5U388I/rare-things-happen-all-time.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/11/rare-things-happen-all-time.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-5118820214981058491</guid><pubDate>Sun, 11 Nov 2012 05:16:00 +0000</pubDate><atom:updated>2012-11-11T08:50:33.917-05:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">blogging</category><category domain="http://www.blogger.com/atom/ns#">social network analysis</category><category domain="http://www.blogger.com/atom/ns#">Other blogs</category><category domain="http://www.blogger.com/atom/ns#">Python</category><title>Analysis of the statistics blogosphere</title><description>My&lt;a href="https://github.com/randomjohn/project/blob/master/out/Communities%20within%20the%20statistics%20blog%20network.pdf" target="_blank"&gt; analysis of the statistics blogosphere&lt;/a&gt;&amp;nbsp;for the Coursera Social Networking Analysis class is up. The Python code and the data are up at my &lt;a href="https://github.com/randomjohn/project" target="_blank"&gt;github repository&lt;/a&gt;. Enjoy!&lt;br /&gt;
&lt;br /&gt;
Included are most of the Python code I used to obtain blog content, some of my attempts to automate the building of the network (I ended up using a manual process in the end), and my analysis. I also included the data. (You can probably see some of your own content.)&lt;br /&gt;
&lt;br /&gt;
Here's what I learned/got reminded of the most:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Doing projects like this is hard when you have other responsibilities, and you usually end up paring down your ambitions toward the end&lt;/li&gt;
&lt;li&gt;Data collection and curation was, as usual, the most difficult process&lt;/li&gt;
&lt;li&gt;Network analysis is fun, but I have a ways to go to know where to start first, what questions to ask, and so forth (these are the things you learn with experience)&lt;/li&gt;
&lt;li&gt;The measures that seem to be the most revealing are not always obvious -- in this network, it was the number of shortest paths compared to a random graph&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.andrewgelman.com/" target="_blank"&gt;Andrew Gelman's blog&lt;/a&gt; is central (but you probably don't need a formal analysis to tell you that)&lt;/li&gt;
&lt;li&gt;There's a lot of great content about statistics, data analysis, data science, and statistical computing out there. I've relied on blog posts for a lot of my work, and I've found even more great stuff. It's a firehose of information.&lt;/li&gt;
&lt;/ul&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Xnh2H-bOWP0:XMvo-2qJiB8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Xnh2H-bOWP0:XMvo-2qJiB8:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Xnh2H-bOWP0:XMvo-2qJiB8:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Xnh2H-bOWP0:XMvo-2qJiB8:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Xnh2H-bOWP0:XMvo-2qJiB8:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Xnh2H-bOWP0:XMvo-2qJiB8:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Xnh2H-bOWP0:XMvo-2qJiB8:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Xnh2H-bOWP0:XMvo-2qJiB8:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=Xnh2H-bOWP0:XMvo-2qJiB8:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=Xnh2H-bOWP0:XMvo-2qJiB8:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/Xnh2H-bOWP0" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/Xnh2H-bOWP0/analysis-of-statistics-blogosphere.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>1</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/11/analysis-of-statistics-blogosphere.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-4117089584506612791</guid><pubDate>Mon, 05 Nov 2012 13:45:00 +0000</pubDate><atom:updated>2012-11-05T08:45:00.219-05:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">statistics</category><category domain="http://www.blogger.com/atom/ns#">blogging</category><category domain="http://www.blogger.com/atom/ns#">social network analysis</category><title>Snapshot of the statistics blogosphere</title><description>&lt;p&gt;&lt;a href="http://lh4.ggpht.com/-1Jw4p3keUlI/UJc6-scStWI/AAAAAAAAKjw/J5v5fDAu2fE/s1600-h/stats_blogs%25255B2%25255D.png"&gt;&lt;img title="stats_blogs" style="border-top: 0px; border-right: 0px; background-image: none; border-bottom: 0px; padding-top: 0px; padding-left: 0px; border-left: 0px; display: inline; padding-right: 0px" border="0" alt="stats_blogs" src="http://lh4.ggpht.com/-jPexPtGrf_w/UJc6_BAIc5I/AAAAAAAAKj4/r4ZsrJCNmJs/stats_blogs_thumb.png?imgmax=800" width="244" height="244"&gt;&lt;/a&gt;&lt;/p&gt; &lt;p&gt;This was generated during my social network analysis &lt;a href="http://realizationsinbiostatistics.blogspot.com/2012/10/sna-class-proposal.html" target="_blank"&gt;project&lt;/a&gt;. I haven’t finished yet, but I did want to show the cute picture. 
The statistics blogosphere is like a school of jellyfish.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=omxlggh-JW4:SzY7ddggn7k:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=omxlggh-JW4:SzY7ddggn7k:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=omxlggh-JW4:SzY7ddggn7k:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=omxlggh-JW4:SzY7ddggn7k:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=omxlggh-JW4:SzY7ddggn7k:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=omxlggh-JW4:SzY7ddggn7k:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=omxlggh-JW4:SzY7ddggn7k:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=omxlggh-JW4:SzY7ddggn7k:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=omxlggh-JW4:SzY7ddggn7k:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=omxlggh-JW4:SzY7ddggn7k:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/omxlggh-JW4" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/omxlggh-JW4/snapshot-of-statistics-blogosphere.html</link><author>noreply@blogger.com (John Johnson)</author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://lh4.ggpht.com/-jPexPtGrf_w/UJc6_BAIc5I/AAAAAAAAKj4/r4ZsrJCNmJs/s72-c/stats_blogs_thumb.png?imgmax=800" height="72" width="72" /><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/11/snapshot-of-statistics-blogosphere.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-7892823731481580620</guid><pubDate>Sun, 04 Nov 2012 20:37:00 +0000</pubDate><atom:updated>2012-11-04T15:37:14.522-05:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">statistical programming</category><title>Sometimes, saving CPU time is worth it for small data jobs</title><description>&lt;p&gt;There appears to be a conventional wisdom, one that I myself have espoused on several occasions, that for “most” statistical computing jobs developer time is more precious than CPU time. (The reason I write “most” in quotes is that there are some people who work in environments where Big Data or large jobs are the norm, or they are developing high performance computing libraries, and they have to squeeze every last bit of performance out of the CPU.)&lt;/p&gt; &lt;p&gt;However, sometimes it can be worth it to save a few extra minutes on small jobs, especially if they are run over and over. At one point today, I had an algorithm that I wrote inefficiently using Python’s built-in lists. 
I decided to stop the job and rewrite using the NumPy libraries, which took me an extra half hour. At first, I thought the time was wasted, but I have ended up running the code several times for various reasons. Those saved minutes have now, a couple of hours later, saved me more time than I spent rewriting.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=nzK0etUdDMM:Qlr0_5qKjhU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=nzK0etUdDMM:Qlr0_5qKjhU:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=nzK0etUdDMM:Qlr0_5qKjhU:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=nzK0etUdDMM:Qlr0_5qKjhU:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=nzK0etUdDMM:Qlr0_5qKjhU:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=nzK0etUdDMM:Qlr0_5qKjhU:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=nzK0etUdDMM:Qlr0_5qKjhU:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=nzK0etUdDMM:Qlr0_5qKjhU:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=nzK0etUdDMM:Qlr0_5qKjhU:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=nzK0etUdDMM:Qlr0_5qKjhU:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/nzK0etUdDMM" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/nzK0etUdDMM/sometimes-saving-cpu-time-is-worth-it.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/11/sometimes-saving-cpu-time-is-worth-it.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-4360405903626435837</guid><pubDate>Fri, 02 Nov 2012 23:35:00 +0000</pubDate><atom:updated>2012-11-02T19:35:25.610-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">statistics</category><category domain="http://www.blogger.com/atom/ns#">skepticism</category><category domain="http://www.blogger.com/atom/ns#">polls</category><category domain="http://www.blogger.com/atom/ns#">politics</category><title>Politics vs. science and the Nate Silver controversy</title><description>&lt;p&gt;I’ll take a small departure from the narrow world of biostatistics and comment on a wider matter.&lt;/p&gt; &lt;p&gt;Nate Silver of &lt;a href="http://fivethirtyeight.blogs.nytimes.com/" target="_blank"&gt;FiveThirtyEight&lt;/a&gt; has really kicked the hornet’s nest. This is a nest that really needed stirring, but I do not envy him for being the focus of attention.&lt;/p&gt; &lt;p&gt;This all started, I think, when he released his book and basically called political pundits out for a business model of generating drama rather than making good predictions. This wouldn’t be a huge deal, except that he has developed a statistical model that combines data from national and state polls with demographic data to project outcomes of presidential and senatorial elections. 
This model, as of this writing, has President Obama at close to an &lt;a href="http://fivethirtyeight.blogs.nytimes.com/2012/11/02/nov-1-the-simple-case-for-saying-obama-is-the-favorite/" target="_blank"&gt;81% probability of re-election&lt;/a&gt;, given the current state of things. As it turns out, there are a lot of people that don’t like this, and they generally fall into two camps:&lt;/p&gt; &lt;p&gt;1. People who would rather see President Obama defeated in the election, and&lt;/p&gt; &lt;p&gt;2. Pundits who have a vested interest in a dramatic “horse-race” election&lt;/p&gt; &lt;p&gt;I’ll add a third:&lt;/p&gt; &lt;p&gt;3. Pundits who want to remain relevant (whether to keep their jobs or reputations).&lt;/p&gt; &lt;p&gt;Frankly, I don’t think that pundits will have to worry about #3. There’s an allergy to fact in this country, a large group of people who would rather ignore established fact and cling to a fantasy. (You can find a sampling of these people over at the intelligent design blogosphere, for instance.) I think the demand for compelling stories over dry facts will remain.&lt;/p&gt; &lt;p&gt;I’ve run into people of the first type, when I’ve published some armchair statistician analyses based on Twitter sentiment, for instance. The responses weren’t critiques of the method, but rather, “who cares, Republicans rule!” Even more dangerous, I’ve run into similar responses to negative clinical study results in cases where sponsors have a vested interest in positive outcomes. 
(There was at least one case I remember where a sponsor moved forward with an expensive study to follow on, and some where I was asked to reanalyze a zillion times.)&lt;/p&gt; &lt;p&gt;Nate wrote &lt;a href="http://www.amazon.com/The-Signal-Noise-Predictions-Fail-but/dp/159420411X/ref=sr_1_1?ie=UTF8&amp;amp;qid=1351898664&amp;amp;sr=8-1&amp;amp;keywords=the+signal+and+the+noise" target="_blank"&gt;The Signal and the Noise&lt;/a&gt; where he, among a lot of explanation, points out that there is a whole cottage industry of people getting paid to BS about politics. So I think that some in the second category are starting to face an existential crisis, and that makes them dangerous.&lt;/p&gt; &lt;p&gt;Ultimately, we have to understand where Nate is coming from to understand his prediction. His money is (literally – He made a bet&lt;a href="#politics_vs_science_fn1" target="_blank"&gt;[1]&lt;/a&gt; on Twitter with “Morning Joe” Scarborough of NBC) on Obama’s victory in the election, not necessarily because he wants Obama to win, but because he has confidence in his prediction. When he made the bet, he made the controversy more than just trading words, but he called Joe’s bluff (Joe had said that anyone not calling the race a tossup is an ideologue). We can now call him The Statistician Who Kicked the Hornet’s Nest – the punditry, including the public editor of the New York Times that hosts his blog, is collectively attacking him.&lt;/p&gt; &lt;p&gt;Unfortunately, the punditry has the upper hand, because people are more interested in the narrative than the science.&lt;/p&gt; &lt;p&gt;&lt;a name="politics_vs_science_fn1"&gt;[1]&lt;/a&gt; The bet originally consisted of the loser donating $1000 to charity. Nate subsequently donated $2538 to the Red Cross before the election.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hoZfTaOQvXo:TKKSHioPLfs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hoZfTaOQvXo:TKKSHioPLfs:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=hoZfTaOQvXo:TKKSHioPLfs:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hoZfTaOQvXo:TKKSHioPLfs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hoZfTaOQvXo:TKKSHioPLfs:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=hoZfTaOQvXo:TKKSHioPLfs:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hoZfTaOQvXo:TKKSHioPLfs:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=hoZfTaOQvXo:TKKSHioPLfs:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=hoZfTaOQvXo:TKKSHioPLfs:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=hoZfTaOQvXo:TKKSHioPLfs:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/hoZfTaOQvXo" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/hoZfTaOQvXo/politics-vs-science-and-nate-silver.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/11/politics-vs-science-and-nate-silver.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-823499767723263743</guid><pubDate>Wed, 31 Oct 2012 12:45:00 +0000</pubDate><atom:updated>2012-10-31T08:45:00.040-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">statistics</category><title>Willful statistical illiteracy</title><description>&lt;p&gt;The fine folks over at &lt;a href="http://simplystatistics.org/" target="_blank"&gt;Simply Statistics&lt;/a&gt; have a &lt;a href="http://simplystatistics.org/post/34635539704/on-weather-forecasts-nate-silver-and-the" target="_blank"&gt;very good educational article&lt;/a&gt; about the difference between the probability of winning an election and vote share. This article stems from a &lt;a href="http://www.politico.com/blogs/media/2012/10/nate-silver-romney-clearly-could-still-win-147618.html" target="_blank"&gt;controversial column over at Politico&lt;/a&gt; criticizing Nate Silver and his election forecasts.&lt;/p&gt; &lt;p&gt;Twitter responses are even worse. 
Conservative filmmaker John Ziegler calls Nate Silver a “hyper-partisan fraud” who is “not an expert on polls.”&lt;/p&gt; &lt;p&gt;&lt;a href="http://lh6.ggpht.com/-xFBPyLkAyzA/UI_6fPoOkvI/AAAAAAAAKAE/cPDPtWSnLp0/s1600-h/image%25255B2%25255D.png"&gt;&lt;img title="image" style="border-left-width: 0px; border-right-width: 0px; background-image: none; border-bottom-width: 0px; padding-top: 0px; padding-left: 0px; margin: 0px; display: inline; padding-right: 0px; border-top-width: 0px" border="0" alt="image" src="http://lh3.ggpht.com/-uugpHi9hHVQ/UI_6f9Yob_I/AAAAAAAAKAM/1stB47s2_vA/image_thumb.png?imgmax=800" width="244" height="204"&gt;&lt;/a&gt;&lt;/p&gt; &lt;p&gt;Glenn Thrush mentions a “conservative 538:”&lt;/p&gt; &lt;p&gt;&lt;a href="http://lh6.ggpht.com/-SI6Kuo703v0/UI_6gXNpU7I/AAAAAAAAKAU/MksBxJIFItc/s1600-h/image%25255B5%25255D.png"&gt;&lt;img title="image" style="border-left-width: 0px; border-right-width: 0px; background-image: none; border-bottom-width: 0px; padding-top: 0px; padding-left: 0px; margin: 0px; display: inline; padding-right: 0px; border-top-width: 0px" border="0" alt="image" src="http://lh6.ggpht.com/-xlFJCV864bA/UI_6hHTqf8I/AAAAAAAAKAY/l8Z7zRKryl8/image_thumb%25255B1%25255D.png?imgmax=800" width="244" height="48"&gt;&lt;/a&gt;&lt;/p&gt; &lt;p&gt;And it’s not hard to find other examples.&lt;/p&gt; &lt;p&gt;I’ve run into this reaction a bit, especially when it comes to politics. There is a large group of people who will dismiss any evidence going against their beliefs. 
I guess the punditry wasn’t so dismissive of Silver in 2010.&lt;/p&gt; &lt;p&gt;At any rate, I give a recommendation I rarely give: read this &lt;a href="http://www.politico.com/blogs/media/2012/10/nate-silver-romney-clearly-could-still-win-147618.html" target="_blank"&gt;Politico article &lt;em&gt;and the comments&lt;/em&gt;&lt;/a&gt; (ignore the “conservatives aren’t bright” nonsense, which is the same stuff coming from the left).&lt;/p&gt; &lt;p&gt;And let’s thank Nate Silver, RealClearPolitics, and all the honest pollsters who try to shine some data on this election.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=67ILQ3M9xl4:AV7qDrNuCzQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=67ILQ3M9xl4:AV7qDrNuCzQ:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=67ILQ3M9xl4:AV7qDrNuCzQ:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=67ILQ3M9xl4:AV7qDrNuCzQ:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=67ILQ3M9xl4:AV7qDrNuCzQ:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=67ILQ3M9xl4:AV7qDrNuCzQ:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=67ILQ3M9xl4:AV7qDrNuCzQ:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=67ILQ3M9xl4:AV7qDrNuCzQ:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=67ILQ3M9xl4:AV7qDrNuCzQ:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=67ILQ3M9xl4:AV7qDrNuCzQ:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/67ILQ3M9xl4" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/67ILQ3M9xl4/willful-statistical-illiteracy.html</link><author>noreply@blogger.com (John Johnson)</author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://lh3.ggpht.com/-uugpHi9hHVQ/UI_6f9Yob_I/AAAAAAAAKAM/1stB47s2_vA/s72-c/image_thumb.png?imgmax=800" height="72" width="72" /><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/10/willful-statistical-illiteracy.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-3237190213147790769</guid><pubDate>Tue, 30 Oct 2012 01:45:00 +0000</pubDate><atom:updated>2012-10-29T21:45:58.242-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">statistics</category><category domain="http://www.blogger.com/atom/ns#">blogging</category><category domain="http://www.blogger.com/atom/ns#">social network analysis</category><title>The most valuable thing about my little stat blog network project</title><description>&lt;p&gt;So, I decided to construct the linking graph through blogrolls, and finally settled on using a manual process. The best part of this project is really finding out for myself all the great content out there!&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=7TQ8zNZfGlM:TdRMZ-PQT58:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=7TQ8zNZfGlM:TdRMZ-PQT58:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=7TQ8zNZfGlM:TdRMZ-PQT58:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=7TQ8zNZfGlM:TdRMZ-PQT58:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=7TQ8zNZfGlM:TdRMZ-PQT58:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=7TQ8zNZfGlM:TdRMZ-PQT58:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=7TQ8zNZfGlM:TdRMZ-PQT58:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=7TQ8zNZfGlM:TdRMZ-PQT58:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=7TQ8zNZfGlM:TdRMZ-PQT58:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=7TQ8zNZfGlM:TdRMZ-PQT58:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/7TQ8zNZfGlM" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/7TQ8zNZfGlM/the-most-valuable-thing-about-my-little.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/10/the-most-valuable-thing-about-my-little.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-8704056006975700490</guid><pubDate>Mon, 22 Oct 2012 12:45:00 +0000</pubDate><atom:updated>2012-10-22T08:45:00.519-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">statistics</category><category domain="http://www.blogger.com/atom/ns#">coursera</category><category domain="http://www.blogger.com/atom/ns#">blogging</category><category domain="http://www.blogger.com/atom/ns#">social network analysis</category><title>SNA class proposal</title><description>&lt;p&gt;I’ve been taking several classes through &lt;a href="http://www.coursera.org" target="_blank"&gt;Coursera&lt;/a&gt; (nothing against the other platforms; I took two of the original three classes via Stanford and just stuck with the platform). The latest one is &lt;a href="http://class.coursera.org/sna-2012-001" target="_blank"&gt;Social Network Analysis&lt;/a&gt;, which has a programming project. Here is what I have posted as a proposal: &lt;blockquote&gt; &lt;p&gt;Ok, I've been thinking about the programming project idea some, and at first I was thinking of analyzing the statistics blogging community, mostly because I belong to it and I wanted to see what comes out. The analysis below can be done for any sort of community. 
I've developed this idea a little further and wanted to record it here for two reasons. First, I simply need to write it down to get it out of my head and in such a way that the public can understand it. Second, I'd like feedback. &lt;p&gt;As it turns out, I took the NLP class in the spring and think there's some overlap that can be exploited. (This comes up nicely in the &lt;em&gt;Mining the Social Web&lt;/em&gt; and &lt;em&gt;Programming Collective Intelligence&lt;/em&gt; books.) There are measures of content similarity, such as cosine similarity, which are simple to compute and work reasonably well to see how similar content is. Content can then be clustered based on similarity. So, then, I have the following questions: &lt;ul&gt; &lt;li&gt;What are the communities, and do they relate to clusters of content similarity?  &lt;li&gt;If so, who are the "brokers" between different communities, and what do they blog about? There are a couple of aggregators, such as StatBlogs and R-Bloggers, that I imagine would glue together several communities (that's their purpose and value), but I imagine there are a few others that are aggregator-like + commentary as well. Original content generators, like mine, will probably be on the edges.  &lt;li&gt;Is it better to threshold edges based on a number of mentions, or use an edge weight based on the number of mentions?  &lt;li&gt;If I have time, I may try to do some sort of topic or named entity extraction, and get an automated way of seeing what these different communities are talking about.&lt;/li&gt;&lt;/ul&gt;&lt;/blockquote&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vJPx6DSkZs8:TTd1BCA_xD4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vJPx6DSkZs8:TTd1BCA_xD4:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=vJPx6DSkZs8:TTd1BCA_xD4:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vJPx6DSkZs8:TTd1BCA_xD4:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vJPx6DSkZs8:TTd1BCA_xD4:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=vJPx6DSkZs8:TTd1BCA_xD4:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vJPx6DSkZs8:TTd1BCA_xD4:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=vJPx6DSkZs8:TTd1BCA_xD4:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=vJPx6DSkZs8:TTd1BCA_xD4:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=vJPx6DSkZs8:TTd1BCA_xD4:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/vJPx6DSkZs8" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/vJPx6DSkZs8/sna-class-proposal.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/10/sna-class-proposal.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-3136220419092919718</guid><pubDate>Sat, 20 Oct 2012 12:15:00 +0000</pubDate><atom:updated>2012-10-20T08:15:00.317-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">polls</category><category domain="http://www.blogger.com/atom/ns#">politics</category><title>Nate Silver on The Daily Show</title><description>&lt;p&gt;&lt;a href="http://www.thedailyshow.com/full-episodes/wed-october-17-2012-nate-silver" target="_blank"&gt;Watch it&lt;/a&gt;!&lt;/p&gt; &lt;p&gt;There’s an interesting conversation about how the campaigns use analytics in get out the vote efforts. It doesn’t go a lot in depth, but I think this is an important aspect of campaigns that will come out into public view in the next couple of election cycles.&lt;/p&gt; &lt;p&gt;Of course, you can find his blog at &lt;a title="http://fivethirtyeight.blogs.nytimes.com/" href="http://fivethirtyeight.blogs.nytimes.com/"&gt;http://fivethirtyeight.blogs.nytimes.com/&lt;/a&gt;.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=eFcxgYPkxjg:yG6qvoj3fAY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=eFcxgYPkxjg:yG6qvoj3fAY:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=eFcxgYPkxjg:yG6qvoj3fAY:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=eFcxgYPkxjg:yG6qvoj3fAY:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=eFcxgYPkxjg:yG6qvoj3fAY:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=eFcxgYPkxjg:yG6qvoj3fAY:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=eFcxgYPkxjg:yG6qvoj3fAY:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=eFcxgYPkxjg:yG6qvoj3fAY:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=eFcxgYPkxjg:yG6qvoj3fAY:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=eFcxgYPkxjg:yG6qvoj3fAY:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/eFcxgYPkxjg" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/eFcxgYPkxjg/nate-silver-on-daily-show.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/10/nate-silver-on-daily-show.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-7913633974975893017</guid><pubDate>Wed, 19 Sep 2012 12:45:00 +0000</pubDate><atom:updated>2012-09-19T08:45:00.937-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">data collection</category><category domain="http://www.blogger.com/atom/ns#">data cleaning</category><title>Data cleaning is harder than statistical analysis</title><description>&lt;p&gt;Statistical analysis is relatively hard, but it is a piece of cake compared to data collection, cleaning, and manipulation. In fact, in clinical trials research, we spend millions of dollars to develop and advance the capability to effectively manage data. Just about any clinical research organization worth the price has a strong data management department that they’ve spent a lot of time cultivating.&lt;/p&gt; &lt;p&gt;It’s time to take this a step further. In my workplace, we have a very close integration of the statistics group (consisting of statisticians and statistical programmers) and the data management group. 
In the latest issue of their &lt;a href="http://scdm.org/members/publications/Data%20Basics/2012/fall2012.pdf" target="_blank"&gt;newsletter&lt;/a&gt;, the &lt;a href="http://scdm.org" target="_blank"&gt;Society for Clinical Data Management&lt;/a&gt; has included an article on the optimal collaboration between statisticians and data managers.&amp;nbsp; (I take this a step further and &lt;a href="http://www.pharmasug.org/proceedings/2012/MS/PharmaSUG-2012-MS05.pdf" target="_blank"&gt;include the medical writer&lt;/a&gt;.) This collaboration takes a lot of time – time I could be spending doing statistical analysis. However, if the statistical analysis involves working around fewer data issues, it’s all worth it.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=KmmsvgRE864:NTFXxYkyC7A:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=KmmsvgRE864:NTFXxYkyC7A:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=KmmsvgRE864:NTFXxYkyC7A:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=KmmsvgRE864:NTFXxYkyC7A:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=KmmsvgRE864:NTFXxYkyC7A:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=KmmsvgRE864:NTFXxYkyC7A:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=KmmsvgRE864:NTFXxYkyC7A:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=KmmsvgRE864:NTFXxYkyC7A:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=KmmsvgRE864:NTFXxYkyC7A:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=KmmsvgRE864:NTFXxYkyC7A:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/KmmsvgRE864" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/KmmsvgRE864/data-cleaning-is-harder-than.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/09/data-cleaning-is-harder-than.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-1079003878869370136</guid><pubDate>Mon, 10 Sep 2012 12:45:00 +0000</pubDate><atom:updated>2012-09-10T08:45:00.505-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">careers</category><title>Exercise helps statisticians</title><description>&lt;p&gt;Statistics is a rather sedentary job, and, over the years, I found my effectiveness decreasing as I found fewer “peak” hours in the day. I also gained a lot of weight. The number of migraines I experienced went from about two a year to about once a month.&lt;/p&gt; &lt;p&gt;In the last two or three years, I’ve been getting out of my chair to go for runs, I’ve taken up taekwondo, and I’ve also joined a small gym that provides small-group personal training. In addition to adding who knows how many years to my life, they’ve really helped my focus and concentration when I’m doing statistics. I’ve also decided to take a day off from thinking about statistics once a week or so, which I’m finding helpful.&lt;/p&gt; &lt;p&gt;I only wish I had established these habits years ago.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9rHsFrsvwa0:9Yf5iQpM7cY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9rHsFrsvwa0:9Yf5iQpM7cY:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9rHsFrsvwa0:9Yf5iQpM7cY:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9rHsFrsvwa0:9Yf5iQpM7cY:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9rHsFrsvwa0:9Yf5iQpM7cY:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9rHsFrsvwa0:9Yf5iQpM7cY:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9rHsFrsvwa0:9Yf5iQpM7cY:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9rHsFrsvwa0:9Yf5iQpM7cY:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9rHsFrsvwa0:9Yf5iQpM7cY:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9rHsFrsvwa0:9Yf5iQpM7cY:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/9rHsFrsvwa0" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/9rHsFrsvwa0/exercise-helps-statisticians.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/09/exercise-helps-statisticians.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-174528709132962975</guid><pubDate>Wed, 29 Aug 2012 12:45:00 +0000</pubDate><atom:updated>2012-08-29T08:45:00.556-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">SAS</category><category domain="http://www.blogger.com/atom/ns#">R</category><title>Integrating R into a SAS shop</title><description>&lt;p&gt;I work in an environment dominated by &lt;a href="http://www.sas.com"&gt;SAS&lt;/a&gt;, and I am looking to integrate &lt;a href="http://www.r-project.org"&gt;R&lt;/a&gt; into our environment.&lt;/p&gt; &lt;p&gt;Why would I want to do such a thing? First, I do not want to get rid of SAS. That would not only take away most of our investment in SAS training and hiring good quality SAS programmers, but it would also remove the advantages of SAS from our environment. 
These advantages include the following:&lt;/p&gt; &lt;ul&gt; &lt;li&gt;Many years of collective experience in pharmaceutical data management, analysis, and reporting&lt;/li&gt; &lt;li&gt;Workflow that is second to none (with the exception of reproducible research, where R excels)&lt;/li&gt; &lt;li&gt;Reporting tools based on ODS that are second to none&lt;/li&gt; &lt;li&gt;SAS has much better validation tools than R, unless you get a commercial version of R (which makes IT folks happy)&lt;/li&gt; &lt;li&gt;SAS automatically does parallel processing for several common functions&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;So, if SAS is so great, why do I want R?&lt;/p&gt; &lt;ul&gt; &lt;li&gt;SAS’s pricing model makes it so that if I get a package that does everything I want, I pay thousands of dollars per year more than the basic package and end up with a system that does way more than I need. For example, if I want to do a &lt;a href="http://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees" target="_blank"&gt;CART analysis&lt;/a&gt;, I have to buy Enterprise Miner, which does way more than I would need.&lt;/li&gt; &lt;li&gt;R is more agile and flexible than SAS&lt;/li&gt; &lt;li&gt;R more easily integrates with Fortran and C++ than SAS (I’ve tried the SAS integration with DLLs, and it’s doable, but hard)&lt;/li&gt; &lt;li&gt;R is better at custom algorithms than SAS, unless you delve into the world of IML (which is sometimes a good solution).&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;I’m still looking at ways to do it, although the integration with IML/IML studio is promising.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=c-rqXg1vB6g:hXhuiYHRtsM:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=c-rqXg1vB6g:hXhuiYHRtsM:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=c-rqXg1vB6g:hXhuiYHRtsM:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=c-rqXg1vB6g:hXhuiYHRtsM:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=c-rqXg1vB6g:hXhuiYHRtsM:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=c-rqXg1vB6g:hXhuiYHRtsM:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=c-rqXg1vB6g:hXhuiYHRtsM:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=c-rqXg1vB6g:hXhuiYHRtsM:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=c-rqXg1vB6g:hXhuiYHRtsM:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=c-rqXg1vB6g:hXhuiYHRtsM:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/c-rqXg1vB6g" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/c-rqXg1vB6g/integrating-r-into-sas-shop.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>4</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/08/integrating-r-into-sas-shop.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-4605697030746676792</guid><pubDate>Mon, 27 Aug 2012 12:45:00 +0000</pubDate><atom:updated>2012-08-27T08:45:00.725-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">data mining</category><category domain="http://www.blogger.com/atom/ns#">big data</category><category domain="http://www.blogger.com/atom/ns#">clinical trials</category><category domain="http://www.blogger.com/atom/ns#">machine learning</category><title>Romney’s “secretive data mining”–could the same techniques be used for clinical trial enrollment?</title><description>&lt;p&gt;Romney has been “exposed” as using “&lt;a href="http://siouxcityjournal.com/business/ap-exclusive-romney-uses-secretive-data-mining/article_288f3215-8677-5304-bc0f-3ee38bd74fc1.html" target="_blank"&gt;secretive data mining techniques&lt;/a&gt;” to find donors to his campaign in traditional Democratic strongholds. (These techniques can be learned in any of these free online courses offered through &lt;a href="http://www.coursera.org" target="_blank"&gt;Coursera&lt;/a&gt; and &lt;a href="http://www.udacity.com" target="_blank"&gt;Udacity&lt;/a&gt; along with the massive databases collected by the different parties.)&lt;/p&gt; &lt;p&gt;Of course, my thought is, can we use these techniques to find potential participants in clinical trials? 
I think that if we can work out the privacy issues, this represents a useful tool for clinicians to find not just trial participants, but patients who need to be treated, but for some reason are not being treated. This could be a win for everybody.&lt;/p&gt; &lt;p&gt;Other ideas:&lt;/p&gt; &lt;ul&gt; &lt;li&gt;using Google trends, much like Google uses to identify flu outbreaks&lt;/li&gt; &lt;li&gt;mining discussion boards&lt;/li&gt; &lt;li&gt;identifying need through blog networks&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;I’ll be taking the &lt;a href="https://www.coursera.org/course/bigdata" target="_blank"&gt;Web Intelligence and Big Data&lt;/a&gt; class through Coursera, so maybe I’ll get more ideas.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=bVEIqxEe9MU:lrLCK8d1u-U:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=bVEIqxEe9MU:lrLCK8d1u-U:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=bVEIqxEe9MU:lrLCK8d1u-U:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=bVEIqxEe9MU:lrLCK8d1u-U:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=bVEIqxEe9MU:lrLCK8d1u-U:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=bVEIqxEe9MU:lrLCK8d1u-U:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=bVEIqxEe9MU:lrLCK8d1u-U:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=bVEIqxEe9MU:lrLCK8d1u-U:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=bVEIqxEe9MU:lrLCK8d1u-U:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=bVEIqxEe9MU:lrLCK8d1u-U:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/bVEIqxEe9MU" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/bVEIqxEe9MU/romneys-secretive-data-miningcould-same.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>1</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/08/romneys-secretive-data-miningcould-same.html</feedburner:origLink></item><item><guid isPermaLink="false">tag:blogger.com,1999:blog-6439806875881311576.post-3492447695990154959</guid><pubDate>Mon, 20 Aug 2012 12:45:00 +0000</pubDate><atom:updated>2012-08-20T08:45:00.519-04:00</atom:updated><category domain="http://www.blogger.com/atom/ns#">observational</category><category domain="http://www.blogger.com/atom/ns#">interpretation</category><category domain="http://www.blogger.com/atom/ns#">causal inference</category><category domain="http://www.blogger.com/atom/ns#">phase 3</category><category domain="http://www.blogger.com/atom/ns#">pharmaceutical industry</category><category domain="http://www.blogger.com/atom/ns#">clinical trials</category><category domain="http://www.blogger.com/atom/ns#">biostatistics</category><title>Clinical trials: enrollment targets vs. valid hypothesis testing</title><description>&lt;p&gt;The questions raised in this &lt;em&gt;&lt;a href="http://www.scientificamerican.com/article.cfm?id=studying-drugs-in-wrong-people" target="_blank"&gt;Scientific American article&lt;/a&gt;&lt;/em&gt; ought to concern all of us, and I want to take some of these questions further. 
But let me first explain the problem.&lt;/p&gt; &lt;p&gt;Clinical trials and observational studies of drugs, biologics, and medical devices are a huge logistical challenge, not the least of which is finding physicians and patients to participate. The thesis of the article is that the classical methods of finding participants – mostly compensation – lead to perverse incentives to lie about one’s medical condition.&lt;/p&gt; &lt;p&gt;I think there is a more subtle issue, and it struck me when one of our clinical people expressed a desire not to put enrollment caps on large hospitals for the sake of a fast enrollment. In our race to finish the trial and collect data, we are biasing our studies toward larger centers where there may be better care. This effect is exactly the opposite of that posited in the article, where treatment effect is biased downward. Here, treatment effect is biased upward, with doctors more familiar with best delivery practices (many of the drugs I study are IV or hospital-based), best treatment practices, and more efficient care.&lt;/p&gt; &lt;p&gt;We statisticians can start to characterize the problem by looking at treatment effect by different sites, or using &lt;a href="http://en.wikipedia.org/wiki/Multilevel_model" target="_blank"&gt;hierarchical models&lt;/a&gt; to separate out center effect from drug. But this isn’t always a great solution, because low-enrolling sites, by definition, have a lot fewer people, and pooling is problematic because low-enrolling centers tend to have way more variation in level and quality of care than high-enrolling centers.&lt;/p&gt; &lt;p&gt;We can get creative on the statistical analysis end of studies, but I think the best solution is going to involve stepping back at the clinical trial logistics planning stage and recasting the recruitment problem in terms of a generalizability/speed tradeoff.&lt;/p&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9mbXxuLmubo:z_k8m3l_cfg:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9mbXxuLmubo:z_k8m3l_cfg:-BTjWOF_DHI"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9mbXxuLmubo:z_k8m3l_cfg:-BTjWOF_DHI" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9mbXxuLmubo:z_k8m3l_cfg:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9mbXxuLmubo:z_k8m3l_cfg:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9mbXxuLmubo:z_k8m3l_cfg:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9mbXxuLmubo:z_k8m3l_cfg:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9mbXxuLmubo:z_k8m3l_cfg:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?a=9mbXxuLmubo:z_k8m3l_cfg:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/RealizationsInBiostatistics?i=9mbXxuLmubo:z_k8m3l_cfg:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/RealizationsInBiostatistics/~4/9mbXxuLmubo" height="1" width="1"/&gt;</description><link>http://feedproxy.google.com/~r/RealizationsInBiostatistics/~3/9mbXxuLmubo/clinical-trials-enrollment-targets-vs.html</link><author>noreply@blogger.com (John Johnson)</author><thr:total>0</thr:total><gd:extendedProperty name="commentSource" value="1" /><gd:extendedProperty name="commentModerationMode" value="FILTERED_POSTMOD" /><feedburner:origLink>http://realizationsinbiostatistics.blogspot.com/2012/08/clinical-trials-enrollment-targets-vs.html</feedburner:origLink></item></channel></rss>
