<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;CUcGQHcycSp7ImA9WhRUGU8.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670</id><updated>2012-01-30T06:30:21.999-05:00</updated><category term="Stata" /><category term="densityplot()" /><category term="clustering" /><category term="rexp()" /><category term="matrix operations" /><category term="offset option" /><category term="bargraph.CI() function" /><category term="axis control" /><category term="lag function" /><category term="Firth option" /><category term="RCurl package" /><category term="rpart package" /><category term="subset" /><category term="Monte Carlo experiments" /><category term="time-varying covariates" /><category term="spreadsheets" /><category term="put" /><category term="John Emerson" /><category term="data frames" /><category term="barplot()" /><category term="mosaic plot" /><category term="comparisons" /><category term="running average" /><category term="proc mcmc" /><category term="string functions" /><category term="distance" /><category term="latent class model" /><category term="paste()" /><category term="cat()" /><category term="read Excel sheets" /><category term="proc fcmp" /><category term="probability" /><category term="resampling based inference" /><category term="sort" /><category term="textConnection()" /><category term="non-monotonic missingness" /><category term="cumulative distribution function" /><category term="logistf package" /><category term="drop statement" /><category term="list of variables" /><category 
term="Rosettacode" /><category term="hexbin()" /><category term="brute force" /><category term="graphics" /><category term="by statement" /><category term="empirical CDF" /><category term="formatting" /><category term="contrasts" /><category term="cumulative hazard" /><category term="transpose" /><category term="multinomial observations" /><category term="proc glm" /><category term="odds ratio" /><category term="mvrnorm()" /><category term="negative binomial distribution" /><category term="proc greplay" /><category term="ggplot2 package" /><category term="dim()" /><category term="vectors" /><category term="Metropolis-Hastings algorithm" /><category term="propensity scores" /><category term="sas tricks" /><category term="epidemiology" /><category term="point option" /><category term="change variable types" /><category term="Task Views" /><category term="R function" /><category term="name conflict" /><category term="SAS formats" /><category term="lattice library" /><category term="lurking variables" /><category term="Mplus" /><category term="elrm package" /><category term="events/trials syntax" /><category term="plot colors" /><category term="one-to-many" /><category term="count models" /><category term="normality assumption" /><category term="survival model" /><category term="large datasets" /><category term="failure time analysis" /><category term="MCMC" /><category term="make categories" /><category term="adding text to graphics" /><category term="reproducible analysis" /><category term="apply()" /><category term="productivity" /><category term="New Year's resolutions" /><category term="libraries in R" /><category term="indicator variables" /><category term="proc lca" /><category term="call symput" /><category term="boxplot" /><category term="R2winbugs" /><category term="data step" /><category term="proc phreg" /><category term="digits of Pi" /><category term="annnotate data sets" /><category term="multiple regression" /><category term="eval statement" /><category 
term="summary statistics" /><category term="Firth logistic regression" /><category term="sas7bdat package" /><category term="central moments" /><category term="read.sas7bdat()" /><category term="estimate statement" /><category term="University of Auckland" /><category term="college majors" /><category term="round function" /><category term="range of variables" /><category term="class statement" /><category term="cubature library" /><category term="probability distributions" /><category term="crowd-sourcing" /><category term="truncated distribution" /><category term="coda package" /><category term="glm() function" /><category term="Thomas Lumley" /><category term="blog aggregators" /><category term="barplots(back to back)" /><category term="retain" /><category term="Hotelling's T" /><category term="social science" /><category term="Read data in SAS" /><category term="radio static" /><category term="quoting" /><category term="adaptIntegrate()" /><category term="diploma problem" /><category term="NP complete" /><category term="with()" /><category term="dotplot" /><category term="Pi" /><category term="proc sgrender" /><category term="Read data in R" /><category term="complex survey design" /><category term="Matt Regan" /><category term="latent class analysis" /><category term="runif()" /><category term="standardized regression coefficients" /><category term="robustness" /><category term="favstats()" /><category term="expected cell counts" /><category term="annotate data set" /><category term="maps package" /><category term="gdata package" /><category term="write Excel sheets" /><category term="ragged input" /><category term="Type I error rate" /><category term="attach()" /><category term="options()" /><category term="regression to the mean" /><category term="proc tabulate" /><category term="dotplot/boxplot" /><category term="CRAN" /><category term="Poisson distribution" /><category term="exponential" /><category term="ppois()" /><category term="pairs plots" /><category 
term="JAGS" /><category term="Minard" /><category term="integration" /><category term="looping" /><category term="col option" /><category term="population age" /><category term="probability distributiholons" /><category term="generalized pairs plots" /><category term="read sheets" /><category term="Hosmer and Lemeshow" /><category term="skewness()" /><category term="regular expressions" /><category term="John Snow" /><category term="save data in Stata format" /><category term="design matrix" /><category term="saving output from SAS" /><category term="circles" /><category term="plot.ts()" /><category term="ts()" /><category term="Maxine Pfannkuch" /><category term="random variables" /><category term="axis statement" /><category term="mean()" /><category term="subsetting" /><category term="SAS data sets" /><category term="numeric()" /><category term="association measures" /><category term="comparing models" /><category term="Royal Statistical Society" /><category term="proc mi" /><category term="standard deviation" /><category term="association plot" /><category term="binomial probability" /><category term="offset axes" /><category term="linear regression" /><category term="assumptions" /><category term="exact statement" /><category term="relevel function" /><category term="reflabel option" /><category term="recursive partitioning" /><category term="survey sampling" /><category term="where function" /><category term="merge" /><category term="proc univariate" /><category term="histogram" /><category term="date and time values" /><category term="end =" /><category term="readLines()" /><category term="rnorm()" /><category term="confint()" /><category term="stratiification" /><category term="matrices" /><category term="gmodels package" /><category term="regression adjustment" /><category term="href option" /><category term="statistical education" /><category term="Cramer's V" /><category term="multivariate statistics" /><category term="RColorBrewer package" /><category 
term="API" /><category term="permutation test" /><category term="pyramid plots" /><category term="goodness of fit" /><category term="regression trees" /><category term="categorical covariates" /><category term="chisq.test()" /><category term="R-bloggers" /><category term="proc simnormal" /><category term="one-way chi-square test" /><category term="maps" /><category term="relative risk" /><category term="negative binomial regression" /><category term="reshape package" /><category term="date formats" /><category term="chi-square test" /><category term="sapply()" /><category term="Chris Wild" /><category term="rare disease assumption" /><category term="cut function" /><category term="partykit package" /><category term="heat map" /><category term="profile likelihood" /><category term="conditioning" /><category term="bayes statement" /><category term="fonts" /><category term="OpenBUGS" /><category term="scatterplot" /><category term="binning" /><category term="Durbin-Watson statistic" /><category term="clodds statement" /><category term="proc gproject" /><category term="nobs option" /><category term="sas7bdat format" /><category term="repeated multiples" /><category term="Convert R to SAS" /><category term="layout()" /><category term="proc kde" /><category term="Monty Hall problem" /><category term="read from URL" /><category term="ods graphics on" /><category term="Fibonacci series" /><category term="reshape" /><category term="pairwaise comparisons" /><category term="variable number of records" /><category term="substitute function" /><category term="read complex data files" /><category term="dynamite plot" /><category term="ods system" /><category term="R environments" /><category term="hat-check problem" /><category term="empirical problem solving" /><category term="principal components" /><category term="proc gmap" /><category term="plot" /><category term="regexp()" /><category term="par()" /><category term="Contour" /><category term="random statement" /><category 
term="within()" /><category term="Design package" /><category term="order" /><category term="xchisq.test()" /><category term="central limit theorem" /><category term="na.string" /><category term="proc gchart" /><category term="proportional hazards assumption" /><category term="MCMCpack package" /><category term="survival analysis" /><category term="back-to-back barplots" /><category term="a*b=c syntax" /><category term="exchangeability" /><category term="Galton" /><category term="determinant" /><category term="Tim Hesterberg" /><category term="sunflowerplot()" /><category term="flexmix package" /><category term="pnbinom()" /><category term="HistData package" /><category term="programming style" /><category term="proc genmod" /><category term="plot symbols" /><category term="shuffle()" /><category term="cex" /><category term="moments package" /><category term="interactive development environments" /><category term="Excel" /><category term="simulate data" /><category term="MplusAutomation package" /><category term="Ken Beath" /><category term="observed cell counts" /><category term="SAS-x" /><category term="table()" /><category term="Nelson-Aalen estimator" /><category term="Bayesian methods" /><category term="ifelse()" /><category term="set statement options" /><category term="panelby statement" /><category term="perl" /><category term="RStudio" /><category term="exp()" /><category term="exact logistic regression" /><category term="proc gplot" /><category term="seeds" /><category term="proc sgplot" /><category term="read from local disk" /><category term="MANOVA" /><category term="as.POSIXlt()" /><category term="statistics education" /><category term="confounding" /><category term="summary()" /><category term="WinBUGS" /><category term="referencing sequential variables" /><category term="Matt Shotwell" /><category term="hexbin package" /><category term="job creation" /><category term="read.table()" /><category term="pch" /><category term="coverage probabilities" 
/><category term="sd()" /><category term="as.data.frame()" /><category term="titles" /><category term="World Statistics Day" /><category term="calculus" /><category term="overlay option" /><category term="unobserved class" /><category term="google spreadsheet" /><category term="Bureau of Labor Statistics" /><category term="smoothScatter()" /><category term="le Cessie and Houwelingen" /><category term="class probabilities" /><category term="Gamma function" /><category term="gps" /><category term="HELP data set" /><category term="Shangri La" /><category term="pie() function" /><category term="text()" /><category term="customizing plots" /><category term="readline()" /><category term="quadratic equation" /><category term="test statement" /><category term="missing data" /><category term="GGally package" /><category term="file print" /><category term="R packages" /><category term="names(). events/trials syntax" /><category term="contrast statement" /><category term="deparse function" /><category term="shuffle() function" /><category term="teaching statistics" /><category term="mapproj package" /><category term="hazard function" /><category term="type=&quot;n&quot;" /><category term="read.xlsx()" /><category term="Amazon Sales rank" /><category term="multivariate normal" /><category term="Markov Chain Monte Carlo" /><category term="gsub()" /><category term="functions" /><category term="open source" /><category term="variance" /><category term="informal inference" /><category term="simulation studies" /><category term="randomLCA package" /><category term="grep() function" /><category term="mixtools package" /><category term="psychology" /><category term="pathological distribution" /><category term="Kaplan-Meier estimates" /><category term="minimum" /><category term="proc fmm" /><category term="function()" /><category term="parameterization" /><category term="survival package" /><category term="Project MOSAIC" /><category term="Michael Friendly" /><category 
term="snowstorms" /><category term="adjacent observations" /><category term="colnames()" /><category term="CPAN" /><category term="proc logistic" /><category term="Cox proportional hazards model" /><category term="categorical data" /><category term="matrix()" /><category term="finite mixture models" /><category term="MASS library" /><category term="logic" /><category term="xckd" /><category term="match()" /><category term="Tick marks" /><category term="null hypothesis" /><category term="3D plots" /><category term="time series" /><category term="frailty models" /><category term="lines()" /><category term="ylim option" /><category term="new variables" /><category term="Wolfram Alpha" /><category term="social networks" /><category term="substr" /><category term="correlated data models" /><category term="kurtosis()" /><category term="causal inference" /><category term="duplicated data" /><category term="expected value" /><category term="manifest variable" /><category term="Hadley Wickham" /><category term="Edward Tufte" /><category term="t-test" /><category term="grammar of graphics" /><category term="read.csv()" /><category term="legend" /><category term="string manipulation" /><category term="two sample comparisons" /><category term="proc sgpanel" /><category term="random number generation" /><category term="skewness" /><category term="FLXPmultinom function" /><category term="debugging" /><category term="measures of association" /><category term="power calculations" /><category term="reference value" /><category term="proc import" /><category term="convert SAS to R" /><category term="mtext()" /><category term="as.factor" /><category term="factor analysis" /><category term="bubble plot" /><category term="dummy variables" /><category term="markerattrs" /><category term="hexagon" /><category term="as.numeric()" /><category term="Xin Wei" /><category term="read data in Stata format" /><category term="tables" /><category term="spreadsheet" /><category term="missing data 
modeling" /><category term="age distribution" /><category term="Alan Zaslavsky" /><category term="SAS" /><category term="sciplot package" /><category term="random numbers" /><category term="rejection sampling" /><category term="connect points" /><category term="foreign library" /><category term="pseudo-random numbers" /><category term="confidence intervals" /><category term="formatted output" /><category term="amherst" /><category term="rjags" /><category term="SAS macro" /><category term="detach()" /><category term="excerpt" /><category term="cyclemeter" /><category term="logistic regression" /><category term="annotate macro" /><category term="descriptive statistics" /><category term="symbolic computation" /><category term="t() function" /><category term="poLCA package" /><category term="kurtosis" /><category term="Weibull" /><category term="conditional execution" /><category term="choropleth" /><category term="RColorBrewer" /><category term="style guide" /><category term="writeXLS package" /><category term="tidying code" /><category term="abline()" /><category term="communicating between SAS and R" /><category term="sequences" /><category term="mosaic package" /><category term="which.min() function" /><category term="proc_r" /><category term="FLXMRglmfix function" /><category term="censored data" /><category term="matrix" /><category term="latent class models" /><category term="colors" /><category term="axis" /><category term="proc template" /><category term="side by side histograms" /><category term="R" /><category term="multiple imputation" /><title>SAS and R</title><subtitle type="html">Examples of tasks replicated in SAS and R</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://sas-and-r.blogspot.com/" /><link rel="next" type="application/atom+xml" 
href="http://www.blogger.com/feeds/1275149608391671670/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>125</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/SASandR" /><feedburner:info uri="sasandr" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><link rel="license" type="text/html" href="http://creativecommons.org/licenses/by-nc-sa/3.0/" /><meta xmlns="http://pipes.yahoo.com" name="pipes" content="noprocess" /><logo>http://kenkleinman.net/files/favicon.jpg</logo><feedburner:emailServiceId>SASandR</feedburner:emailServiceId><feedburner:feedburnerHostname>http://feedburner.google.com</feedburner:feedburnerHostname><entry gd:etag="W/&quot;DkcAQ3s8eyp7ImA9WhRUFko.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-7163812382593333327</id><published>2012-01-26T16:59:00.003-05:00</published><updated>2012-01-27T09:20:42.573-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-27T09:20:42.573-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="proc_r" /><category scheme="http://www.blogger.com/atom/ns#" term="Xin Wei" /><category scheme="http://www.blogger.com/atom/ns#" term="communicating between SAS and R" /><title>SAS Macro Simplifies SAS and R integration (Updated)</title><content type="html">Many of us feel 
very enthusiastic about R. It's free, it features cutting edge applications, it has a large community of users contributing for mutual benefit, and on and on.  There are also many things to like about SAS, including stability, backwards compatibility, and professional support.  The way to be the best analyst you can be is to be flexible and have as many  tools at your disposal as you can manage.  That's the main motivating principle behind our book and what we do here in this blog.&lt;br /&gt;&lt;br /&gt;Today we call attention to a SAS macro that greatly eases integrating R from SAS.  Published last month in the &lt;a href="http://www.jstatsoft.org/"&gt;Journal of Statistical Software&lt;/a&gt;, the macro (written by Xin Wei of Roche Pharmaceuticals) is called Proc_R, and we discuss its installation and use today.  For a fuller write-up, see the paper, &lt;a href="http://www.jstatsoft.org/v46/c02" target="_blank"&gt;here&lt;/a&gt;.  For SAS users, the macro is a huge productivity booster, allowing one to easily complete data management and/or partial data analysis in SAS, skip out quickly to R for analyses that are awkward or impossible in SAS, then return to SAS for completion.  For people in industry, this may also ease integrating R into documentation systems built for SAS code.  See &lt;a href="http://www.decisionstats.com/using-sas-and-r-together/"&gt;this post&lt;/a&gt; on DecisionStats for a review of other integration attempts.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Getting ready&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;1. Download the "&lt;a href="http://www.jstatsoft.org/v46/c02/supp/1"&gt;SAS source code&lt;/a&gt;" and the "&lt;a href="http://www.jstatsoft.org/v46/c02/supp/2"&gt;Replication code and instructions&lt;/a&gt;".&lt;br /&gt;&lt;br /&gt;2. Move the macro somewhere you have write access.&lt;br /&gt;&lt;br /&gt;3. 
Open the macro in a text editor and change line 46 so that the rpath option points to the location of your R executable.&lt;br /&gt;&lt;br /&gt;(4. If you're running Windows 7 or Vista, and you have SAS 9.1 or above, follow instructions in a PDF in the second supplemental file you downloaded.  This makes a shortcut for a special version of SAS.  I'm not at all sure why you have to do this, though.  I had the same results running in my usual SAS set-up.)&lt;br /&gt;&lt;br /&gt;That's it!  The way the macro works is to read in your R code as a SAS data set, write it out to a file, and call R to run it, then do a bunch of post-processing.  The basic macro call looks like this:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;%include "C:\ken\sasmacros\Proc_R.sas";&lt;br /&gt;%Proc_R (SAS2R =, R2SAS =);&lt;br /&gt;Cards4;&lt;br /&gt;&lt;br /&gt;******************************&lt;br /&gt;***Please Enter R Code Here***&lt;br /&gt;******************************&lt;br /&gt;&lt;br /&gt;;;;;&lt;br /&gt;%Quit;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;You just replace the starred lines with R code, and run-- the R results, if any, appear in your SAS output and/or results windows.  The &lt;tt&gt;SAS2R&lt;/tt&gt; value is a list of the names of SAS data sets you'd like to send to R; they're added into the R environment before your code is executed.  
The &lt;tt&gt;R2SAS&lt;/tt&gt; value is a list of the names of R objects (that can be coerced to data frames) that you'd like to become SAS data sets.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Use&lt;/b&gt;&lt;br /&gt;Here's a trivial example-- generate two data sets in SAS, send them to R to run linear regressions, and send the resulting parameter estimates back to SAS.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data test;&lt;br /&gt;do i = 1 to 1000;&lt;br /&gt;  x = normal(0);&lt;br /&gt;  y = x + normal(0);&lt;br /&gt;  output;&lt;br /&gt;  end;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;data t2;&lt;br /&gt;do i = 1 to 100;&lt;br /&gt;  x = normal(0);&lt;br /&gt;  y = x + uniform(0);&lt;br /&gt;  output;&lt;br /&gt;  end;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;%include "C:\Proc_R.sas";&lt;br /&gt;%Proc_R (SAS2R =test t2, R2SAS =mylm mylm2);&lt;br /&gt;Cards4;&lt;br /&gt;setwd("c:/temp")&lt;br /&gt;an.lm = with(test,lm(y ~x))&lt;br /&gt;mylm = t(coef(an.lm))&lt;br /&gt;&lt;br /&gt;an.lm2 = with(t2,lm(y~x))&lt;br /&gt;mylm2 = t(coef(an.lm2))&lt;br /&gt;;;;;&lt;br /&gt;%Quit;&lt;br /&gt;&lt;br /&gt;proc print data = mylm; run;&lt;br /&gt;proc print data = mylm2; run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;And here's what you get in the SAS log. 
&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;[First, proc_r result]&lt;br /&gt;&lt;br /&gt;******************R OUTPUT***********************    &lt;br /&gt;&lt;br /&gt;R_OUTPUT_LOG&lt;br /&gt;&lt;br /&gt;&gt; setwd("c:/temp")&lt;br /&gt;&gt; library(grDevices)&lt;br /&gt;&gt; png("c:/temp/....png")&lt;br /&gt;&gt; test&lt;- read.csv('c:/temp/test.csv')&lt;br /&gt;&gt; t2&lt;- read.csv('c:/temp/t2.csv')&lt;br /&gt;&gt; an.lm = with(test,lm(y ~x))&lt;br /&gt;&gt; mylm = t(coef(an.lm))&lt;br /&gt;&gt; summary(an.lm)&lt;br /&gt;&lt;br /&gt;Call:&lt;br /&gt;lm(formula = y ~ x)&lt;br /&gt;&lt;br /&gt;Residuals:&lt;br /&gt;Min      1Q  Median      3Q     Max&lt;br /&gt;-2.8571 -0.6430 -0.0051  0.6713  3.5903&lt;br /&gt;&lt;br /&gt;Coefficients:&lt;br /&gt;Estimate Std. Error t value Pr(&gt;|t|)&lt;br /&gt;(Intercept) 0.008568   0.031686    0.27    0.787&lt;br /&gt;x           1.020640   0.033315   30.64   &lt;2e-16 ***&lt;br /&gt;---&lt;br /&gt;Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1&lt;br /&gt;&lt;br /&gt;Residual standard error: 1.002 on 998 degrees of freedom&lt;br /&gt;Multiple R-squared: 0.4846, Adjusted R-squared: 0.4841&lt;br /&gt;F-statistic: 938.5 on 1 and 998 DF,  p-value: &lt; 2.2e-16&lt;br /&gt;&lt;br /&gt;&gt;&lt;br /&gt;&gt; an.lm2 = with(t2,lm(y~x))&lt;br /&gt;&gt; mylm2 = t(coef(an.lm2))&lt;br /&gt;&gt; write.csv(mylm,'mylm.csv',row.names=F)&lt;br /&gt;&gt; write.csv(mylm2,'mylm2.csv',row.names=F)&lt;br /&gt;&gt; dev.off()&lt;br /&gt;null device&lt;br /&gt;1&lt;br /&gt;&gt; q()&lt;br /&gt;&gt; proc.time()&lt;br /&gt;user  system elapsed&lt;br /&gt;0.28    0.10    0.37&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;[Here are the proc print results]&lt;br /&gt;Obs     _Intercept_               x&lt;br /&gt; 1     0.0085676126    1.0206400545&lt;br /&gt;&lt;br /&gt;Obs     _Intercept_               x&lt;br /&gt; 1      0.528410053    0.9851225238&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;(Page breaks and some extraneous stuff removed.)&lt;br /&gt;&lt;br /&gt;It's 
pretty magical for a SAS user to see R living in the SAS output like this.  But there are some caveats.  First, this is a windows-only macro.  If you run SAS on *nix, you may not be able to get it to work.  &lt;del&gt;Second, while the article has examples of graphics from R neatly appearing in SAS, this failed for me.  This may be due to the fact that I run SAS 9.3, while the author of the macro is still in earlier versions of SAS.  I may try to diagnose and fix this problem, and will update this entry if I find a fix.&lt;/del&gt; (Fixed!  See update below.)  &lt;br /&gt;&lt;br /&gt;&lt;b&gt;(UPDATE: Reader Abhijit suggested a &lt;tt&gt;setwd()&lt;/tt&gt; in the R code as a fix for the graphics problem.  This works, and I now get R graphics in my SAS results viewer.  Even more magical.  Code and output above updated to show this.  Thanks, Abhijit!)&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;However, these seem like minor problems, compared with the overall simplification offered by the macro.  It's been of great use to me in the past few months, and I expect it will help others as well.  Many thanks and congratulations to Xin Wei!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-7163812382593333327?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ul80gzv5T_Q:becEMgN8fVs:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ul80gzv5T_Q:becEMgN8fVs:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ul80gzv5T_Q:becEMgN8fVs:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ul80gzv5T_Q:becEMgN8fVs:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ul80gzv5T_Q:becEMgN8fVs:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=ul80gzv5T_Q:becEMgN8fVs:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ul80gzv5T_Q:becEMgN8fVs:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=ul80gzv5T_Q:becEMgN8fVs:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/ul80gzv5T_Q" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/7163812382593333327/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2012/01/sas-macro-simplifies-sas-and-r.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/7163812382593333327?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/7163812382593333327?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/ul80gzv5T_Q/sas-macro-simplifies-sas-and-r.html" title="SAS Macro Simplifies SAS and R integration (Updated)" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2012/01/sas-macro-simplifies-sas-and-r.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU8EQHs5cCp7ImA9WhRVE0w.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-1726271591341175267</id><published>2012-01-11T15:30:00.001-05:00</published><updated>2012-01-11T15:30:01.528-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-11T15:30:01.528-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="apply()" /><category scheme="http://www.blogger.com/atom/ns#" term="type=&quot;n&quot;" /><category scheme="http://www.blogger.com/atom/ns#" term="t() function" /><category scheme="http://www.blogger.com/atom/ns#" term="a*b=c syntax" /><category 
scheme="http://www.blogger.com/atom/ns#" term="proc gplot" /><category scheme="http://www.blogger.com/atom/ns#" term="graphics" /><category scheme="http://www.blogger.com/atom/ns#" term="running average" /><category scheme="http://www.blogger.com/atom/ns#" term="looping" /><title>Example 9.19: Demonstrating the central limit theorem</title><content type="html">&lt;a href="http://2.bp.blogspot.com/-miPYCsak3CE/Tw3sZt4cM9I/AAAAAAAADRA/q-xo0fDauAE/s1600/clt.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 322px;" src="http://2.bp.blogspot.com/-miPYCsak3CE/Tw3sZt4cM9I/AAAAAAAADRA/q-xo0fDauAE/s400/clt.jpg" border="0" alt="" id="BLOGGER_PHOTO_ID_5696469030250755026" /&gt;&lt;/a&gt;&lt;br /&gt;A colleague recently asked "why should the average get closer to the mean when we increase the sample size?"  We should interpret this question as asking why the standard error of the mean gets smaller as n increases.  The &lt;a href="http://en.wikipedia.org/wiki/Central_limit_theorem"&gt;central limit theorem&lt;/a&gt; shows that (under certain conditions, of course) the standard error &lt;span style="font-style:italic;"&gt;must&lt;/span&gt; do this, and that the mean approaches a normal distribution.  But the question was &lt;span style="font-style:italic;"&gt;why&lt;/span&gt; does it?  This seems so natural that it may have gone unquestioned in the past.  &lt;br /&gt;&lt;br /&gt;The best simple rationale may be that there are more ways to get middle values than extreme values--for example, the mean of a die roll (uniform discrete distribution on 1, 2, ..., 6) is 3.5.  With one die, you're equally likely to get an "average" of 3 or of 1.  But with two dice there are five ways to get an average of 3, and only one way to get an average of 1.  
You're 5 times more likely to get the value that's closer to the mean than the one that's further away.&lt;br /&gt;&lt;br /&gt;Here's a simple graphic to show &lt;i&gt;that&lt;/i&gt; the standard error decreases with increasing n.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;We begin by simulating some data-- normal, here, but of course that doesn't matter (assuming that the standard deviation exists for whatever distribution we pick and the sample size is appropriately large). Rather than simulate separate samples with n = 1 ... k, it's easier to add a random variate to a series and keep a running tally of the mean, which is easy with a little algebra.  This approach also allows tracking the progress of the mean of each series, which could also be useful.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;%let nsamp = 100;&lt;br /&gt;data normal;&lt;br /&gt;do sample = 1 to &amp;nsamp;&lt;br /&gt;  meanx = 0;&lt;br /&gt;  do obs = 1 to &amp;nsamp;&lt;br /&gt;    x = normal(0);&lt;br /&gt; meanx = ((meanx * (obs -1)) + x)/obs;&lt;br /&gt; output;&lt;br /&gt;  end;&lt;br /&gt;end;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;We can now plot the means vs. the number of observations.   &lt;br /&gt;&lt;pre&gt;&lt;br /&gt;symbol1 i = none v = dot h = .2;&lt;br /&gt;proc gplot data = normal;&lt;br /&gt;plot meanx * obs;&lt;br /&gt;run;&lt;br /&gt;quit;&lt;br /&gt;&lt;br /&gt;symbol1 i=join v=none r=&amp;nsamp;&lt;br /&gt;proc gplot data=normal;&lt;br /&gt;  plot meanx * obs = sample / nolegend;&lt;br /&gt;run; quit;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The graphic resulting from the first &lt;tt&gt;proc gplot&lt;/tt&gt; is shown above, and demonstrates both how quickly the variability of the estimate of the mean decreases when n is small, and how little it changes when n is larger.  A plot showing the means for each sequence converging can be generated with the second block of code.  
Note the use of the global macro variable &lt;tt&gt;nsamp&lt;/tt&gt; assigned using the &lt;tt&gt;%let&lt;/tt&gt; statement (section A.8.2).&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;We'll also generate sequences of variates in R.  We'll do this by putting the random variates in a matrix, and treating each row as a sequence.  We'll use the &lt;tt&gt;apply()&lt;/tt&gt; function (sections 1.10.6 and B.5.3) to treat each row of the matrix separately. &lt;br /&gt;&lt;pre&gt;&lt;br /&gt;numsim = 100&lt;br /&gt;matx = matrix(rnorm(numsim^2), nrow=numsim)&lt;br /&gt;&lt;br /&gt;runavg = function(x) { cumsum(x)/(1:length(x)) }&lt;br /&gt;ramatx = t(apply(matx, 1, runavg))&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The simple function &lt;tt&gt;runavg()&lt;/tt&gt; calculates the running average of a vector and returns a vector of equal length.  By using it as the function in &lt;tt&gt;apply()&lt;/tt&gt; we can get the running average of each row.  The result must be transposed (with the &lt;tt&gt;t()&lt;/tt&gt; function, section 1.9.2) to keep the sequences in rows.  To plot the values, we'll use the &lt;tt&gt;type="n"&lt;/tt&gt; option to &lt;tt&gt;plot()&lt;/tt&gt;, specifying the first column of the running averages as the y variable.  
While it's  possible that the running average will surpass the average when n=1, we ignore that case in this simple demonstration.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;plot(x=1:numsim, y = ramatx[,1], type="n",&lt;br /&gt;  xlab="number of observations", ylab="running mean")&lt;br /&gt;rapoints = function(x) points(x~seq(1:length(x)), pch=20, cex=0.2)&lt;br /&gt;apply(ramatx,1,rapoints)&lt;br /&gt;&lt;br /&gt;plot(x=1:numsim, y = ramatx[,1], type="n",&lt;br /&gt;  xlab="number of observations", ylab="running mean")&lt;br /&gt;ralines = function(x) lines(x~seq(1:length(x)))&lt;br /&gt;apply(ramatx, 1, ralines)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Here we define another simple function to plot the values in a vector against the place number, then again use the &lt;tt&gt;apply()&lt;/tt&gt; function to plot each row as a vector.  The first set of code generates a plot resembling the SAS graphic presented above.  The second set of code will connect the values in each sequence, with results shown below.&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-x_h6AaAurFk/Tw3OhEFKjBI/AAAAAAAAAG0/BftIWkPf9i4/s1600/Rplot01.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 273px;" src="http://2.bp.blogspot.com/-x_h6AaAurFk/Tw3OhEFKjBI/AAAAAAAAAG0/BftIWkPf9i4/s400/Rplot01.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5696436171119954962" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-1726271591341175267?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ixdoITtHwMk:S4P8_fk2wdM:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ixdoITtHwMk:S4P8_fk2wdM:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ixdoITtHwMk:S4P8_fk2wdM:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ixdoITtHwMk:S4P8_fk2wdM:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ixdoITtHwMk:S4P8_fk2wdM:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=ixdoITtHwMk:S4P8_fk2wdM:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=ixdoITtHwMk:S4P8_fk2wdM:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=ixdoITtHwMk:S4P8_fk2wdM:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/ixdoITtHwMk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/1726271591341175267/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2012/01/example-919-demonstrating-central-limit.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1726271591341175267?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1726271591341175267?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/ixdoITtHwMk/example-919-demonstrating-central-limit.html" title="Example 9.19: Demonstrating the central limit theorem" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-miPYCsak3CE/Tw3sZt4cM9I/AAAAAAAADRA/q-xo0fDauAE/s72-c/clt.jpg" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2012/01/example-919-demonstrating-central-limit.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DU4DRnw6eCp7ImA9WhRWGEs.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-213752615623972469</id><published>2012-01-05T14:51:00.009-05:00</published><updated>2012-01-06T11:39:37.210-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-06T11:39:37.210-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="subsetting" /><category 
scheme="http://www.blogger.com/atom/ns#" term="RCurl package" /><category scheme="http://www.blogger.com/atom/ns#" term="reshape package" /><category scheme="http://www.blogger.com/atom/ns#" term="point option" /><category scheme="http://www.blogger.com/atom/ns#" term="google spreadsheet" /><category scheme="http://www.blogger.com/atom/ns#" term="which.min() function" /><category scheme="http://www.blogger.com/atom/ns#" term="sort" /><category scheme="http://www.blogger.com/atom/ns#" term="nobs option" /><category scheme="http://www.blogger.com/atom/ns#" term="set statement options" /><category scheme="http://www.blogger.com/atom/ns#" term="read from URL" /><title>Example 9.18: Constructing the fastest relay team via enumeration</title><content type="html">In competitive swimming, the medley relay is a team event in which four different swimmers each swim one of the four strokes: freestyle, breaststroke, backstroke, and butterfly.  In general, every swimmer might be able swim any given stroke.  How can we choose the fastest relay team?  Here we solve this by enumerating all possible teams, though a more efficient routine likely exists.&lt;br /&gt;&lt;br /&gt;Some example practice times can be seen on &lt;a href="https://docs.google.com/spreadsheet/ccc?key=0AvJKgZUzMYLYdE5xTHlEWkNUM3NoOHB1ZTJoTFMzUUE"&gt;this&lt;/a&gt; Google Spreadsheet.  Also, using the steps outlined &lt;a href="http://blog.revolutionanalytics.com/2009/09/how-to-use-a-google-spreadsheet-as-data-in-r.html"&gt;here&lt;/a&gt;, the same spreadsheet is available as a CSV file &lt;a href="https://docs.google.com/spreadsheet/pub?hl=en_US&amp;hl=en_US&amp;key=0AvJKgZUzMYLYdE5xTHlEWkNUM3NoOHB1ZTJoTFMzUUE&amp;output=html"&gt;here&lt;/a&gt;.  
(FTR, these are actual practice times for 100 yards for mostly 12-13 year-old swimmers; the names have been changed.)&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;We first read the data from the URL, using the technique outlined in section 1.1.6.  Note that if you cut-and-paste this, you'll need to get the whole URL onto one line-- we break it up here for display only.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;filename swimurl url 'https://docs.google.com/spreadsheet&lt;br /&gt;/pub?key=0AvJKgZUzMYLYdE5xTHlEWkNUM3NoOHB1ZTJoTFMzUUE&amp;&lt;br /&gt;single=true&amp;gid=0&amp;output=csv';&lt;br /&gt;&lt;br /&gt;proc import datafile=swimurl out=swim dbms=csv;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Next, we use the &lt;tt&gt;point=&lt;/tt&gt; option in nested &lt;tt&gt;set&lt;/tt&gt; statements to generate a single data set with all the possible combinations of names and times.  Meanwhile we change the names of the variables so they don't get overwritten in the next &lt;tt&gt;set&lt;/tt&gt; statement.  Note the use of the &lt;tt&gt;nobs&lt;/tt&gt; option to find the number of rows in the data set.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data permute &lt;br /&gt;    (keep=free freetime fly flytime back backtime  breast breasttime);&lt;br /&gt;set swim (rename = (swimmer=free freestyle=freetime)) nobs=nobs;&lt;br /&gt;do i = 1 to nobs;&lt;br /&gt;  set swim(rename = (swimmer=fly butterfly=flytime)) point=i;&lt;br /&gt;  do j = 1 to nobs;&lt;br /&gt;    set swim(rename = (swimmer=back backstroke=backtime)) point=j;&lt;br /&gt;    do k = 1 to nobs;&lt;br /&gt;      set swim(rename = (swimmer=breast breaststroke=breasttime)) &lt;br /&gt;        point = k;&lt;br /&gt;      output;&lt;br /&gt; end;&lt;br /&gt;  end;&lt;br /&gt;end;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The resulting data set has 12^4 rows, and includes rosters with the same swimmer swimming all four legs.  
In fact, a quick glance will show that Anna has the best time in each stroke, and thus the best "team" based on these practice times would use her for each stroke.  This is against the rules, and also probably isn't reflective of performance in a race.  We'll remove illegal line-ups using a &lt;tt&gt;where&lt;/tt&gt; statement (section 1.5.1) and also calculate the total team time.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data prep;&lt;br /&gt;set permute;&lt;br /&gt;where free ne back and free ne breast and free ne fly and &lt;br /&gt;  back ne breast and back ne fly and breast ne fly;&lt;br /&gt;time = sum(freetime, flytime, backtime, breasttime);&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The resulting data set has (12 permute 4) lines.  To find the best team, we just sort by the total time and look at the first line.  Here the first 10 lines (10 best teams) are shown.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc sort data=prep; by time; run;&lt;br /&gt;proc print data=prep (obs=10); run;&lt;br /&gt;&lt;br /&gt;                                                        b&lt;br /&gt;                                                        r&lt;br /&gt;           f                             b              e&lt;br /&gt;           r              f              a              a&lt;br /&gt;           e              l              c      b       s&lt;br /&gt;           e              y              k      r       t&lt;br /&gt;   f       t              t      b       t      e       t      t&lt;br /&gt;   r       i      f       i      a       i      a       i      i&lt;br /&gt;   e       m      l       m      c       m      s       m      m&lt;br /&gt;   e       e      y       e      k       e      t       e      e&lt;br /&gt;&lt;br /&gt; Kara    109.3  Dora    126.8  Lara    117.7  Anna    126.9  480.7&lt;br /&gt; Anna    102.8  Dora    126.8  Lara    117.7  Beth    134.6  481.9&lt;br /&gt; Kara    109.3  Anna    120.5  Lara    117.7  Beth    134.6  482.1&lt;br /&gt; Anna    
102.8  Dora    126.8  Lara    117.7  Honora  136.4  483.7&lt;br /&gt; Kara    109.3  Jane    129.8  Lara    117.7  Anna    126.9  483.7&lt;br /&gt; Kara    109.3  Anna    120.5  Lara    117.7  Dora    136.4  483.9&lt;br /&gt; Kara    109.3  Anna    120.5  Lara    117.7  Honora  136.4  483.9&lt;br /&gt; Kara    109.3  Lara    123.1  Jane    124.7  Anna    126.9  484.0&lt;br /&gt; Anna    102.8  Dora    126.8  Lara    117.7  Inez    136.8  484.1&lt;br /&gt; Carrie  112.7  Dora    126.8  Lara    117.7  Anna    126.9  484.1&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The best time shaves a whole second off the predicted time using the second-best team.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;Since published Google Spreadsheets use &lt;tt&gt;https&lt;/tt&gt; rather than &lt;tt&gt;http&lt;/tt&gt;, we use the &lt;tt&gt;RCurl&lt;/tt&gt; package and its &lt;tt&gt;getURL()&lt;/tt&gt; function.  (Note that if you cut-and-paste this, you'll need to get the whole URL onto one line-- we break it up here for display only.)  Then we can read the data with &lt;tt&gt;read.csv()&lt;/tt&gt; and &lt;tt&gt;textConnection()&lt;/tt&gt;.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;library(RCurl)&lt;br /&gt;swim = getURL("https://docs.google.com/spreadsheet&lt;br /&gt;/pub?key=0AvJKgZUzMYLYdE5xTHlEWkNUM3NoOHB1ZTJoTFMzUUE&amp;&lt;br /&gt;single=true&amp;gid=0&amp;output=csv")&lt;br /&gt;&lt;br /&gt;swim2=read.csv(textConnection(swim))&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;To make an object with the combinations of names, we use the &lt;tt&gt;expand.grid()&lt;/tt&gt; function highlighted in &lt;a href="http://sas-and-r.blogspot.com/2010/01/example-722-knapsack-problem.html"&gt;Example 7.22&lt;/a&gt;, providing as arguments the swimmers' names four times.  As in the SAS example, the result has 12^4 rows.  
The &lt;tt&gt;combn()&lt;/tt&gt; function might be a better fit here, but was more difficult to use.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;test2 = expand.grid(swim2$Swimmer,swim2$Swimmer, swim2$Swimmer, swim2$Swimmer)  &lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;It'll be useful to assign these copies of the names to each of the strokes.  We'll do that with the &lt;tt&gt;rename()&lt;/tt&gt; function available in the &lt;tt&gt;reshape&lt;/tt&gt; package.  (This approach is mentioned in section 1.3.4.)  Then we can remove the rows where the same name appears twice using some logic.  The logic is nested in the &lt;tt&gt;with()&lt;/tt&gt; function to save some keystrokes &lt;a href="http://sas-and-r.blogspot.com/2011/05/to-attach-or-not-attach-that-is.html"&gt;and is generally preferable to &lt;tt&gt;attach()&lt;/tt&gt;ing&lt;/a&gt; the test2 object.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;library(reshape)&lt;br /&gt;test2 = rename(test2, c("Var1" = "free", "Var2" = "fly", &lt;br /&gt;  "Var3" = "back", "Var4" = "breast"))&lt;br /&gt;test3 = with(test2, test2[(free != breast) &amp; (free != fly) &lt;br /&gt;  &amp; (free != back) &amp; (breast != fly) &amp; (breast != back) &lt;br /&gt;  &amp; (fly != back) ,])&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Finally, we can use the &lt;tt&gt;which.min()&lt;/tt&gt; function to pick the best team.  &lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; bestteam =   &lt;br /&gt;+ test3[which.min(swim2$Freestyle[test3$free]+swim2$Breaststroke[test3$breast] +&lt;br /&gt;+ swim2$Butterfly[test3$fly] + swim2$Backstroke[test3$back]),]&lt;br /&gt;&gt;bestteam&lt;br /&gt;     free  fly back breast&lt;br /&gt;1631 Kara Dora Lara   Anna&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;For new users of R, this may look very peculiar-- it uses elegant and powerful features of the R language that may be challenging for new users to grasp.  
Essentially, in &lt;tt&gt;swim2$Freestyle[test3$free]&lt;/tt&gt; we say: from the "freestyle" times in the swim2 object, take the time from the row that has the name in a row of "free" names in the test3 object.  The &lt;tt&gt;which.min()&lt;/tt&gt; function replicates this request for every row in the test3 object (which has all of the permutations) in it, returning the row number with that minimum sum.  The outer &lt;tt&gt;test3[rows,columns]&lt;/tt&gt; syntax grabs the &lt;i&gt;values&lt;/i&gt; in this row.  (The number 1631 is the row number, for some reason showing the row in the test2 object created by &lt;tt&gt;expand.grid()&lt;/tt&gt;.)&lt;br /&gt;&lt;br /&gt;Now, we might also want the actual times associated with the best team.  We can find them by calling the correct rows (names from the best team) and columns (stroke associated with that name) from the original data set. &lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; times = c(swim2[swim2$Swimmer == bestteam$free,2], &lt;br /&gt;+      swim2[swim2$Swimmer == bestteam$fly,3], &lt;br /&gt;+      swim2[swim2$Swimmer == bestteam$back,4], &lt;br /&gt;+      swim2[swim2$Swimmer == bestteam$breast,5])&lt;br /&gt;&gt; times&lt;br /&gt;[1] 109.3 126.8 117.7 126.9&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;If instead, one wanted to list the times in order, one approach would be to add columns to the test3 object with the time for each stroke, calculate their sum, and sort on the sum.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-213752615623972469?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_VZkK5AIadg:ca9Z2o03BvU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_VZkK5AIadg:ca9Z2o03BvU:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_VZkK5AIadg:ca9Z2o03BvU:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_VZkK5AIadg:ca9Z2o03BvU:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_VZkK5AIadg:ca9Z2o03BvU:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=_VZkK5AIadg:ca9Z2o03BvU:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_VZkK5AIadg:ca9Z2o03BvU:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=_VZkK5AIadg:ca9Z2o03BvU:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/_VZkK5AIadg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/213752615623972469/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2012/01/example-918-constructing-fastest-relay.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/213752615623972469?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/213752615623972469?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/_VZkK5AIadg/example-918-constructing-fastest-relay.html" title="Example 9.18: Constructing the fastest relay team via enumeration" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><thr:total>4</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2012/01/example-918-constructing-fastest-relay.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0MAQXY_cSp7ImA9WhRQEUU.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-5239656263349495475</id><published>2011-12-06T09:24:00.004-05:00</published><updated>2011-12-06T09:24:00.849-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-12-06T09:24:00.849-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="scatterplot" /><category scheme="http://www.blogger.com/atom/ns#" term="grammar of graphics" /><category scheme="http://www.blogger.com/atom/ns#" term="ggplot2 package" /><category 
scheme="http://www.blogger.com/atom/ns#" term="GGally package" /><category scheme="http://www.blogger.com/atom/ns#" term="Hadley Wickham" /><category scheme="http://www.blogger.com/atom/ns#" term="boxplot" /><category scheme="http://www.blogger.com/atom/ns#" term="John Emerson" /><category scheme="http://www.blogger.com/atom/ns#" term="generalized pairs plots" /><category scheme="http://www.blogger.com/atom/ns#" term="HELP data set" /><category scheme="http://www.blogger.com/atom/ns#" term="mosaic plot" /><category scheme="http://www.blogger.com/atom/ns#" term="pairs plots" /><title>Example 9.17: (much) better pairs plots</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-7fFbTF17tXo/Trg0rGB0qQI/AAAAAAAAAGI/Txv4OUBX9wc/s1600/Rplot02.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 400px;" src="http://3.bp.blogspot.com/-7fFbTF17tXo/Trg0rGB0qQI/AAAAAAAAAGI/Txv4OUBX9wc/s400/Rplot02.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5672341645630417154" /&gt;&lt;/a&gt;&lt;br /&gt;Pairs plots (section 5.1.17) are a useful way of displaying the pairwise relations between variables in a dataset.  But the default display is unsatisfactory when the variables aren't all continuous.  In this entry, we discuss ways to improve these displays that have been proposed by John Emerson, Walton Green, Barret Schloerke, Dianne Cook, Heike Hofmann, and Hadley Wickham in a manuscript under review entitled &lt;i&gt;The Generalized Pairs Plot&lt;/i&gt;.  
&lt;br /&gt;&lt;br /&gt;Implementations of the methods in the paper are available in the &lt;tt&gt;gpairs&lt;/tt&gt; and &lt;tt&gt;GGally&lt;/tt&gt; packages; here we use the latter, which is based on the &lt;a href="http://www.amazon.com/gp/product/0387245448/ref=as_li_ss_tl?ie=UTF8&amp;tag=sasandrblog-20&amp;linkCode=as2&amp;camp=1789&amp;creative=390957&amp;creativeASIN=0387245448"&gt;grammar of graphics&lt;/a&gt; and the ggplot2 package.  This is an R-only entry: we are unaware of efforts to replicate this approach in SAS.&lt;br /&gt;&lt;br /&gt;New users may find it easier to break the process down into steps, rather than to do everything at once, as the R language allows.  One way to do that is to make a smaller version of a dataset, with just the analysis variables included.  Here we use the HELP data set and choose two categorical variables (gender and housing status) and two continuous ones (the number of drinks per day and a measure of depressive symptoms).  Once this new subset is created, the call to &lt;tt&gt;ggpairs()&lt;/tt&gt; is straightforward.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;library(GGally)&lt;br /&gt;ds = read.csv("http://www.math.smith.edu/r/data/help.csv")&lt;br /&gt;ds$sex = as.factor(ifelse(ds$female==1, "female", "male"))&lt;br /&gt;ds$housing = as.factor(ifelse(ds$homeless==1, "homeless", "housed"))&lt;br /&gt;smallds = subset(ds, select=c("housing", "sex", "i1", "cesd"))&lt;br /&gt;ggpairs(smallds, diag=list(continuous="density", discrete="bar"), axisLabels="show")&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;For users more comfortable with R, the &lt;tt&gt;ggpairs&lt;/tt&gt; function allows you to select variables to include, via its &lt;tt&gt;columns&lt;/tt&gt; option.  
The following line produces a plot identical to the above, without the &lt;tt&gt;subset()&lt;/tt&gt;.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ggpairs(ds, columns=c("housing", "sex", "i1", "cesd"),&lt;br /&gt;    diag=list(continuous="density",   discrete="bar"), axisLabels="show")&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Various options are available for the diagonal elements of the plot matrix, and the off-diagonals can be controlled with &lt;tt&gt;upper&lt;/tt&gt; and &lt;tt&gt;lower&lt;/tt&gt; options. The &lt;tt&gt;example(ggpairs)&lt;/tt&gt; command is very helpful for visualizing some of the possibilities.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-5239656263349495475?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=pU_HNl9T2cY:cozCkMmOV3s:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=pU_HNl9T2cY:cozCkMmOV3s:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=pU_HNl9T2cY:cozCkMmOV3s:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=pU_HNl9T2cY:cozCkMmOV3s:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=pU_HNl9T2cY:cozCkMmOV3s:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=pU_HNl9T2cY:cozCkMmOV3s:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=pU_HNl9T2cY:cozCkMmOV3s:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=pU_HNl9T2cY:cozCkMmOV3s:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/pU_HNl9T2cY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/5239656263349495475/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/12/example-917-much-better-pairs-plots.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/5239656263349495475?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/5239656263349495475?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/pU_HNl9T2cY/example-917-much-better-pairs-plots.html" title="Example 9.17: (much) better pairs plots" /><author><name>Nick Horton</name><uri>http://www.blogger.com/profile/00242216324355342047</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="25" height="32" src="http://1.bp.blogspot.com/_Y7yEfDrPG4o/SjJwk69Z2tI/AAAAAAAAAAM/KoYswr20tck/S220/nh2.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-7fFbTF17tXo/Trg0rGB0qQI/AAAAAAAAAGI/Txv4OUBX9wc/s72-c/Rplot02.png" height="72" width="72" /><thr:total>2</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/12/example-917-much-better-pairs-plots.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Dk8FRXw-eip7ImA9WhRRFUU.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-6621654968459651308</id><published>2011-11-29T09:09:00.010-05:00</published><updated>2011-11-29T11:40:14.252-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-29T11:40:14.252-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="SAS macro" /><category scheme="http://www.blogger.com/atom/ns#" 
term="proc gchart" /><category scheme="http://www.blogger.com/atom/ns#" term="Edward Tufte" /><category scheme="http://www.blogger.com/atom/ns#" term="repeated multiples" /><category scheme="http://www.blogger.com/atom/ns#" term="proc greplay" /><category scheme="http://www.blogger.com/atom/ns#" term="call symput" /><category scheme="http://www.blogger.com/atom/ns#" term="colors" /><category scheme="http://www.blogger.com/atom/ns#" term="Michael Friendly" /><category scheme="http://www.blogger.com/atom/ns#" term="pie() function" /><category scheme="http://www.blogger.com/atom/ns#" term="grep() function" /><category scheme="http://www.blogger.com/atom/ns#" term="college majors" /><category scheme="http://www.blogger.com/atom/ns#" term="drop statement" /><title>Example 9.16: Small multiples</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-Q9of7M3QUCA/TsP7G916GCI/AAAAAAAADPA/FasOjk7NPjM/s1600/small_r.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 600px; height: 300px;" src="http://3.bp.blogspot.com/-Q9of7M3QUCA/TsP7G916GCI/AAAAAAAADPA/FasOjk7NPjM/s1600/small_r.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5675656052515412002" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Small multiples are one of the great ideas of graphics visionary Edward Tufte (e.g., in &lt;a href="http://www.amazon.com/gp/product/0961392118/ref=as_li_tf_tl?ie=UTF8&amp;tag=sasandrblog-20&amp;linkCode=as2&amp;camp=217145&amp;creative=399369&amp;creativeASIN=0961392118" target="_blank"&gt;Envisioning Information&lt;/a&gt;).  Briefly, the idea is that if many variations on a theme are presented, differences quickly become apparent.  Today we offer general guidance on creating figures with small multiples.  &lt;br /&gt;&lt;br /&gt;As an example, we'll show graphics for the popularity, salary, and unemployment rates for college majors.  
This data was discussed &lt;a href="http://simplystatistics.tumblr.com/post/12599452125/expected-salary-by-major" target="_blank"&gt;here&lt;/a&gt; where a scatterplot graphic was presented.  We draw on data and code presented there as well.   The scatterplot does not show the unemployment rate, and the width of the field names and arbitrarily sized text makes it difficult to determine the popularity ranking.  In contrast, the small multiples plot, demonstrated above, makes each of these features easy to see.  (Click on the picture for a clearer image.)&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;The graphics options in R, particularly &lt;tt&gt;par("mfrow")&lt;/tt&gt; or &lt;tt&gt;par("mfcol")&lt;/tt&gt;, are well-suited to small multiples.  The main tip here is to minimize the space reserved for margins and titles.  In the example below, we do this with the &lt;tt&gt;mar&lt;/tt&gt;, &lt;tt&gt;mgp&lt;/tt&gt;, and &lt;tt&gt;oma&lt;/tt&gt; settings.  We'll begin by setting up the data in a process that relies heavily on the code &lt;a href="http://rafalab.jhsph.edu/images/majors.zip"&gt;here&lt;/a&gt;.  
(Note that the zip file referred to includes data already in the R format-- since our point today is not data management, we don't replicate the process used to make this out of the raw data.)&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;clean = function(x) as.numeric(gsub("\\$|, ", "", x))&lt;br /&gt;clean2 = function(x) as.numeric(gsub("%", "", x))&lt;br /&gt;load("table.rda")&lt;br /&gt;X[,2] = clean2(X[,2])&lt;br /&gt;for (i in 3:5) X[,i] = clean(X[,i])&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;X$cols = NA&lt;br /&gt;X$cols[grep("BUSI|ACC|FINAN",X[,1])] = 1&lt;br /&gt;X$cols[grep("ENGINEERING",X[,1])] = 2&lt;br /&gt;X$cols[grep("STAT|COMPU",X[,1])] = 3&lt;br /&gt;X$cols[grep("BIOL|CHEMI|PHYSICS|MATHEM",X[,1])] =  4&lt;br /&gt;X$cols[grep("ENGLISH|HISTORY|LANG|FINE|MUSIC|PHILOS|DRAMA|LIBERAL|ARCH|THEO",X[,1])] = 5&lt;br /&gt;X$cols[grep("SOCIO|PSYCH|POLI|ECONO|JOURNAL",X[,1])] = 6&lt;br /&gt;X$cols[grep("COMMUN|MARKET|COMMER|MULTI|MASS|ADVERT",X[,1])] = 7&lt;br /&gt;X$cols[grep("NURS|CRIM|EDU|PHYSICAL|FAMI|SOCIAL|TREAT|HOSP|HUMAN",X[,1])] = 8&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;This removes some funny characters and groups the fields together in a coherent manner.  Then we write a function to set the &lt;tt&gt;par()&lt;/tt&gt; values we need to change, and plot a pie for each row of the data set.  Here a for loop is used-- we're not aware of a vectorized version of the &lt;tt&gt;pie()&lt;/tt&gt; function.  
Colors for each pie are assigned via the &lt;tt&gt;colors&lt;/tt&gt;.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;sortx = X[order(X$Popularity),]&lt;br /&gt;&lt;br /&gt;smajors = function(mt) {&lt;br /&gt;  old.par = par(no.readonly=TRUE) &lt;br /&gt;  on.exit(par(old.par))&lt;br /&gt;  nrows = sqrt(nrow(mt)) + (ceiling(sqrt(nrow(mt))) !=  sqrt(nrow(mt)))&lt;br /&gt;  par(mfrow=c(nrows,nrows), mar=c(1,0,0,0), oma=c(0,0,0,0), mgp=c(0,0,0))&lt;br /&gt;  colors = c("blue", "purple", "purple", "gray", "orange", "gray", "red", "black")&lt;br /&gt;  for (i in 1:nrow(mt)) {&lt;br /&gt;    pie(c(mt[i,2], 100 - mt[i,2]), labels=NA, radius=mt[i,4]/max(mt[,4]), &lt;br /&gt;        col = c("white",colors[mt[i,7]]))&lt;br /&gt;    mtext(substr(mt[i,1],1,19), side=1, cex=.8)&lt;br /&gt;  }&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;smajors(sortx[1:49,])&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The resulting plot is shown above.  There may be too many colors, though statistics and computing were assigned the same color as engineering.  We can easily read from the plot that computing, statistics, and engineering (purple) are the largest circles, and thus the best paying.  Similarly, the humanities (orange) are the worst paying.  They are also not terribly popular-- the first orange pie appears in the second row.  The "professions" (nursing, teaching, policing, therapy) don't pay well but are fairly popular.  Most pies have roughly equal unemployment, though nursing and the professions generally are notable for near full employment.  All in all, the rank, salary, unemployment percentage, and field are all clearer than in the scatterplot.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;In SAS, one can use the &lt;tt&gt;greplay&lt;/tt&gt; procedure to reproduce images in miniature.  One has to define the size and shape allotted for each replayed plot, in a stored "template."  This allows enormous control, but at the cost of some complexity.  
For example, one can create a scatterplot matrix using &lt;tt&gt;proc gplot&lt;/tt&gt; instead of &lt;tt&gt;proc sgplot&lt;/tt&gt;, as in &lt;a href="http://www.datavis.ca/sasmac/scatmat.html"&gt;this&lt;/a&gt; implementation by Michael Friendly.  If you can generate your multiple images with a &lt;tt&gt;by&lt;/tt&gt; statement, the coding for this is not too painful.  However, in this case, it was not obvious how to change the color and radius for each pie using a &lt;tt&gt;by&lt;/tt&gt; statement in &lt;tt&gt;proc gchart&lt;/tt&gt; which includes pie charts, and would thus have been the obvious choice.  In such cases, it may be easier to plot the figures directly using an annotate data set.&lt;br /&gt;&lt;br /&gt;However, having demonstrated the use of annotate previously (e.g., &lt;a href="http://sas-and-r.blogspot.com/2010/11/example-813-bike-ride-plot-part-2.html"&gt;Example 8.13&lt;/a&gt;), we show here an application using &lt;tt&gt;greplay&lt;/tt&gt;, though the coding is a little bit involved.  In outline, we use a macro to call for a pie to be made from each observation of the original data set.  Then we use a template-making macro found &lt;a href="http://www.devenezia.com/downloads/sas/macros/index.php?m=makegridtemplate"&gt;here&lt;/a&gt; to generate the 7X7 grid template.  Finally, we replay the pies into the grid.  
&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;/* read the data-- note that text file edited to remove spaces in &lt;br /&gt;    variable names */&lt;br /&gt;proc import datafile = &lt;br /&gt;   'c:\sas-r dictionary\p\book\web\blog\majors\majors\table.txt'&lt;br /&gt;   out = majors;&lt;br /&gt;   getnames = yes;&lt;br /&gt;   run;&lt;br /&gt;&lt;br /&gt;/* set up the field categories and colors */&lt;br /&gt;data m2;&lt;br /&gt;set majors;&lt;br /&gt;cols="      ";     /* make a missing character variable to hold them */&lt;br /&gt;mf = majorfield;   /* just a copy to save keystrokes */&lt;br /&gt;   /* the find() function is discussed in section 1.4.6 */&lt;br /&gt;if sum(find(mf,"BUSI"), find(mf,"ACC"), find(mf,"FINAN")) ne 0 then cols = "blue";&lt;br /&gt;if find(mf,"ENGINEERING") ne 0 then cols = "purple";&lt;br /&gt;if sum(find(mf,"STAT"), find(mf,"COMPU"), find(mf,"SYSTEMS")) ne 0 then cols = "purple";&lt;br /&gt;if sum(find(mf,"BIOL"), find(mf,"CHEMI"), find(mf,"PHYSICS"), find(mf,"MATH")) ne 0 then cols = "gray";&lt;br /&gt;if sum(find(mf,"ENGL"), find(mf,"HIST"), find(mf,"FRENCH"), find(mf,"FINE"),&lt;br /&gt;     find(mf,"MUSIC"), find(mf,"PHIL"), find(mf,"DRAMA"), find(mf,"LIBERAL"),&lt;br /&gt;     find(mf,"ARCH"), find(mf,"THEO")) ne 0 then cols = "orange";&lt;br /&gt;if sum(find(mf,"SOCI"), find(mf,"PSYCH"), find(mf,"POLI"), find(mf,"ECON"),&lt;br /&gt;     find(mf,"JOURN"), find(mf,"LIBERAL")) ne 0 then cols = "gray";&lt;br /&gt;if sum(find(mf,"COMMU"), find(mf,"MARKET"), find(mf,"COMMER"), find(mf,"MULTI"),&lt;br /&gt;     find(mf,"MASS"), find(mf,"ADVERT")) ne 0 then cols = "red";&lt;br /&gt;if sum(find(mf,"NURS"), find(mf,"CRIM"), find(mf,"EDU"), find(mf,"PHYSICAL"),&lt;br /&gt;     find(mf,"FAMI"), find(mf,"SOCIAL"), find(mf,"TREAT"),&lt;br /&gt;     find(mf,"HOSP"), find(mf,"HUMAN")) ne 0 then cols = "black";&lt;br /&gt;drop MF;   /* get rid of that keystroke-saver */&lt;br /&gt;run; &lt;br /&gt;&lt;br /&gt;/* order by popularity */&lt;br /&gt;proc sort 
data = m2; by popularity; run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The next macro takes a line number as input.  A data step then reads that line from the real data set and uses &lt;tt&gt;call symput&lt;/tt&gt; (section A.8.2) to extract as macro variables the median earnings used for the radius, the color, and the major name.  It then produces two rows of data-- one with the unemployed percent and the other with the employed percent.  We need this for the &lt;tt&gt;proc gchart&lt;/tt&gt; input, as shown in the second half of the macro.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;%macro smpie(obs);&lt;br /&gt;data t1;&lt;br /&gt;set m2 (firstobs = &amp;obs obs = &amp;obs);&lt;br /&gt;call symput('rm', medianearnings);&lt;br /&gt;call symput('color', cols);&lt;br /&gt;call symput('name', strip(substr(majorfield,1,19)));&lt;br /&gt;employed = "No"; percent = unemploymentpercent; output;&lt;br /&gt;employed = "Yes"; percent = 100 - unemploymentpercent; output;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;pattern1 color= white v=solid;&lt;br /&gt;pattern2 color= &amp;color v=solid;  /* only pattern2 should be needed, I think, but */&lt;br /&gt;pattern3 color= &amp;color v=solid;  /* sometimes SAS required pattern3, in my trials */&lt;br /&gt;title h=7pct "&amp;name";&lt;br /&gt;proc gchart data = t1 gout = kkpies;&lt;br /&gt;pie percent / group = majorfield sumvar = percent value = none &lt;br /&gt;    noheading nogroupheading radius = %sysevalf((&amp;rm * 45)/ 105000)&lt;br /&gt;    name = "PIE&amp;obs" ; &lt;br /&gt;run;quit;&lt;br /&gt;%mend smpie;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Of particular note in the foregoing are the &lt;tt&gt;gout&lt;/tt&gt; and &lt;tt&gt;name&lt;/tt&gt; options.  The former specifies a location where output plots should be stored.  The latter assigns a name to this particular plot.  In addition, the &lt;tt&gt;%sysevalf&lt;/tt&gt; macro function is needed to perform mathematical functions on the radius variable.  
&lt;br /&gt;&lt;br /&gt;Next, we write and call another macro to call the first repeatedly.  Making the image small to begin with (using the &lt;tt&gt;goptions&lt;/tt&gt; statement [section 5.3]) reduces quality loss when replaying as small multiples.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;%macro pies;&lt;br /&gt;goptions hsize=1in vsize=1in;&lt;br /&gt;%do i = 1 %to 49;&lt;br /&gt;  %smpie(&amp;i);&lt;br /&gt;%end;&lt;br /&gt;%mend;  &lt;br /&gt;&lt;br /&gt;%pies;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Finally, we can make the template to replay the images, and replay them, both using &lt;tt&gt;proc greplay&lt;/tt&gt;.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;/* key elements of the %makegridtemplate macro: &lt;br /&gt;     how many rows and columns (down and across). &lt;br /&gt;     name for the template.&lt;br /&gt;   Note that the macro is called *inside* the proc greplay statement, &lt;br /&gt;   which allows the user access to proc greplay statement options */&lt;br /&gt;proc greplay nofs  tc=work.templates;&lt;br /&gt;  %makeGridTemplate (across=7, down=7, ordering=LRTB, &lt;br /&gt;      hgap=0, vgap=0, gapT=0, gapL=0, gapr=0, name=sevensq,bordercolor=white)&lt;br /&gt;quit;&lt;br /&gt;&lt;br /&gt;/* this macro just types out text for us the sequence 1:pie1 2:pie2 ... 
49:pie49 */&lt;br /&gt;/* we need that to replay the figures in proc greplay */&lt;br /&gt;%macro pielist;&lt;br /&gt;%do i = 1 %to 49; &amp;i:pie&amp;i %end;;&lt;br /&gt;%mend;&lt;br /&gt;&lt;br /&gt;filename pies "c:\temp\pies2.png";&lt;br /&gt;goptions hsize=7in vsize=7in gsfname=pies device=png;&lt;br /&gt;&lt;br /&gt;/* now ready to replay the plots&lt;br /&gt;   The proc greplay options say what template to use and where to find it, &lt;br /&gt;     and where to find the input and place the output */&lt;br /&gt;/* The treplay statement plays the old plots to the locations specified &lt;br /&gt;     in the template */&lt;br /&gt;proc greplay template=sevensq tc=work.templates nofs gout=kkpies igout=kkpies;&lt;br /&gt;treplay %pielist;&lt;br /&gt;run;&lt;br /&gt;quit;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The net outcome of this is shown below.  The image is pretty disappointing-- the circles are not round, and the text is pretty blurry.  However, the message is as clear as in the prettier R version.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-fIXFwCpjYjE/Tsqs56VJX6I/AAAAAAAADPc/TZIzxwZLnZs/s1600/pies.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 700px; height: 700px;" src="http://1.bp.blogspot.com/-fIXFwCpjYjE/Tsqs56VJX6I/AAAAAAAADPc/TZIzxwZLnZs/s1600/pies.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5677540391164403618" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-6621654968459651308?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_FRrjCP4NC8:knEjtT0HYFc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_FRrjCP4NC8:knEjtT0HYFc:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_FRrjCP4NC8:knEjtT0HYFc:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_FRrjCP4NC8:knEjtT0HYFc:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_FRrjCP4NC8:knEjtT0HYFc:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=_FRrjCP4NC8:knEjtT0HYFc:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=_FRrjCP4NC8:knEjtT0HYFc:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=_FRrjCP4NC8:knEjtT0HYFc:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/_FRrjCP4NC8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/6621654968459651308/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/11/example-916-small-multiples.html#comment-form" title="9 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6621654968459651308?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6621654968459651308?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/_FRrjCP4NC8/example-916-small-multiples.html" title="Example 9.16: Small multiples" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-Q9of7M3QUCA/TsP7G916GCI/AAAAAAAADPA/FasOjk7NPjM/s72-c/small_r.png" height="72" width="72" /><thr:total>9</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/11/example-916-small-multiples.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkEEQXYzeip7ImA9WhRSGUo.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-1106213750688019115</id><published>2011-11-22T10:10:00.000-05:00</published><updated>2011-11-22T10:10:00.882-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-22T10:10:00.882-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="proc gchart" /><category scheme="http://www.blogger.com/atom/ns#" term="bargraph.CI() 
function" /><category scheme="http://www.blogger.com/atom/ns#" term="sciplot package" /><category scheme="http://www.blogger.com/atom/ns#" term="dynamite plot" /><title>Example 9.15: Bar chart with error bars ("Dynamite plot")</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-mZciVBpPLEg/Tr3o4pme4lI/AAAAAAAADOY/EhEwECWk8HQ/s1600/dynomite_r.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 400px;" src="http://4.bp.blogspot.com/-mZciVBpPLEg/Tr3o4pme4lI/AAAAAAAADOY/EhEwECWk8HQ/s400/dynomite_r.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5673947165494272594" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The "dynamite plot", a bar chart plotting a mean with an error bar, is one of the most reviled types of image among statisticians.  Reasons to dislike them are numerous, and are nicely summarized &lt;a href="http://emdbolker.wikidot.com/blog:dynamite"&gt;here&lt;/a&gt;. (Edward Tufte also suggests they be avoided.)  &lt;br /&gt;&lt;br /&gt;Nonetheless, as consulting statisticians, we're often required to meet the needs of our collaborators, or of the reviewers who will judge their submissions.  If you need to do it, here's how.  We demonstrate with the HELP data set.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;The plot can be created easily with &lt;tt&gt;proc gchart&lt;/tt&gt; (section 5.1.3).&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc gchart data = "c:\book\help.sas7bdat";&lt;br /&gt;vbar substance /&lt;br /&gt;   group=female&lt;br /&gt;   type=mean &lt;br /&gt;   sumvar=sexrisk&lt;br /&gt;   errorbar=top;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The syntax requests the basic chart variable be &lt;tt&gt;substance&lt;/tt&gt;, in groups defined by &lt;tt&gt;female&lt;/tt&gt;, where the mean of &lt;tt&gt;sexrisk&lt;/tt&gt; is plotted.  
The &lt;tt&gt;errorbar&lt;/tt&gt; option can be used to make the full CI or just the top lines; the default is a 95% standard error of the mean, but that can also be adjusted.  The result is shown below.&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-c59GT_5iRN4/Tr3PWISWWoI/AAAAAAAADOM/OT2ti70H4rc/s1600/dynomite_SAS.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 213px;" src="http://2.bp.blogspot.com/-c59GT_5iRN4/Tr3PWISWWoI/AAAAAAAADOM/OT2ti70H4rc/s400/dynomite_SAS.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5673919084645210754" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;There are several easily googlable examples of how to do a basic dynamite plot in R, including one in &lt;tt&gt;example(barchart)&lt;/tt&gt;.  However, a grouped example with a nice legend is a bit harder to find.  The &lt;tt&gt;bargraph.CI()&lt;/tt&gt; function in the &lt;tt&gt;sciplot&lt;/tt&gt; package fits the bill.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;library(sciplot)&lt;br /&gt;bp = with(ds, bargraph.CI(x.factor=female, group=substance, response=sexrisk,&lt;br /&gt;  lc=FALSE, xlab="Female",&lt;br /&gt;  legend=TRUE, x.leg=3.3, cex.leg=1.3, cex.names=1.5, cex.lab = 1.5,&lt;br /&gt;  ci.fun=function(x) {c(mean(x) - 1.96*se(x), mean(x) + 1.96*se(x))}))&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The results are shown at the top.  The first row of options describe the variables to be used; the default plot statistic is the mean.  The second and third rows suppress the bottom of the confidence interval and customize the location, label, and font size in the legend and the x-axis label.  
The only tricky bit is the last row, which provides the 95% CI limits to be plotted, as opposed to the default 1 standard error.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-1106213750688019115?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UnYO0-0BMaY:0jEJ1h7ndMU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UnYO0-0BMaY:0jEJ1h7ndMU:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UnYO0-0BMaY:0jEJ1h7ndMU:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UnYO0-0BMaY:0jEJ1h7ndMU:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UnYO0-0BMaY:0jEJ1h7ndMU:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=UnYO0-0BMaY:0jEJ1h7ndMU:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UnYO0-0BMaY:0jEJ1h7ndMU:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=UnYO0-0BMaY:0jEJ1h7ndMU:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/UnYO0-0BMaY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/1106213750688019115/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/11/example-915-bar-chart-with-error-bars.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1106213750688019115?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1106213750688019115?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/UnYO0-0BMaY/example-915-bar-chart-with-error-bars.html" title="Example 9.15: Bar chart with error bars (&quot;Dynamite plot&quot;)" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-mZciVBpPLEg/Tr3o4pme4lI/AAAAAAAADOY/EhEwECWk8HQ/s72-c/dynomite_r.png" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/11/example-915-bar-chart-with-error-bars.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkAMQXw9eyp7ImA9WhRSE0s.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-534893084426424756</id><published>2011-11-15T09:53:00.001-05:00</published><updated>2011-11-15T09:53:00.263-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-15T09:53:00.263-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="SAS formats" /><category 
scheme="http://www.blogger.com/atom/ns#" term="ods system" /><category scheme="http://www.blogger.com/atom/ns#" term="proc logistic" /><category scheme="http://www.blogger.com/atom/ns#" term="confint()" /><category scheme="http://www.blogger.com/atom/ns#" term="confidence intervals" /><category scheme="http://www.blogger.com/atom/ns#" term="profile likelihood" /><category scheme="http://www.blogger.com/atom/ns#" term="logistic regression" /><category scheme="http://www.blogger.com/atom/ns#" term="clodds statement" /><category scheme="http://www.blogger.com/atom/ns#" term="MASS library" /><title>Example 9.14: confidence intervals for logistic regression models</title><content type="html">Recently a student asked about the difference between &lt;tt&gt;confint()&lt;/tt&gt; and &lt;tt&gt;confint.default()&lt;/tt&gt; functions, both available in the MASS library to calculate confidence intervals from logistic regression models.  The following example demonstrates that they yield different results.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ds = read.csv("http://www.math.smith.edu/r/data/help.csv")&lt;br /&gt;library(MASS)&lt;br /&gt;glmmod = glm(homeless ~ age + female, binomial, data=ds)&lt;br /&gt;&lt;br /&gt;&gt; summary(glmmod)&lt;br /&gt;Call:&lt;br /&gt;glm(formula = homeless ~ age + female, family = binomial, data = ds)&lt;br /&gt;&lt;br /&gt;Deviance Residuals: &lt;br /&gt;    Min       1Q   Median       3Q      Max  &lt;br /&gt;-1.3600  -1.1231  -0.9185   1.2020   1.5466  &lt;br /&gt;&lt;br /&gt;Coefficients:&lt;br /&gt;            Estimate Std. Error z value Pr(&gt;|z|)  &lt;br /&gt;(Intercept) -0.89262    0.45366  -1.968   0.0491 *&lt;br /&gt;age          0.02386    0.01242   1.921   0.0548 .&lt;br /&gt;female      -0.49198    0.22822  -2.156   0.0311 *&lt;br /&gt;---&lt;br /&gt;Signif. 
codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 &lt;br /&gt;&lt;br /&gt;(Dispersion parameter for binomial family taken to be 1)&lt;br /&gt;&lt;br /&gt;    Null deviance: 625.28  on 452  degrees of freedom&lt;br /&gt;Residual deviance: 617.19  on 450  degrees of freedom&lt;br /&gt;AIC: 623.19&lt;br /&gt;&lt;br /&gt;Number of Fisher Scoring iterations: 4&lt;br /&gt;&lt;br /&gt;&gt; exp(confint(glmmod))&lt;br /&gt;Waiting for profiling to be done...&lt;br /&gt;                2.5 %    97.5 %&lt;br /&gt;(Intercept) 0.1669932 0.9920023&lt;br /&gt;age         0.9996431 1.0496390&lt;br /&gt;female      0.3885283 0.9522567&lt;br /&gt;&gt; library(MASS)&lt;br /&gt;&gt; exp(confint.default(glmmod))&lt;br /&gt;                2.5 %    97.5 %&lt;br /&gt;(Intercept) 0.1683396 0.9965331&lt;br /&gt;age         0.9995114 1.0493877&lt;br /&gt;female      0.3909104 0.9563045&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Why are they different? Which one is correct?&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;Fortunately the detailed documentation in SAS can help resolve this.  The &lt;tt&gt;logistic&lt;/tt&gt; procedure (section 4.1.1) offers the &lt;tt&gt;clodds&lt;/tt&gt; option to the &lt;tt&gt;model&lt;/tt&gt; statement.  Setting this option to &lt;tt&gt;both&lt;/tt&gt; produces two sets of CL, based on the Wald test and on the profile-likelihood approach.  (Venzon, D. J. and Moolgavkar, S. H. 
(1988), “A Method for Computing Profile-Likelihood Based Confidence Intervals,” Applied Statistics, 37, 87–94.)&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ods output cloddswald = waldcl cloddspl = plcl;&lt;br /&gt;proc logistic data = "c:\book\help.sas7bdat"  plots=none;&lt;br /&gt;class female (param=ref ref='0');&lt;br /&gt;model homeless(event='1') = age female / clodds = both;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt; Odds Ratio Estimates and Profile-Likelihood Confidence Intervals&lt;br /&gt;&lt;br /&gt; Effect                Unit     Estimate     95% Confidence Limits&lt;br /&gt;&lt;br /&gt; AGE                 1.0000        1.024        1.000        1.050&lt;br /&gt; FEMALE 1 vs 0       1.0000        0.611        0.389        0.952&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;        Odds Ratio Estimates and Wald Confidence Intervals&lt;br /&gt;&lt;br /&gt; Effect                Unit     Estimate     95% Confidence Limits&lt;br /&gt;&lt;br /&gt; AGE                 1.0000        1.024        1.000        1.049&lt;br /&gt; FEMALE 1 vs 0       1.0000        0.611        0.391        0.956&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Unfortunately, the default precision of the printout isn't quite sufficient to identify whether this distinction aligns with the differences seen in the two R methods.  We get around this by using the ODS system to save the output as data sets (section A.7.1).  Then we can print the data sets, removing the default rounding formats to find all of the available precision. 
&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;title "Wald CL";&lt;br /&gt;proc print data=waldcl; format _all_; run;&lt;br /&gt;title "PL CL";&lt;br /&gt;proc print data=plcl; format _all_; run;&lt;br /&gt;&lt;br /&gt;                              Wald CL  &lt;br /&gt;                                     Odds&lt;br /&gt;   Obs    Effect           Unit    RatioEst    LowerCL    UpperCL&lt;br /&gt;&lt;br /&gt;    1     AGE                1      1.02415    0.99951    1.04939&lt;br /&gt;    2     FEMALE 1 vs 0      1      0.61143    0.39092    0.95633&lt;br /&gt; &lt;br /&gt;  &lt;br /&gt;                               PL CL                            &lt;br /&gt;                                     Odds&lt;br /&gt;   Obs    Effect           Unit    RatioEst    LowerCL    UpperCL&lt;br /&gt;&lt;br /&gt;    1     AGE                1      1.02415    0.99964    1.04964&lt;br /&gt;    2     FEMALE 1 vs 0      1      0.61143    0.38853    0.95226&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;With this added precision, we can see that the &lt;tt&gt;confint.default()&lt;/tt&gt; function in the MASS library generates the Wald confidence limits, while the &lt;tt&gt;confint()&lt;/tt&gt; function produces the profile-likelihood limits.  This also explains the &lt;tt&gt;confint()&lt;/tt&gt; comment "Waiting for profiling to be done..."  Thus neither CI from the MASS library is incorrect, though the profile-likelihood method is thought to be superior, especially for small sample sizes.  Little practical difference is seen here.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-534893084426424756?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=xLaKW8-tHY4:RHG5t-OGLZM:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=xLaKW8-tHY4:RHG5t-OGLZM:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=xLaKW8-tHY4:RHG5t-OGLZM:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=xLaKW8-tHY4:RHG5t-OGLZM:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=xLaKW8-tHY4:RHG5t-OGLZM:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=xLaKW8-tHY4:RHG5t-OGLZM:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=xLaKW8-tHY4:RHG5t-OGLZM:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=xLaKW8-tHY4:RHG5t-OGLZM:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/xLaKW8-tHY4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/534893084426424756/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/11/example-914-confidence-intervals-for.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/534893084426424756?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/534893084426424756?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/xLaKW8-tHY4/example-914-confidence-intervals-for.html" title="Example 9.14: confidence intervals for logistic regression models" /><author><name>Nick Horton</name><uri>http://www.blogger.com/profile/00242216324355342047</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="25" height="32" src="http://1.bp.blogspot.com/_Y7yEfDrPG4o/SjJwk69Z2tI/AAAAAAAAAAM/KoYswr20tck/S220/nh2.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/11/example-914-confidence-intervals-for.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0MFRXs4fyp7ImA9WhRTF0o.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-6773109290045953925</id><published>2011-11-08T09:10:00.010-05:00</published><updated>2011-11-08T11:56:54.537-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-08T11:56:54.537-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="proc fcmp" /><category scheme="http://www.blogger.com/atom/ns#" term="proc mcmc" /><category scheme="http://www.blogger.com/atom/ns#" term="round function" /><category scheme="http://www.blogger.com/atom/ns#" term="negative 
binomial regression" /><title>Example 9.13: Negative binomial regression with proc mcmc</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-euBX78jktys/TqtbjmXONSI/AAAAAAAADNU/tg4On7OWzHw/s1600/mcmcdxnbreg.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 300px;" src="http://2.bp.blogspot.com/-euBX78jktys/TqtbjmXONSI/AAAAAAAADNU/tg4On7OWzHw/s400/mcmcdxnbreg.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5668725223127397666" /&gt;&lt;/a&gt;&lt;br /&gt;In practice, data that derive from counts rarely seem to be fit well by a Poisson model; one more flexible alternative is a negative binomial model.  In this SAS-only entry, we discuss how &lt;tt&gt;proc mcmc&lt;/tt&gt; can be used for estimation.  An overview of support for Bayesian methods in R can be found in the &lt;a href="http://cran.r-project.org/web/views/Bayesian.html"&gt;Bayesian Task View&lt;/a&gt;. &lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;As noted in &lt;a href="http://sas-and-r.blogspot.com/2011/03/example-830-compare-poisson-and.html"&gt;example 8.30&lt;/a&gt;, the SAS &lt;tt&gt;rand&lt;/tt&gt; function lacks the option to input the mean directly, instead using the basic parameters of the probability of success and the number of successes k.  (Though note the negative binomial has several formulations, which can cause problems when using multiple software systems.)  
As developed in that example, we use the &lt;tt&gt;proc fcmp&lt;/tt&gt; function to instead work with the mean.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc fcmp outlib=sasuser.funcs.test;&lt;br /&gt;function poismean_nb(mean, size);&lt;br /&gt;  return(size/(mean+size));&lt;br /&gt;  endsub;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;options cmplib=sasuser.funcs;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;With that preparation out of the way, we simulate some data--here an intercept of 0 and a slope of 1.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data test;&lt;br /&gt;  do i = 1 to 10000;&lt;br /&gt;    x = normal(0);&lt;br /&gt; mu = exp(0 + x);&lt;br /&gt; k = 2;&lt;br /&gt; y = rand("NEGBINOMIAL", poismean_nb(mu, k),k);&lt;br /&gt; output;&lt;br /&gt; end;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The &lt;tt&gt;proc mcmc&lt;/tt&gt; code presents a slight difficulty: the k successes before the random number of failures ought to be an integer, and &lt;tt&gt;proc mcmc&lt;/tt&gt; appears to lack an integer-valued distribution.  The model will run with continuous values of k, but its behavior is strange.  Instead, we put a prior on a new parameter, &lt;tt&gt;kstar&lt;/tt&gt;, and take k as the rounded value (section 1.8.4) of &lt;tt&gt;kstar&lt;/tt&gt;; since the values must be &gt; 0, we also add 1 to the rounded value. &lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc mcmc data=test nmc=1000 thin=1 seed=10061966;&lt;br /&gt;parms beta0 1 beta1 1 kstar 10;&lt;br /&gt;&lt;br /&gt;prior b: ~ normal(0, var = 10000);&lt;br /&gt;prior kstar ~ igamma(.01, scale=0.01);&lt;br /&gt;&lt;br /&gt;k=round(kstar+1, 1);&lt;br /&gt;mu = exp(beta0 + beta1 * x);&lt;br /&gt;&lt;br /&gt;model y ~ negbin(k, poismean_nb(mu, k));&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The way the &lt;tt&gt;kstar&lt;/tt&gt; and &lt;tt&gt;k&lt;/tt&gt; business works is that SAS actually processes the programming statements in each iteration of the chain.  
Posterior summaries just below, sample diagnostic plot above.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;                       Posterior Summaries&lt;br /&gt;&lt;br /&gt;Parameter        N  Mean  Standard              Percentiles&lt;br /&gt;                          Deviation         25%      50%     75%&lt;br /&gt;beta0        10000 0.00712 0.0131        -0.00171  0.00721 0.0156&lt;br /&gt;beta1        10000 0.9818  0.0128         0.9732   0.9814  0.9905&lt;br /&gt;kstar        10000 0.9648  0.2855         0.7112   0.9481  1.1974&lt;br /&gt;&lt;br /&gt;                       Posterior Intervals&lt;br /&gt;Parameter Alpha Equal-Tail Interval   HPD Interval&lt;br /&gt;beta0    0.050 -0.0195 0.0321       -0.0182 0.0328&lt;br /&gt;beta1    0.050  0.9569 1.0074        0.9562 1.0063&lt;br /&gt;kstar    0.050  0.5208 1.4709        0.5001 1.4348&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;If a simple model like the one shown here is all you need, &lt;tt&gt;proc genmod&lt;/tt&gt;'s &lt;tt&gt;bayes&lt;/tt&gt; statement can work for you.  But the formulation demonstrated above would be useful for a generalized linear mixed model, for example.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-6773109290045953925?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=PlbPd0G-xXM:6WitIRmLfKk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=PlbPd0G-xXM:6WitIRmLfKk:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=PlbPd0G-xXM:6WitIRmLfKk:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=PlbPd0G-xXM:6WitIRmLfKk:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=PlbPd0G-xXM:6WitIRmLfKk:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=PlbPd0G-xXM:6WitIRmLfKk:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=PlbPd0G-xXM:6WitIRmLfKk:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=PlbPd0G-xXM:6WitIRmLfKk:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/PlbPd0G-xXM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/6773109290045953925/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/11/example-913-negative-binomial.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6773109290045953925?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6773109290045953925?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/PlbPd0G-xXM/example-913-negative-binomial.html" title="Example 9.13: Negative binomial regression with proc mcmc" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-euBX78jktys/TqtbjmXONSI/AAAAAAAADNU/tg4On7OWzHw/s72-c/mcmcdxnbreg.png" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/11/example-913-negative-binomial.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ck4MQXg_eCp7ImA9WhRTEEo.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-1528019020125181450</id><published>2011-10-31T10:23:00.012-04:00</published><updated>2011-10-31T10:23:00.640-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-31T10:23:00.640-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mosaic package" /><category 
scheme="http://www.blogger.com/atom/ns#" term="resampling based inference" /><category scheme="http://www.blogger.com/atom/ns#" term="permutation test" /><category scheme="http://www.blogger.com/atom/ns#" term="shuffle()" /><category scheme="http://www.blogger.com/atom/ns#" term="HELP data set" /><category scheme="http://www.blogger.com/atom/ns#" term="shuffle() function" /><title>Example 9.12: simpler ways to carry out permutation tests</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-PZbI9xtKI5k/TpmbmWlT-FI/AAAAAAAAAFY/VamXrVRSeXw/s1600/Rplot01.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 400px;" src="http://4.bp.blogspot.com/-PZbI9xtKI5k/TpmbmWlT-FI/AAAAAAAAAFY/VamXrVRSeXw/s400/Rplot01.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5663729089594521682" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;In a &lt;a href="http://sas-and-r.blogspot.com/2009/10/example-716-assess-robustness-of.html"&gt;previous&lt;/a&gt; entry, as well as section 2.4.3 of the book, we describe how to carry out a 2 group permutation test in SAS as well as with the &lt;a href="http://cran.r-project.org/web/packages/coin/index.html"&gt;coin&lt;/a&gt; package in R.  We demonstrate with comparing the ages of the female and male subjects in the &lt;a href="http://www.math.smith.edu/r/datasets.php"&gt;HELP&lt;/a&gt; study.&lt;br /&gt;&lt;br /&gt;In this entry, we revisit the permutation test using other functions.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;We describe a simpler interface to carry out and visualize permutation tests using the functions from the &lt;a href="http://cran.r-project.org/web/packages/mosaic/index.html"&gt;mosaic&lt;/a&gt; package.  &lt;br /&gt;&lt;br /&gt;We begin by replicating our previous example (section 2.6.4, p. 
87).&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ds = read.csv("http://www.math.smith.edu/r/data/help.csv")&lt;br /&gt;library(coin)&lt;br /&gt;numsim = 1000&lt;br /&gt;oneway_test(age ~ as.factor(female), &lt;br /&gt;  distribution=approximate(B=numsim-1), data=ds)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;which yields the following output:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt; Approximative 2-Sample Permutation Test&lt;br /&gt;&lt;br /&gt;data:  age by as.factor(female) (0, 1) &lt;br /&gt;Z = -0.9194, p-value = 0.3894&lt;br /&gt;alternative hypothesis: true mu is not equal to 0 &lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;We conclude that there is minimal evidence to contradict the null hypothesis that the two groups have the same ages back in their respective populations.&lt;br /&gt;&lt;br /&gt;Now we demonstrate another way to run this test in a more general form, using the mosaic package's &lt;tt&gt;do()&lt;/tt&gt; function combined with the &lt;tt&gt;*&lt;/tt&gt; operator to repeatedly carry out fitting a linear model with a parameter for &lt;tt&gt;female&lt;/tt&gt; which will calculate our test statistic (difference in means between females and males) repeatedly after shuffling the group indicators. 
The &lt;tt&gt;shuffle()&lt;/tt&gt; function permutes the group labels, and then the summary statistic is calculated.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; library(mosaic)&lt;br /&gt;&gt; obsdiff = with(ds, mean(age[female==1]) - mean(age[female==0]))&lt;br /&gt;&gt; obsdiff&lt;br /&gt;     mean &lt;br /&gt;0.7841284 &lt;br /&gt;&gt; summary(age ~ female, data=ds, fun=mean)&lt;br /&gt;age    N=453&lt;br /&gt;&lt;br /&gt;+-------+---+---+--------+&lt;br /&gt;|       |   |N  |mean    |&lt;br /&gt;+-------+---+---+--------+&lt;br /&gt;|female |No |346|35.46821|&lt;br /&gt;|       |Yes|107|36.25234|&lt;br /&gt;+-------+---+---+--------+&lt;br /&gt;|Overall|   |453|35.65342|&lt;br /&gt;+-------+---+---+--------+&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Now we can run the permutation test, then display the results on a souped-up histogram with different shading for values larger in magnitude than the observed statistic (see above).&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;res = do(numsim) * lm(age ~ shuffle(female), data=ds)&lt;br /&gt;pvalue = sum(abs(res$female) &gt; abs(obsdiff)) / numsim&lt;br /&gt;xhistogram(~ female, groups = abs(female) &gt; abs(obsdiff), &lt;br /&gt;  n=50, density=TRUE, data=res, xlab="difference between groups",&lt;br /&gt;  main=paste("Permutation test result: p=", round(pvalue, 3)))&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The results are similar to those from the previous test: there is little evidence to contradict the null hypothesis.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;In SAS, we'll take another approach, delving into the capabilities of &lt;tt&gt;proc iml&lt;/tt&gt; to make a manual permutation test.  
We begin by reading the data and replicating the example in the book.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;libname k 'c:\book';&lt;br /&gt;proc npar1way data = k.help;&lt;br /&gt;class female;&lt;br /&gt;var age;&lt;br /&gt;exact scores=data / mc n= 9999 alpha = .05;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;                   Data Scores One-Way Analysis&lt;br /&gt;&lt;br /&gt;                  Chi-Square               0.8453&lt;br /&gt;                  DF                            1&lt;br /&gt;                  Pr &gt; Chi-Square          0.3579&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Permuting data is a very awkward thing to do in &lt;tt&gt;data&lt;/tt&gt; steps.  But it turns out to be easy in &lt;tt&gt;proc iml&lt;/tt&gt; (the built-in SAS matrix language).  Here we read in the key variables from the data set (&lt;tt&gt;use&lt;/tt&gt; and &lt;tt&gt;read&lt;/tt&gt;).  Then we generate the permutations (&lt;tt&gt;ranperm&lt;/tt&gt;).  However, this generates a row for each permuted data set, while we need a column for each, so we transpose the matrix (&lt;tt&gt;t&lt;/tt&gt;) before saving it.  Then we save the resulting data in a SAS data set with the &lt;tt&gt;female&lt;/tt&gt; variable.  Note that we permuted the ages only, as opposed to the R example-- it doesn't matter which is permuted, of course.   Much of the &lt;tt&gt;proc iml&lt;/tt&gt; code used here can be found in section 1.9 of the book-- however, note that curly braces are required in the &lt;tt&gt;read&lt;/tt&gt; statement, as shown below.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc iml;&lt;br /&gt;use k.help;&lt;br /&gt;read all var{female age} into x;&lt;br /&gt;p = t(ranperm(x[,2],1000));&lt;br /&gt;paf = x[,1]||p;&lt;br /&gt;create newds from paf;&lt;br /&gt;append from paf;&lt;br /&gt;quit;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;With the permuted data in hand, we use &lt;tt&gt;proc ttest&lt;/tt&gt; (section 2.4.1) with the &lt;tt&gt;ODS&lt;/tt&gt; system to generate and save the differences.  
Note that the default variable names from &lt;tt&gt;proc iml&lt;/tt&gt; are fairly nondescript.  With the 1000 permuted statistics in hand, we can generate a histogram of the statistics and a p-value with proc univariate.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ods output conflimits=diff;&lt;br /&gt;proc ttest data=newds plots=none;&lt;br /&gt;  class col1;&lt;br /&gt;  var col2 - col1001;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;proc univariate data=diff;&lt;br /&gt;  where method = "Pooled";&lt;br /&gt;  var mean;&lt;br /&gt;  histogram mean / normal;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;data diff2;&lt;br /&gt;set diff;&lt;br /&gt;absdiff = abs(mean);&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;proc univariate data=diff2&lt;br /&gt;  loccount mu0 = 0.7841284;&lt;br /&gt;  where method = "Pooled";&lt;br /&gt;  var absdiff;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;                     Location Counts: Mu0=0.78&lt;br /&gt;&lt;br /&gt;                     Count                Value&lt;br /&gt;&lt;br /&gt;                     Num Obs &gt; Mu0          357&lt;br /&gt;                     Num Obs ^= Mu0        1000&lt;br /&gt;                     Num Obs &lt; Mu0          643&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-847E00RT-uE/TqXAI0ecRBI/AAAAAAAADME/KK-qHoQQ-Aw/s1600/permhist.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 300px;" src="http://3.bp.blogspot.com/-847E00RT-uE/TqXAI0ecRBI/AAAAAAAADME/KK-qHoQQ-Aw/s400/permhist.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5667146963873448978" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-1528019020125181450?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=1jkoWORkXJY:q7ae5BOU0UY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=1jkoWORkXJY:q7ae5BOU0UY:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=1jkoWORkXJY:q7ae5BOU0UY:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=1jkoWORkXJY:q7ae5BOU0UY:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=1jkoWORkXJY:q7ae5BOU0UY:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=1jkoWORkXJY:q7ae5BOU0UY:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=1jkoWORkXJY:q7ae5BOU0UY:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=1jkoWORkXJY:q7ae5BOU0UY:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/1jkoWORkXJY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/1528019020125181450/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/10/example-912-simpler-ways-to-carry-out.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1528019020125181450?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1528019020125181450?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/1jkoWORkXJY/example-912-simpler-ways-to-carry-out.html" title="Example 9.12: simpler ways to carry out permutation tests" /><author><name>Nick Horton</name><uri>http://www.blogger.com/profile/00242216324355342047</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="25" height="32" src="http://1.bp.blogspot.com/_Y7yEfDrPG4o/SjJwk69Z2tI/AAAAAAAAAAM/KoYswr20tck/S220/nh2.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-PZbI9xtKI5k/TpmbmWlT-FI/AAAAAAAAAFY/VamXrVRSeXw/s72-c/Rplot01.png" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/10/example-912-simpler-ways-to-carry-out.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0IEQn0-fSp7ImA9WhRTEUw.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-7531046408436431123</id><published>2011-10-30T08:43:00.007-04:00</published><updated>2011-10-31T21:38:23.355-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-31T21:38:23.355-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mosaic package" /><category 
scheme="http://www.blogger.com/atom/ns#" term="Ken Beath" /><category scheme="http://www.blogger.com/atom/ns#" term="summary statistics" /><category scheme="http://www.blogger.com/atom/ns#" term="proc tabulate" /><title>Proc tabulate for simple statistics (corrected)</title><content type="html">Ken Beath, of &lt;a href="http://www.mq.edu.au/"&gt;Macquarie University&lt;/a&gt;, commented on an earlier &lt;a href="http://sas-and-r.blogspot.com/2011/10/example-99-simplifying-r-using-mosaic.html"&gt;entry&lt;/a&gt; that the best way to generate summary statistics is using &lt;tt&gt;proc tabulate&lt;/tt&gt;.  While the best tools might differ, depending on the purpose, we wanted to share Ken's code demonstrating how to replicate the R &lt;a href="http://cran.r-project.org/web/packages/mosaic/index.html"&gt;mosaic&lt;/a&gt; package tables using &lt;tt&gt;proc tabulate&lt;/tt&gt;.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;Ken's fully annotated code is appended below; we highlight the key syntax elements here.  Reading in the data is shown in many examples.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc tabulate data=help;&lt;br /&gt; class substance;&lt;br /&gt; var cesd;&lt;br /&gt; table (substance all),cesd*(n nmiss mean median);&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The &lt;tt&gt;class&lt;/tt&gt; and &lt;tt&gt;var&lt;/tt&gt; statements serve their usual purpose of identifying the categorical and analysis variables.  The &lt;tt&gt;table&lt;/tt&gt; statement does the work of the procedure, specifying a table with rows (requested before the comma) for each level of &lt;tt&gt;substance&lt;/tt&gt; and overall, with columns (requested after the *) to include the listed statistics for the analysis variable &lt;tt&gt;cesd&lt;/tt&gt;.  
The resulting table is shown below.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;    ------------------------------------------------------------------------&lt;br /&gt;    |                  |                       cesd                        |&lt;br /&gt;    |                  |---------------------------------------------------|&lt;br /&gt;    |                  |     N      |   NMiss    |    Mean    |   Median   |&lt;br /&gt;    |------------------+------------+------------+------------+------------|&lt;br /&gt;    |substance         |            |            |            |            |&lt;br /&gt;    |------------------|            |            |            |            |&lt;br /&gt;    |alcohol           |      177.00|        0.00|       34.37|       36.00|&lt;br /&gt;    |------------------+------------+------------+------------+------------|&lt;br /&gt;    |cocaine           |      152.00|        0.00|       29.42|       30.00|&lt;br /&gt;    |------------------+------------+------------+------------+------------|&lt;br /&gt;    |heroin            |      124.00|        0.00|       34.87|       35.00|&lt;br /&gt;    |------------------+------------+------------+------------+------------|&lt;br /&gt;    |All               |      453.00|        0.00|       32.85|       34.00|&lt;br /&gt;    ------------------------------------------------------------------------&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Below we show Ken's code, complete with his helpful annotations.  Note his use of formats.  
We're not fond of formats, but for presentation, as opposed to analysis, they can be very useful.&lt;br /&gt;&lt;br /&gt;(Note: a prior version of this entry inexplicably referred to &lt;tt&gt;proc report&lt;/tt&gt;, rather than &lt;tt&gt;proc tabulate&lt;/tt&gt;.)&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;PROC IMPORT OUT= help&lt;br /&gt;  DATAFILE= "C:\Users\kbeath\Documents\tabulate\help.csv" &lt;br /&gt;  DBMS=CSV REPLACE;&lt;br /&gt;  GETNAMES=YES;&lt;br /&gt;  DATAROW=2; &lt;br /&gt;RUN;&lt;br /&gt;&lt;br /&gt;/* missing option create a category missing for each categorical &lt;br /&gt;    variable, always a good idea;&lt;br /&gt;  the table statement specifies row then column;&lt;br /&gt;  so for this example we have substance defining the rows, and &lt;br /&gt;    cesd statistics the columns */&lt;br /&gt;&lt;br /&gt;proc tabulate data=help missing;&lt;br /&gt; class substance;&lt;br /&gt; var cesd;&lt;br /&gt; table substance,cesd*(mean n nmiss);&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;/* formchar specifies the characters used to form the borders &lt;br /&gt;      - we set them all to blank to have no borders;&lt;br /&gt;   mean*f=8.2 specifies that the mean is formatted using &lt;br /&gt;      an 8.2 format, etc*/&lt;br /&gt;&lt;br /&gt;proc tabulate data=help missing formchar='           ';&lt;br /&gt; class substance;&lt;br /&gt; var cesd;&lt;br /&gt; table substance,cesd*(mean*f=8.2 n*f=8. nmiss*f=8.);&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;/* substance="Substance" causes change in label to Substance */&lt;br /&gt;&lt;br /&gt;proc format;&lt;br /&gt; value $subf "alcohol"="Alcohol" &lt;br /&gt;          "cocaine"="Cocaine" "heroin"="Heroin";&lt;br /&gt;&lt;br /&gt;proc tabulate data=help missing formchar='           ';&lt;br /&gt; class substance;&lt;br /&gt; var cesd;&lt;br /&gt; format substance $subf.;&lt;br /&gt; table substance="Substance",&lt;br /&gt;            cesd="CESD"*(mean*f=8.2 n*f=8. 
nmiss*f=8.);&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;/* all the statistics. I've changed the format to 7.2 &lt;br /&gt;     so they all fit on a line */&lt;br /&gt;&lt;br /&gt;proc tabulate data=help missing formchar='           ';&lt;br /&gt; class substance;&lt;br /&gt; var cesd;&lt;br /&gt; format substance $subf.;&lt;br /&gt; table substance="Substance",&lt;br /&gt;            cesd="CESD"*(n*f=8. nmiss*f=8. &lt;br /&gt;            (mean std min q1 median q3 max)*f=7.2);&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;/* add a line for all */&lt;br /&gt;&lt;br /&gt;proc tabulate data=help missing formchar='           ';&lt;br /&gt; class substance;&lt;br /&gt; var cesd;&lt;br /&gt; format substance $subf.;&lt;br /&gt; table (substance="Substance" all),&lt;br /&gt;            cesd="CESD"*(n*f=8. nmiss*f=8. &lt;br /&gt;            (mean std min q1 median q3 max)*f=7.2);&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;/* to show how easy it is, further subdivide &lt;br /&gt;   by racial group */&lt;br /&gt;&lt;br /&gt;proc tabulate data=help missing formchar='           ';&lt;br /&gt; class substance racegrp;&lt;br /&gt; var cesd;&lt;br /&gt; format substance $subf.;&lt;br /&gt; table (racegrp all)*(substance="Substance" all),&lt;br /&gt;            cesd="CESD"*(n*f=8. nmiss*f=8. &lt;br /&gt;            (mean std min q1 median q3 max)*f=7.2);&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-7531046408436431123?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=MUrO1f0ppJA:xh7sdmlti2g:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=MUrO1f0ppJA:xh7sdmlti2g:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=MUrO1f0ppJA:xh7sdmlti2g:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=MUrO1f0ppJA:xh7sdmlti2g:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=MUrO1f0ppJA:xh7sdmlti2g:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=MUrO1f0ppJA:xh7sdmlti2g:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=MUrO1f0ppJA:xh7sdmlti2g:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=MUrO1f0ppJA:xh7sdmlti2g:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/MUrO1f0ppJA" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/7531046408436431123/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/10/proc-report-for-simple-statistics.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/7531046408436431123?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/7531046408436431123?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/MUrO1f0ppJA/proc-report-for-simple-statistics.html" title="Proc tabulate for simple statistics (corrected)" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/10/proc-report-for-simple-statistics.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUMEQXg_fCp7ImA9WhdaFUs.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-6667707065691061242</id><published>2011-10-25T14:30:00.027-04:00</published><updated>2011-10-25T14:30:00.644-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-25T14:30:00.644-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="time series" /><category scheme="http://www.blogger.com/atom/ns#" term="ts()" /><category scheme="http://www.blogger.com/atom/ns#" term="date formats" /><category scheme="http://www.blogger.com/atom/ns#" term="abline()" /><category 
scheme="http://www.blogger.com/atom/ns#" term="proc gplot" /><category scheme="http://www.blogger.com/atom/ns#" term="Bureau of Labor Statistics" /><category scheme="http://www.blogger.com/atom/ns#" term="plot.ts()" /><category scheme="http://www.blogger.com/atom/ns#" term="axis statement" /><category scheme="http://www.blogger.com/atom/ns#" term="mtext()" /><category scheme="http://www.blogger.com/atom/ns#" term="reflabel option" /><category scheme="http://www.blogger.com/atom/ns#" term="offset option" /><category scheme="http://www.blogger.com/atom/ns#" term="href option" /><category scheme="http://www.blogger.com/atom/ns#" term="job creation" /><category scheme="http://www.blogger.com/atom/ns#" term="col option" /><title>Example 9.11: Employment plot</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-9mbpuUqNxPA/TpiAesWHYII/AAAAAAAADKs/bf53u-YhW7U/s1600/jobs1.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 287px;" src="http://4.bp.blogspot.com/-9mbpuUqNxPA/TpiAesWHYII/AAAAAAAADKs/bf53u-YhW7U/s400/jobs1.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5663417796206747778" /&gt;&lt;/a&gt;&lt;br /&gt;A facebook friend posted the picture reproduced above-- it makes the case that President Obama has been a successful creator of jobs, and also paints GW Bush as a president who lost jobs.  Another friend pointed out that to be fair, all of Bush's presidency ought to be included.  Let's make a fair plot of job growth and loss.  Data can be retrieved from the &lt;a href="http://data.bls.gov/cgi-bin/surveymost"&gt;Bureau of Labor Statistics&lt;/a&gt;, where Nick will be spending his next sabbatical.  The extract we use below is also available from the book &lt;a href="http://www.math.smith.edu/sasr/datasets/bls.csv"&gt;website&lt;/a&gt;.  
This particular table reports the cumulative change over the past three months, adjusting for seasonal trends.  This tends to smooth out the line.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;The first job is to get the data into SAS.  Here we demonstrate reading it directly from a URL, as outlined in section 1.1.6.  &lt;br /&gt;&lt;pre&gt;&lt;br /&gt;filename myurl&lt;br /&gt;   url "http://www.math.smith.edu/sasr/datasets/bls.csv";&lt;br /&gt;       &lt;br /&gt;data q_change;&lt;br /&gt;   infile myurl delimiter=',';&lt;br /&gt;input  Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec Annual;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The raw data are in a pretty inconvenient format for plotting.  To make a long, narrow data set with a row for each month, we'll use &lt;tt&gt;proc transpose&lt;/tt&gt; (section 1.5.3) to flip each year on its side.  Then, to attach a date to each measure, we'll use the compress function.  First we add "01" (the first of the month) to the month name, which is in a variable created by &lt;tt&gt;proc transpose&lt;/tt&gt; with the default name "_name_".  Then we tack on the year variable, and input the string in the date format.  The resulting variable is a SAS date (number of days since 1/1/1960, see section 1.6.1).&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc transpose data=q_change out=q2;&lt;br /&gt;  by year;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;data q3;&lt;br /&gt;set q2;&lt;br /&gt;  date1 = input(compress("01"||_name_||year),date11.);&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Now the data are ready to plot.  
It would probably be possible to use &lt;tt&gt;proc sgplot&lt;/tt&gt; but &lt;tt&gt;proc gplot&lt;/tt&gt; is more flexible and allows better control for presentation graphics.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;title "3-month change in private-sector jobs, seasonally adjusted";&lt;br /&gt;axis1 minor = none label = (h=2 angle = 90 "Thousands of jobs")&lt;br /&gt;  value = (h = 2);&lt;br /&gt;axis2 minor = none value = (h=2)label = none &lt;br /&gt;  offset = (1cm, -5cm)&lt;br /&gt;  reflabel = (h=1.5 "Truman" "Eisenhower" "Kennedy/Johnson" &lt;br /&gt;    "Nixon/Ford" "Carter" "Reagan" "GHW Bush" "Clinton" "GW Bush" "Obama" );&lt;br /&gt;symbol1 i=j v=none w=3;&lt;br /&gt;&lt;br /&gt;proc gplot data=q3;&lt;br /&gt;plot col1 * date1 / vaxis=axis1 haxis=axis2 vref=0 &lt;br /&gt;  href = '12apr1945'd '21jan1953'd '20jan1961'd '20jan1969'd &lt;br /&gt;    '20jan1977'd '21jan1981'd '20jan1989'd '20jan1993'd &lt;br /&gt;    '20jan2001'd '20jan2009'd;&lt;br /&gt;format date1 monyy6.;&lt;br /&gt;run;&lt;br /&gt;quit;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Much of the syntax above has been demonstrated in our book examples and blog entries.  What may be unfamiliar is the use of the &lt;tt&gt;href&lt;/tt&gt; option in the &lt;tt&gt;plot&lt;/tt&gt; statement and the &lt;tt&gt;reflabel&lt;/tt&gt; option in the &lt;tt&gt;axis&lt;/tt&gt; statement.  The former draws reference lines at the listed values in the plot, while the latter adds titles to these lines.  
The resulting plot is shown here.&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-4Ba8VhXbJ70/TpiDUPHeQ6I/AAAAAAAADK4/bfyXrPNEo6M/s1600/job%2Bgrowth.bmp"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 250px;" src="http://1.bp.blogspot.com/-4Ba8VhXbJ70/TpiDUPHeQ6I/AAAAAAAADK4/bfyXrPNEo6M/s400/job%2Bgrowth.bmp" border="0" alt=""id="BLOGGER_PHOTO_ID_5663420915096896418" /&gt;&lt;/a&gt;&lt;br /&gt;Looking fairly across postwar presidencies, only the Kennedy/Johnson and Clinton years were mostly unmarred by periods with large losses in jobs.  The Carter years were also times jobs were consistently added.  While the graphic shared on facebook overstates the case against GW Bush, it fairly shows Obama as a job creator thus far, to the extent a president can be credited with jobs created on his watch.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;The main trick in R is loading the data and getting it into the correct format.&lt;br /&gt;Here we use &lt;tt&gt;cbind()&lt;/tt&gt; to grab the appropriate columns, then transpose that matrix and turn it into a vector which serves as input for making a time series object with the &lt;tt&gt;ts()&lt;/tt&gt; command (as in section 4.2.8).  Once this is created, the default plot for a time series object is close to what we have in mind. 
&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ds = read.csv("http://www.math.smith.edu/sasr/datasets/bls.csv", &lt;br /&gt;              header=FALSE)&lt;br /&gt;jobs = with(ds, cbind(V2, V3, V4, V5, V6, V7, V8, V9, V10,&lt;br /&gt;                      V11, V12, V13))&lt;br /&gt;jobsts = ts(as.vector(t(jobs)), start=c(1945, 1), &lt;br /&gt;            frequency=12)&lt;br /&gt;plot(jobsts, plot.type="single", col=4,&lt;br /&gt;     ylab="number of jobs (in thousands)")&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;All that remains is to add the reference lines for 0 jobs and the presidencies.  The lines are most easily added with the &lt;tt&gt;abline()&lt;/tt&gt; function (section 5.2.1).  Easier than adding labels for the lines within the plot function will be to use the &lt;tt&gt;mtext()&lt;/tt&gt; function to place the labels in the margins.  We'll write a little function to save a few keystrokes by plotting the line and adding the label together.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;abline(h=0)&lt;br /&gt;presline = function(date,line,name){&lt;br /&gt;  mtext(at=date,text= name, line=line)&lt;br /&gt;  abline(v = date)&lt;br /&gt;}&lt;br /&gt;presline(1946,1,"Truman")&lt;br /&gt;presline(1953,2,"Eisenhower")&lt;br /&gt;presline(1961,1,"Kennedy/Johnson")&lt;br /&gt;presline(1969,2,"Nixon/Ford")&lt;br /&gt;presline(1977,1,"Carter")&lt;br /&gt;presline(1981,2,"Reagan")&lt;br /&gt;presline(1989,1,"GHW Bush")&lt;br /&gt;presline(1993,2,"Clinton")&lt;br /&gt;presline(2001,1,"GW Bush")&lt;br /&gt;presline(2009,2,"Obama")&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;It might be worthwhile to standardize the number of jobs to the population size, since the dramatic loss of jobs due to demobilization after the Second World War during a single month in 1945 (2.4 million) represented 1.7% of the population, while the recent loss of 2.3 million jobs in 2009 represented only 0.8% of the population.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" 
href="http://2.bp.blogspot.com/-v-xdOW3h-vo/TqV02HuW9qI/AAAAAAAADL4/7Xe3WVfW5OE/s1600/emplot2.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 257px;" src="http://2.bp.blogspot.com/-v-xdOW3h-vo/TqV02HuW9qI/AAAAAAAADL4/7Xe3WVfW5OE/s400/emplot2.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5667064179250886306" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-6667707065691061242?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=GGIHFVEmB5s:2zk84D6avw4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=GGIHFVEmB5s:2zk84D6avw4:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=GGIHFVEmB5s:2zk84D6avw4:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=GGIHFVEmB5s:2zk84D6avw4:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=GGIHFVEmB5s:2zk84D6avw4:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=GGIHFVEmB5s:2zk84D6avw4:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=GGIHFVEmB5s:2zk84D6avw4:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=GGIHFVEmB5s:2zk84D6avw4:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/GGIHFVEmB5s" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/6667707065691061242/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/10/example-911-employment-plot.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6667707065691061242?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6667707065691061242?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/GGIHFVEmB5s/example-911-employment-plot.html" title="Example 9.11: Employment plot" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-9mbpuUqNxPA/TpiAesWHYII/AAAAAAAADKs/bf53u-YhW7U/s72-c/jobs1.jpg" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/10/example-911-employment-plot.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkcMQXoyeSp7ImA9WhdbGEs.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-2288070908810046111</id><published>2011-10-17T11:08:00.000-04:00</published><updated>2011-10-17T11:08:00.491-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-17T11:08:00.491-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="rpart package" /><category scheme="http://www.blogger.com/atom/ns#" term="recursive 
partitioning" /><category scheme="http://www.blogger.com/atom/ns#" term="HELP data set" /><category scheme="http://www.blogger.com/atom/ns#" term="partykit package" /><category scheme="http://www.blogger.com/atom/ns#" term="regression trees" /><title>Example 9.10: more regression trees and recursive partitioning with "partykit"</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-7qM0bOZULTA/TphWH7a6P4I/AAAAAAAAAFM/tYh1hTCtXBA/s1600/Rplot01.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 400px;" src="http://4.bp.blogspot.com/-7qM0bOZULTA/TphWH7a6P4I/AAAAAAAAAFM/tYh1hTCtXBA/s400/Rplot01.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5663371225628032898" /&gt;&lt;/a&gt;&lt;br /&gt;We discuss &lt;a href="http://en.wikipedia.org/wiki/Recursive_partitioning"&gt;recursive partitioning&lt;/a&gt;, a technique for classification and regression using a decision tree in section 6.7.3 of the book.  Support for these methods is available within the &lt;a href="http://cran.r-project.org/web/packages/rpart/index.html"&gt;rpart&lt;/a&gt; package.  &lt;a href="http://www.statistik.lmu.de/~hothorn/"&gt;Torsten Hothorn&lt;/a&gt; and &lt;a href="http://eeecon.uibk.ac.at/~zeileis/"&gt;Achim Zeileis&lt;/a&gt; have extended the support for these methods with the &lt;a href="http://cran.r-project.org/web/packages/partykit/index.html"&gt;partykit&lt;/a&gt; package, which provides a toolkit with infrastructure for representing, summarizing, and visualizing tree-structured regression and classification models.&lt;br /&gt;&lt;br /&gt;In this entry, we revisit the example from the book, which worked to classify predictors of homelessness in the HELP study.  
&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ds = read.csv("http://www.math.smith.edu/r/data/help.csv")&lt;br /&gt;library(rpart); library(partykit)&lt;br /&gt;ds$sub = as.factor(ds$substance)&lt;br /&gt;homeless.rpart = rpart(homeless ~ female + i1 + sub + sexrisk + mcs +&lt;br /&gt;  pcs, method="class", data=ds)&lt;br /&gt;plot(homeless.rpart)&lt;br /&gt;text(homeless.rpart)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;This reproduces Figure 6.2 (p. 236) from the book, while we can display the output from the classification tree using the &lt;tt&gt;printcp()&lt;/tt&gt; command.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; printcp(homeless.rpart)&lt;br /&gt;Classification tree:&lt;br /&gt;rpart(formula = home ~ female + i1 + sub + sexrisk + mcs + pcs, &lt;br /&gt;    data = ds, method = "class")&lt;br /&gt;Variables actually used in tree construction:&lt;br /&gt;[1] female  i1      mcs     pcs     sexrisk&lt;br /&gt;&lt;br /&gt;Root node error: 209/453 = 0.5&lt;br /&gt;n= 453 &lt;br /&gt;    CP nsplit rel error xerror xstd&lt;br /&gt;1 0.10      0       1.0    1.0 0.05&lt;br /&gt;2 0.05      1       0.9    1.1 0.05&lt;br /&gt;3 0.03      4       0.8    1.1 0.05&lt;br /&gt;4 0.02      5       0.7    1.0 0.05&lt;br /&gt;5 0.01      7       0.7    0.9 0.05&lt;br /&gt;6 0.01      9       0.7    0.9 0.05&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Using the partykit package, we can make a nice graphic describing these results. We'll use the &lt;tt&gt;plot.party()&lt;/tt&gt; function on a party object (coerced from the rpart object generated above using &lt;tt&gt;as.party()&lt;/tt&gt;).  
This provides more information about the tree (as seen in the Figure above).&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;plot(as.party(homeless.rpart), type="simple")&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;More information as well as a lovely vignette can be found &lt;a href="http://cran.r-project.org/web/packages/partykit/vignettes/partykit.pdf"&gt;here&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;Recursive partitioning is available through SAS Enterprise Miner, a module not always included in SAS installations.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-2288070908810046111?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-494AGVnvI0:lhT5PPrLgI8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-494AGVnvI0:lhT5PPrLgI8:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-494AGVnvI0:lhT5PPrLgI8:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-494AGVnvI0:lhT5PPrLgI8:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-494AGVnvI0:lhT5PPrLgI8:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=-494AGVnvI0:lhT5PPrLgI8:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-494AGVnvI0:lhT5PPrLgI8:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=-494AGVnvI0:lhT5PPrLgI8:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/-494AGVnvI0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/2288070908810046111/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/10/example-910-more-regression-trees-and.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/2288070908810046111?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/2288070908810046111?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/-494AGVnvI0/example-910-more-regression-trees-and.html" title="Example 9.10: more regression trees and recursive partitioning with &quot;partykit&quot;" /><author><name>Nick Horton</name><uri>http://www.blogger.com/profile/00242216324355342047</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="25" height="32" src="http://1.bp.blogspot.com/_Y7yEfDrPG4o/SjJwk69Z2tI/AAAAAAAAAAM/KoYswr20tck/S220/nh2.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-7qM0bOZULTA/TphWH7a6P4I/AAAAAAAAAFM/tYh1hTCtXBA/s72-c/Rplot01.png" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/10/example-910-more-regression-trees-and.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0QMR3o4fSp7ImA9WhdbFU8.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-5486725777184061716</id><published>2011-10-13T11:35:00.009-04:00</published><updated>2011-10-13T11:56:26.435-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-13T11:56:26.435-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mean()" 
/><category scheme="http://www.blogger.com/atom/ns#" term="mosaic package" /><category scheme="http://www.blogger.com/atom/ns#" term="statistical education" /><category scheme="http://www.blogger.com/atom/ns#" term="sd()" /><category scheme="http://www.blogger.com/atom/ns#" term="summary()" /><category scheme="http://www.blogger.com/atom/ns#" term="favstats()" /><category scheme="http://www.blogger.com/atom/ns#" term="densityplot()" /><category scheme="http://www.blogger.com/atom/ns#" term="lattice library" /><title>Example 9.9: Simplifying R using the mosaic package (part 1)</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-oJ2rwoGTpiU/TpTdPCeBj0I/AAAAAAAAAFA/dX5C2QJxmwo/s1600/Rplot.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 327px;" src="http://4.bp.blogspot.com/-oJ2rwoGTpiU/TpTdPCeBj0I/AAAAAAAAAFA/dX5C2QJxmwo/s400/Rplot.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5662393881942134594" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;While both SAS and R are powerful systems for statistical analysis, they can be frustrating to new users or those learning statistics for the first time. &lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;The &lt;a href="http://cran.r-project.org/web/packages/mosaic/index.html"&gt;mosaic&lt;/a&gt; package is designed to help simplify the interface for such new users, while allowing them to undertake sophisticated analyses.  &lt;br /&gt;&lt;br /&gt;As an example of how the package simplifies life for the novice user, consider calculating summary statistics and displaying a densityplot for the CESD (measure of depressive symptom) scores by substance abuse group in the &lt;a href="http://www.math.smith.edu/sasr/datasets.php"&gt;HELP dataset&lt;/a&gt;.  
Doing this in R without the package would require mastering a package such as plyr  to replicate results by substance or a typing-intensive use of syntax to select rows corresponding to each substance.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ds = read.csv("http://www.math.smith.edu/r/data/help.csv")&lt;br /&gt;library(mosaic)&lt;br /&gt;options(digits=3)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;After loading the data and the package, and setting the number of digits to a more reasonable default, we can call the &lt;tt&gt;mean()&lt;/tt&gt; function to easily calculate this statistic (denoted by S in the result) for each of the three substance abuse groups alcohol, cocaine or heroin.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; mean(cesd ~ substance, data=ds)&lt;br /&gt;  substance    S   N Missing&lt;br /&gt;1   alcohol 34.4 177       0&lt;br /&gt;2   cocaine 29.4 152       0&lt;br /&gt;3    heroin 34.9 124       0&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Similar results are seen when we calculate the standard deviations per group:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; sd(cesd ~ substance, data=ds)&lt;br /&gt;  substance    S   N Missing&lt;br /&gt;1   alcohol 12.1 177       0&lt;br /&gt;2   cocaine 13.4 152       0&lt;br /&gt;3    heroin 11.2 124       0&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Another function can calculate a raft of summary statistics for each group that are nicely formatted.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; summary(cesd ~ substance, data=ds, fun=favstats)&lt;br /&gt;cesd    N=453&lt;br /&gt;+---------+-------+---+----+---+-------+---+----+-----+----+---+--------+&lt;br /&gt;|         |       |N  |min |Q1 |median |Q3 |max |mean |sd  |n  |missing |&lt;br /&gt;+---------+-------+---+----+---+-------+---+----+-----+----+---+--------+&lt;br /&gt;|substance|alcohol|177|4   |26 |36     |42 |58  |34.4 |12.1|177|0       |&lt;br /&gt;|         |cocaine|152|1   |19 |30     |39 |60  |29.4 |13.4|152|0       |&lt;br /&gt;|         |heroin |124|4   |28 |35     |43 |56  
|34.9 |11.2|124|0       |&lt;br /&gt;+---------+-------+---+----+---+-------+---+----+-----+----+---+--------+&lt;br /&gt;|Overall  |       |453|1   |25 |34     |41 |60  |32.8 |12.5|453|0       |&lt;br /&gt;+---------+-------+---+----+---+-------+---+----+-----+----+---+--------+&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;These commands allow quick review of the data to ensure, for example, that assumptions of equal variance are justified, or that coding errors or missing values haven't crept in.&lt;br /&gt;&lt;br /&gt;A graphical depiction using a set of densityplots (shown above) can be created using the command:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;densityplot(~ cesd, group=substance, data=ds, auto.key=TRUE)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;We're unaware of any similar program that attempts to simplify SAS syntax for educational use.  To replicate the above results, we would use the &lt;tt&gt;means&lt;/tt&gt; and &lt;tt&gt;sgpanel&lt;/tt&gt; procedures.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data ds;&lt;br /&gt;set "C:\book\help.sas7bdat";&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;options ls=80;&lt;br /&gt;proc means data=ds fw=4&lt;br /&gt;  min q1 median q3 max mean std nmiss n;&lt;br /&gt;  class substance;&lt;br /&gt;  var cesd;&lt;br /&gt;run;&lt;br /&gt;                     Analysis Variable : CESD&lt;br /&gt;&lt;br /&gt;              N           Lower             Upper               Std&lt;br /&gt; SUBSTANCE  Obs   Min  Quartile  Median  Quartile   Max  Mean   Dev&lt;br /&gt; ------------------------------------------------------------------&lt;br /&gt; alcohol    177  4.00      26.0    36.0      42.0  58.0  34.4  12.1&lt;br /&gt; cocaine    152  1.00      19.0    30.0      39.0  60.0  29.4  13.4&lt;br /&gt; heroin     124  4.00      28.0    35.0      43.0  56.0  34.9  11.2&lt;br /&gt; ------------------------------------------------------------------&lt;br /&gt;&lt;br /&gt;                                 N     N&lt;br /&gt;  
                  SUBSTANCE  Obs  Miss      N&lt;br /&gt;                    ---------------------------&lt;br /&gt;                    alcohol    177     0    177&lt;br /&gt;                    cocaine    152     0    152&lt;br /&gt;                    heroin     124     0    124&lt;br /&gt;                    ---------------------------&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;After reading the data in, the &lt;tt&gt;means&lt;/tt&gt; procedure can produce any of the desired statistics (plus many others) directly.  To replicate the &lt;tt&gt;mosaic&lt;/tt&gt; package in printing a single statistic, list only that statistic in the &lt;tt&gt;proc means&lt;/tt&gt; statement.  Note that the overall statistic in the R table is not included.  To replicate that row, you would re-run the above code, omitting the &lt;tt&gt;class&lt;/tt&gt; statement.&lt;br /&gt;&lt;br /&gt;To the best of our knowledge, there still does not exist an easy way to plot multiple densities in a single SAS plot.  In example 2.6.4 we show how it can be done using &lt;tt&gt;proc kde&lt;/tt&gt;, saving the density estimates, and plotting separately.  (&lt;a href="http://www.math.smith.edu/sasr/examples.php"&gt;Code&lt;/a&gt; for this is included at the book web site.)  But in the interest of simple code, we show a simpler method using &lt;tt&gt;proc sgpanel&lt;/tt&gt;.  
The result, shown below, is less useful than the R plot from the &lt;tt&gt;mosaic&lt;/tt&gt; package, but still gets the point across.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc sgpanel data=ds;&lt;br /&gt;  panelby substance / columns=1;&lt;br /&gt;  density cesd / type=kernel;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-r3fF-Zes8PY/TpWoved8f7I/AAAAAAAADKM/Yw4Xn1Qet1Y/s1600/three%2Bdensities.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 300px; height: 400px;" src="http://1.bp.blogspot.com/-r3fF-Zes8PY/TpWoved8f7I/AAAAAAAADKM/Yw4Xn1Qet1Y/s400/three%2Bdensities.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5662617640074248114" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-5486725777184061716?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=FnLc-zzIRoU:p-Uts6EvVts:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=FnLc-zzIRoU:p-Uts6EvVts:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=FnLc-zzIRoU:p-Uts6EvVts:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=FnLc-zzIRoU:p-Uts6EvVts:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=FnLc-zzIRoU:p-Uts6EvVts:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=FnLc-zzIRoU:p-Uts6EvVts:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=FnLc-zzIRoU:p-Uts6EvVts:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=FnLc-zzIRoU:p-Uts6EvVts:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/FnLc-zzIRoU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/5486725777184061716/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/10/example-99-simplifying-r-using-mosaic.html#comment-form" title="5 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/5486725777184061716?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/5486725777184061716?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/FnLc-zzIRoU/example-99-simplifying-r-using-mosaic.html" title="Example 9.9: Simplifying R using the mosaic package (part 1)" /><author><name>Nick Horton</name><uri>http://www.blogger.com/profile/00242216324355342047</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="25" height="32" src="http://1.bp.blogspot.com/_Y7yEfDrPG4o/SjJwk69Z2tI/AAAAAAAAAAM/KoYswr20tck/S220/nh2.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-oJ2rwoGTpiU/TpTdPCeBj0I/AAAAAAAAAFA/dX5C2QJxmwo/s72-c/Rplot.png" height="72" width="72" /><thr:total>5</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/10/example-99-simplifying-r-using-mosaic.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUACQXw6fSp7ImA9WhdUF04.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-6123062771318785516</id><published>2011-10-04T10:16:00.006-04:00</published><updated>2011-10-04T10:16:00.215-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-10-04T10:16:00.215-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="R2winbugs" /><category 
scheme="http://www.blogger.com/atom/ns#" term="Markov Chain Monte Carlo" /><category scheme="http://www.blogger.com/atom/ns#" term="proc mcmc" /><category scheme="http://www.blogger.com/atom/ns#" term="OpenBUGS" /><category scheme="http://www.blogger.com/atom/ns#" term="WinBUGS" /><category scheme="http://www.blogger.com/atom/ns#" term="proc genmod" /><category scheme="http://www.blogger.com/atom/ns#" term="rjags" /><category scheme="http://www.blogger.com/atom/ns#" term="clustering" /><category scheme="http://www.blogger.com/atom/ns#" term="JAGS" /><category scheme="http://www.blogger.com/atom/ns#" term="Bayesian methods" /><category scheme="http://www.blogger.com/atom/ns#" term="random statement" /><category scheme="http://www.blogger.com/atom/ns#" term="MCMC" /><title>Example 9.8: New stuff in SAS 9.3-- Bayesian random effects models in Proc MCMC</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-Y-EFt4mH9Kg/ToNhX3uLjlI/AAAAAAAADJA/aGUpg50ObQQ/s1600/mcmc1.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 300px;" src="http://4.bp.blogspot.com/-Y-EFt4mH9Kg/ToNhX3uLjlI/AAAAAAAADJA/aGUpg50ObQQ/s400/mcmc1.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5657472619630005842" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Rounding off our reports on major new developments in SAS 9.3, today we'll talk about &lt;tt&gt;proc mcmc&lt;/tt&gt; and the &lt;tt&gt;random&lt;/tt&gt; statement.&lt;br /&gt;&lt;br /&gt;Stand-alone packages for fitting very general Bayesian models using Markov chain Monte Carlo (MCMC) methods have been available for quite some time now.  The best known of these are BUGS and its derivatives &lt;a href="http://www.mrc-bsu.cam.ac.uk/bugs/winbugs/contents.shtml"&gt;WinBUGS&lt;/a&gt; (last updated in 2007) and &lt;a href="http://www.openbugs.info/"&gt;OpenBUGS&lt;/a&gt; .  
There are also some packages available that call these tools from R.&lt;br /&gt;&lt;br /&gt;Today we'll consider a relatively simple model: Clustered Poisson data where cluster means are a constant plus a cluster-specific exponentially-distributed random effect.  To be clear:&lt;br /&gt;y_ij ~ Poisson(mu_i)&lt;br /&gt;log(mu_i) = B_0 + r_i&lt;br /&gt;r_i ~ Exponential(lambda)&lt;br /&gt;Of course in Bayesian thinking all effects are random-- here we use the term in the sense of cluster-specific effects.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;Several SAS procedures have a &lt;tt&gt;bayes&lt;/tt&gt; statement that allows some specific models to be fit.  For example, in Section 6.6 and &lt;a href="http://sas-and-r.blogspot.com/2010/12/example-817-logistic-regression-via.html"&gt;example 8.17&lt;/a&gt;, we show Bayesian Poisson and logistic regression, respectively, using &lt;tt&gt;proc genmod&lt;/tt&gt;.  But our example today is a little unusual, and we could not find a canned procedure for it.  For these more general problems, SAS has &lt;tt&gt;proc mcmc&lt;/tt&gt;, which in SAS 9.3 allows random effects to be easily modeled.  &lt;br /&gt;&lt;br /&gt;We begin by generating the data, and fitting the naive (unclustered) model.  We set B_0 = 1 and lambda = 0.4.  
There are 200 clusters of 10 observations each, which we might imagine represent 10 students from each of 200 classrooms.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data test2;&lt;br /&gt;truebeta0 = 1;&lt;br /&gt;randscale = .4;&lt;br /&gt;call streaminit(1944);&lt;br /&gt;  do i = 1 to 200;&lt;br /&gt;    randint = rand("EXPONENTIAL") * randscale;&lt;br /&gt;    do ni = 1 to 10;&lt;br /&gt;      mu = exp(truebeta0  + randint); &lt;br /&gt;      y = rand("POISSON", mu);&lt;br /&gt;      output;&lt;br /&gt;    end;&lt;br /&gt;  end;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;proc genmod data = test2;&lt;br /&gt;model y = / dist=poisson;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;                      Standard       Wald 95%     &lt;br /&gt;Parameter  Estimate     Error   Confidence Limits&lt;br /&gt;&lt;br /&gt;Intercept    1.4983    0.0106    1.4776    1.5190&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Note the inelegant SAS syntax for fitting an intercept-only model.  The result is pretty awful-- 50% bias with respect to the global mean.  Perhaps we'll do better by acknowledging the clustering.  We might try that with normally distributed random effects in &lt;tt&gt;proc glimmix&lt;/tt&gt;.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc glimmix data = test2 method=laplace;&lt;br /&gt;class i;&lt;br /&gt;model y = / dist = poisson solution;&lt;br /&gt;random int / subject = i type = un;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;   Cov                               Standard&lt;br /&gt;   Parm       Subject    Estimate       Error&lt;br /&gt;   UN(1,1)    i            0.1682     0.01841&lt;br /&gt;&lt;br /&gt;                       Standard&lt;br /&gt;Effect     Estimate     Error  t Value  Pr &gt; |t|&lt;br /&gt;Intercept    1.3805   0.03124    44.20    &lt;.0001&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;No joy-- still a 40% bias in the estimated mean.  And the variance of the random effects is biased by more than 50%!  
Let's try fitting the model that generated the data.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc mcmc data=test2 nmc=10000 thin=10 seed=2011;&lt;br /&gt;parms fixedint 1 gscale 0.4;&lt;br /&gt;&lt;br /&gt;prior fixedint ~ normal(0, var = 10000);&lt;br /&gt;prior gscale ~ igamma(.01 , scale = .01 ) ;&lt;br /&gt;&lt;br /&gt;random rint ~ gamma(shape=1, scale=gscale) subject = i initial=0.0001;&lt;br /&gt;mu = exp(fixedint + rint);&lt;br /&gt;model y ~ poisson(mu);&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The key points of the &lt;tt&gt;proc mcmc&lt;/tt&gt; statement are &lt;tt&gt;nmc&lt;/tt&gt;, the total number of Monte Carlo iterations to perform, and &lt;tt&gt;thin&lt;/tt&gt;, which includes only every nth sample for inference.  The &lt;tt&gt;prior&lt;/tt&gt; and &lt;tt&gt;model&lt;/tt&gt; statements are fairly obvious; we note that in more complex models, parameters that are listed within a single &lt;tt&gt;prior&lt;/tt&gt; statement are sampled as a block.  We're placing priors on the fixed (shared) intercept and the scale of the exponential.  The &lt;tt&gt;mu&lt;/tt&gt; line is actually just a programming statement-- it uses the same syntax as data step programming.  &lt;br /&gt;The newly available statement is &lt;tt&gt;random&lt;/tt&gt;.   The syntax here is similar to those for the other priors, with the addition of the &lt;tt&gt;subject&lt;/tt&gt; option, which generates a unique parameter for each level of the subject variable.  The random effects themselves can be used in later statements, as shown, to enter into data distributions.  A final note here is that the exponential distribution isn't explicitly available, but since the gamma distribution with shape fixed at 1 defines the exponential, this is not a problem.  
Here are the key results.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;           Posterior Summaries&lt;br /&gt;&lt;br /&gt;                               Standard        &lt;br /&gt;  Parameter        N     Mean Deviation  &lt;br /&gt;  fixedint      1000   1.0346    0.0244  &lt;br /&gt;  gscale        1000   0.3541    0.0314  &lt;br /&gt;&lt;br /&gt;           Posterior Intervals&lt;br /&gt;&lt;br /&gt; Parameter    Alpha        HPD Interval&lt;br /&gt; fixedint     0.050      0.9834      1.0791&lt;br /&gt; gscale       0.050      0.2937      0.4163&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The 95% HPD regions include the true values of the parameters and the posterior means are much less biased than in the model assuming normal random effects.&lt;br /&gt;&lt;br /&gt;As usual, MCMC models should be evaluated carefully for convergence and coverage.  In this example, I have some concerns (see default diagnostic figure above) and if it were real data I would want to do more.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;The &lt;a href="http://cran.r-project.org/web/views/Bayesian.html"&gt;CRAN task view on Bayesian Inference&lt;/a&gt; includes a summary of tools for general and model-specific MCMC tools.  However, there is nothing like &lt;tt&gt;proc mcmc&lt;/tt&gt; in terms of being a general and easy to use tool that is native to R.  The nearest options are to use R front ends to WinBUGS/OpenBUGS (R2WinBUGS) or JAGS (rjags).  (A brief worked &lt;a href="http://www.johnmyleswhite.com/notebook/2010/08/20/using-jags-in-r-with-the-rjags-package/"&gt;example&lt;/a&gt; of using rjags was posted last year by John Myles White.)  Alternatively, with some math and a little sweat, the &lt;tt&gt;mcmc&lt;/tt&gt; package would also work.  
We'll explore an approach through one or more of these packages in a later entry, and would welcome a collaboration from anyone who would like to take that on.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-6123062771318785516?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=Y_9cbuprB3s:Q7-rxJY9f-Q:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=Y_9cbuprB3s:Q7-rxJY9f-Q:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=Y_9cbuprB3s:Q7-rxJY9f-Q:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=Y_9cbuprB3s:Q7-rxJY9f-Q:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=Y_9cbuprB3s:Q7-rxJY9f-Q:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=Y_9cbuprB3s:Q7-rxJY9f-Q:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=Y_9cbuprB3s:Q7-rxJY9f-Q:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=Y_9cbuprB3s:Q7-rxJY9f-Q:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/Y_9cbuprB3s" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/6123062771318785516/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/10/example-98-new-stuff-in-sas-93-bayesian.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6123062771318785516?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6123062771318785516?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/Y_9cbuprB3s/example-98-new-stuff-in-sas-93-bayesian.html" title="Example 9.8: New stuff in SAS 9.3-- Bayesian random effects models in Proc MCMC" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-Y-EFt4mH9Kg/ToNhX3uLjlI/AAAAAAAADJA/aGUpg50ObQQ/s72-c/mcmc1.png" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/10/example-98-new-stuff-in-sas-93-bayesian.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE8AQXg5fCp7ImA9WhdUEU4.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-7498970926291888461</id><published>2011-09-27T10:14:00.006-04:00</published><updated>2011-09-27T10:14:00.624-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-27T10:14:00.624-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="frailty 
models" /><category scheme="http://www.blogger.com/atom/ns#" term="Cox proportional hazards model" /><category scheme="http://www.blogger.com/atom/ns#" term="survival package" /><category scheme="http://www.blogger.com/atom/ns#" term="simulate data" /><category scheme="http://www.blogger.com/atom/ns#" term="survival analysis" /><category scheme="http://www.blogger.com/atom/ns#" term="random statement" /><category scheme="http://www.blogger.com/atom/ns#" term="proc phreg" /><title>Example 9.7: New stuff in SAS 9.3-- Frailty models</title><content type="html">Shared frailty models are a way of allowing correlated observations into &lt;a href="http://en.wikipedia.org/wiki/Proportional_hazards_models"&gt;proportional hazards models&lt;/a&gt;.  Briefly, instead of l_i(t) = l_0(t)e^(x_iB), we allow l_ij(t) = l_0(t)e^(x_ijB + g_i), where observations j are in clusters i, g_i is typically normal with mean 0, and g_i is uncorrelated with g_i'.  The nomenclature frailty comes from exponentiating the g_i, setting u_i = e^(g_i), and rewriting the model as l_ij(t) = l_0(t)u_i*e^(x_ijB) where the u_i are now lognormal with median 1.  Observations j within cluster i share the frailty u_i, and fail faster (are frailer) than average if u_i &gt; 1.&lt;br /&gt;&lt;br /&gt;In SAS 9.2, this model could not be fit, though it is included in the &lt;tt&gt;survival&lt;/tt&gt; package in R.  (Section 4.3.2)  With SAS 9.3, it can now be fit.  We explore here through simulation, extending the approach shown in &lt;a href="http://sas-and-r.blogspot.com/2010/03/example-730-simulate-censored-survival.html"&gt;example 7.30&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;To include frailties in the model, we loop across the clusters to first generate the frailties, then insert the loop from example 7.30, which now represents the observations within cluster, adding the frailty to the survival time model.  
There's no need to adjust the censoring time.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data simfrail;&lt;br /&gt;  beta1 = 2;&lt;br /&gt;  beta2 = -1;&lt;br /&gt;  lambdat = 0.002; *baseline hazard;&lt;br /&gt;  lambdac = 0.004; *censoring hazard;&lt;br /&gt;  do i = 1 to 250; *new frailty loop;&lt;br /&gt;    frailty = normal(1999) * sqrt(.5);&lt;br /&gt;    do j = 1 to 5; *original loop;&lt;br /&gt;      x1 = normal(0);&lt;br /&gt;      x2 = normal(0);&lt;br /&gt;      * new model of event time, with frailty added;&lt;br /&gt;      linpred = exp(-beta1*x1 - beta2*x2 + frailty);&lt;br /&gt;      t = rand("WEIBULL", 1, lambdaT * linpred);&lt;br /&gt;        * time of event;&lt;br /&gt;      c = rand("WEIBULL", 1, lambdaC);&lt;br /&gt;        * time of censoring;&lt;br /&gt;      time = min(t, c);    * which came first?;&lt;br /&gt;      censored = (c lt t);&lt;br /&gt;    output;&lt;br /&gt; end;&lt;br /&gt;  end;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;For comparison's sake, we replicate the naive model assuming independence:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc phreg data=simfrail;&lt;br /&gt;  model time*censored(1) = x1 x2;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;               Parameter   Standard                         Hazard&lt;br /&gt; Parameter DF   Estimate      Error Chi-Square Pr &gt; ChiSq    Ratio&lt;br /&gt;&lt;br /&gt; x1         1    1.68211    0.05859   824.1463     &lt;.0001    5.377&lt;br /&gt; x2         1   -0.88585    0.04388   407.4942     &lt;.0001    0.412&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The parameter estimates are rather biased.  
In contrast, here is the correct frailty model.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc phreg data=simfrail;&lt;br /&gt;  class i;&lt;br /&gt;  model time*censored(1) = x1 x2;&lt;br /&gt;  random i / noclprint;&lt;br /&gt;run;&lt;br /&gt;                    Cov         REML    Standard&lt;br /&gt;                    Parm    Estimate       Error&lt;br /&gt;&lt;br /&gt;                    i         0.5329     0.07995&lt;br /&gt;&lt;br /&gt;               Parameter   Standard                         Hazard&lt;br /&gt; Parameter DF   Estimate      Error Chi-Square Pr &gt; ChiSq    Ratio&lt;br /&gt;&lt;br /&gt; x1         1    2.03324    0.06965   852.2179     &lt;.0001    7.639&lt;br /&gt; x2         1   -1.00966    0.05071   396.4935     &lt;.0001    0.364&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;This returns estimates gratifyingly close to the truth.  The syntax of the &lt;tt&gt;random&lt;/tt&gt; statement is fairly straightforward-- the &lt;tt&gt;noclprint&lt;/tt&gt; option prevents printing all the values of &lt;tt&gt;i&lt;/tt&gt;.  The clustering variable must be specified in the &lt;tt&gt;class&lt;/tt&gt; statement.  The output shows the estimated variance of the g_i.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;In our book (section 4.16.14) we show an example of fitting the uncorrelated data model, but we don't display a frailty model.  Here, we use the data generated in SAS, so we omit the data simulation in R.  As in SAS, it would be a trivial extension of the code presented in &lt;a href="http://sas-and-r.blogspot.com/2010/03/example-730-simulate-censored-survival.html"&gt;example 7.30&lt;/a&gt;.  
For parallelism, we show the results of ignoring the correlation, first.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; library(survival)&lt;br /&gt;&gt; with(simfrail, coxph(formula = Surv(time, 1-censored) ~ x1 + x2))&lt;br /&gt;&lt;br /&gt;     coef exp(coef) se(coef)     z p&lt;br /&gt;x1  1.682     5.378   0.0586  28.7 0&lt;br /&gt;x2 -0.886     0.412   0.0439 -20.2 0&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;with identical results to above.  Note that the &lt;tt&gt;Surv&lt;/tt&gt; function expects an indicator of the event, vs. SAS expecting a censoring indicator.  &lt;br /&gt;&lt;br /&gt;As with SAS, the syntax for incorporating the frailty is simple.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; with(simfrail, coxph(formula = Surv(time, 1-censored) ~ x1 + x2 &lt;br /&gt;    + frailty(i)))&lt;br /&gt;&lt;br /&gt;            coef se(coef) se2    Chisq DF  p&lt;br /&gt;x1          2.02 0.0692   0.0662 850     1 0&lt;br /&gt;x2         -1.00 0.0506   0.0484 393     1 0&lt;br /&gt;frailty(i)                       332   141 0&lt;br /&gt;&lt;br /&gt;Variance of random effect= 0.436&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Here, the results differ slightly from the SAS model, but still return parameter estimates that are quite similar.  We're not familiar enough with the computational methods to diagnose the differences.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-7498970926291888461?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=SWqbdbuxoZs:sAMtf0v09DY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=SWqbdbuxoZs:sAMtf0v09DY:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=SWqbdbuxoZs:sAMtf0v09DY:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=SWqbdbuxoZs:sAMtf0v09DY:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=SWqbdbuxoZs:sAMtf0v09DY:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=SWqbdbuxoZs:sAMtf0v09DY:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=SWqbdbuxoZs:sAMtf0v09DY:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=SWqbdbuxoZs:sAMtf0v09DY:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/SWqbdbuxoZs" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/7498970926291888461/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/09/example-97-new-stuff-in-sas-93-frailty.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/7498970926291888461?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/7498970926291888461?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/SWqbdbuxoZs/example-97-new-stuff-in-sas-93-frailty.html" title="Example 9.7: New stuff in SAS 9.3-- Frailty models" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/09/example-97-new-stuff-in-sas-93-frailty.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C04CQHo7fip7ImA9WhdVFkw.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-4099352612946442459</id><published>2011-09-21T09:30:00.001-04:00</published><updated>2011-09-21T09:32:41.406-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-21T09:32:41.406-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="customizing plots" /><category scheme="http://www.blogger.com/atom/ns#" term="missing data modeling" /><category scheme="http://www.blogger.com/atom/ns#" term="comparing models" /><category 
scheme="http://www.blogger.com/atom/ns#" term="assumptions" /><category scheme="http://www.blogger.com/atom/ns#" term="graphics" /><category scheme="http://www.blogger.com/atom/ns#" term="multiple imputation" /><title>Example 9.6: Model comparison plots (Completed)</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-0HGL_FHHWbQ/TnnfsIBxYxI/AAAAAAAAAE4/v1jb1zUNw1c/s1600/Rplot.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 327px;" src="http://1.bp.blogspot.com/-0HGL_FHHWbQ/TnnfsIBxYxI/AAAAAAAAAE4/v1jb1zUNw1c/s400/Rplot.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5654796756302521106" /&gt;&lt;/a&gt;&lt;br /&gt;We often work in settings where the data set has a lot of missing data-- some missingness in the (many) covariates, some in the main exposure of interest, and still more in the outcome. (Nick describes this as "job security for statisticians").&lt;br /&gt;&lt;br /&gt;Some analysts are leery of imputing anything at all, preferring to rely on the assumption that the data are missing completely at random.  Others will use multiple imputation for covariates, but feel they should use "real" data for the exposure and outcome.  Still others will impute the exposure but not the outcome.  Theory and experiments suggest (&lt;a href="http://www.ncbi.nlm.nih.gov/pubmed/16980150"&gt;Moons et al JCE 2006&lt;/a&gt;) that all missing data should be imputed.  Depending on the imputation method, this may offer some protection against missing data that is missing at random, a more general condition than missing completely at random.&lt;br /&gt;&lt;br /&gt;In one analysis, we decided to use each of these approaches and demonstrate the results that would be obtained.  The data are shown below.  The first column denotes the data used, the second has the effect on the mean and CI limits for the effect.  
How can we present these results clearly?  We designed a graphic that requires some customization using either SAS or R but which makes the point elegantly.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;1 .11  &lt;br /&gt;1 -.05&lt;br /&gt;1 .28 &lt;br /&gt;2 .07 &lt;br /&gt;2 .21 &lt;br /&gt;2 -.07&lt;br /&gt;3 .06 &lt;br /&gt;3 -.08&lt;br /&gt;3 .2 &lt;br /&gt;4 0 &lt;br /&gt;4 -.13&lt;br /&gt;4 .12 &lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;The SAS version is shown below.  (Click on it for a larger image.)  To generate it, add a final column to the data, where the effect estimate is repeated but the other values are not.  Then a basic plot can be created in &lt;tt&gt;proc gplot&lt;/tt&gt; with the &lt;tt&gt;hiloc&lt;/tt&gt; interpolation in the &lt;tt&gt;symbol&lt;/tt&gt; statement and the &lt;tt&gt;overlay&lt;/tt&gt; option in the &lt;tt&gt;plot&lt;/tt&gt; statement.  (See book section 5.3 and other blog &lt;a href="http://sas-and-r.blogspot.com/2011/03/example-830-compare-poisson-and.html"&gt;entries&lt;/a&gt; for details.)   
Try the code without the &lt;tt&gt;axis&lt;/tt&gt; statements to see what happens.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data ke1;&lt;br /&gt;input datatype estimate meanval;&lt;br /&gt;cards;&lt;br /&gt;1 .11 .11 &lt;br /&gt;1 -.05 .&lt;br /&gt;1 .28 .&lt;br /&gt;2 .07 .07&lt;br /&gt;2 .21 .&lt;br /&gt;2 -.07 .&lt;br /&gt;3 .06 .06&lt;br /&gt;3 -.08 .&lt;br /&gt;3 .2 .&lt;br /&gt;4 0 0&lt;br /&gt;4 -.13 .&lt;br /&gt;4 .12 .&lt;br /&gt;;;&lt;br /&gt;cards;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;symbol1 i=hiloc c=black v=none;&lt;br /&gt;symbol2 i=none v=dot h=1  c=black;&lt;br /&gt;axis1 minor=none order = (1 to 4 by 1)&lt;br /&gt;  value = (tick = 1 "Complete" &lt;br /&gt;             justify=c  "Case" justify = c "(N = 2055)"&lt;br /&gt;           tick=2 "MI" justify=c  "Covariates only" &lt;br /&gt;             justify=c "(N = 2961)"&lt;br /&gt;           tick=3 "MI" justify=c  "Covariates and exposure" &lt;br /&gt;             justify=c "(N = 3994)"&lt;br /&gt;           tick=4 "MI" justify=c  "All variables" &lt;br /&gt;             justify=c "(N = 6782)"&lt;br /&gt;           )&lt;br /&gt;  label = none&lt;br /&gt;  offset = (2 cm, 2 cm)&lt;br /&gt;;&lt;br /&gt;axis2 minor=none order = (-.2 to .3 by .1) &lt;br /&gt;  label = (angle=90 "Effect of exposure on outcome");&lt;br /&gt;title "Compare missingness approaches";&lt;br /&gt;proc gplot data = ke1;&lt;br /&gt;plot (estimate meanval) * datatype / &lt;br /&gt;  overlay haxis=axis1 vref=0 vaxis=axis2;&lt;br /&gt;run;&lt;br /&gt;quit;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The two &lt;tt&gt;axis&lt;/tt&gt; statements make the plot work.  The &lt;tt&gt;axis1&lt;/tt&gt; statement uses the &lt;tt&gt;value&lt;/tt&gt; option to hand-write the labels describing the data sets.  Note that the &lt;tt&gt;justify = c&lt;/tt&gt; causes a new line to be started.  The &lt;tt&gt;offset&lt;/tt&gt; option adds a little space to the left and right of the data.   
The &lt;tt&gt;axis2&lt;/tt&gt; statement specifies the range and label for the vertical axis.  The extra &lt;tt&gt;symbol&lt;/tt&gt; statement and the &lt;tt&gt;overlay&lt;/tt&gt; option just plot the dots that call attention to the effect estimates-- otherwise they would show just a small crossbar at the effect.&lt;br /&gt;&lt;br /&gt;The plot suggests that as more observations are included and the multiple imputation gains accuracy the effect attenuates and the standard errors decrease.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;In R we create the equivalent plot in multiple steps, first by creating an empty plot of the correct size then iterating through each of the lines.  As with the SAS approach, a little manipulation of the raw data is required. &lt;br /&gt;&lt;pre&gt;&lt;br /&gt;n = c(2055, 2961, 3994, 6782)&lt;br /&gt;labels = c("Complete Case", "MI\ncovariates only",&lt;br /&gt;           "MI\ncovariates and exposure",&lt;br /&gt;           "MI\nall variables")&lt;br /&gt;est =    c(0.11,  0.07,  0.06,  0)&lt;br /&gt;lower = c(-0.05, -0.07, -0.08, -0.13)&lt;br /&gt;upper =  c(0.28,  0.21,  0.20,  0.12)&lt;br /&gt;&lt;br /&gt;plot(c(0.5, 4.5), c(min(lower)-.10, max(upper)), type="n", xlab="", &lt;br /&gt;  xaxt="n", ylab="Effect of exposure on outcome")&lt;br /&gt;title("Compare missingness approaches")&lt;br /&gt;for (i in 1:length(n)) {&lt;br /&gt;  points(i, est[i])&lt;br /&gt;  lines(c(i,i), c(lower[i], upper[i]))&lt;br /&gt;  stringval = paste(labels[i],"\n(N=",n[i],")")&lt;br /&gt;  text(i, min(lower) - .05, stringval, cex=.6)&lt;br /&gt;}&lt;br /&gt;abline(h=0, lty=2)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The resulting plot is shown at the top.  As opposed to the SAS approach, more of the figure can be defined using the data.  
For example, the y-axis values are determined from the minimum and maximum values to plot.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-UOxUz3qayuk/Tnnk7hXGEYI/AAAAAAAADHg/wBalSUuYDH8/s1600/MI%2Bexamples.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 300px;" src="http://4.bp.blogspot.com/-UOxUz3qayuk/Tnnk7hXGEYI/AAAAAAAADHg/wBalSUuYDH8/s400/MI%2Bexamples.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5654802518359019906" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Note: a draft of this entry was published accidentally.  Many apologies. --Ken&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-4099352612946442459?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UqtxVmJLBvM:1W0KLS8raek:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UqtxVmJLBvM:1W0KLS8raek:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UqtxVmJLBvM:1W0KLS8raek:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UqtxVmJLBvM:1W0KLS8raek:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UqtxVmJLBvM:1W0KLS8raek:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=UqtxVmJLBvM:1W0KLS8raek:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=UqtxVmJLBvM:1W0KLS8raek:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=UqtxVmJLBvM:1W0KLS8raek:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/UqtxVmJLBvM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/4099352612946442459/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/09/example-96-model-comparison-plots.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/4099352612946442459?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/4099352612946442459?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/UqtxVmJLBvM/example-96-model-comparison-plots.html" title="Example 9.6: Model comparison plots (Completed)" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-0HGL_FHHWbQ/TnnfsIBxYxI/AAAAAAAAAE4/v1jb1zUNw1c/s72-c/Rplot.png" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/09/example-96-model-comparison-plots.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkICQXY7eip7ImA9WhdWGU4.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-7293485842820602839</id><published>2011-09-13T14:13:00.005-04:00</published><updated>2011-09-13T14:29:20.802-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-13T14:29:20.802-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="flexmix package" /><category 
scheme="http://www.blogger.com/atom/ns#" term="mixtools package" /><category scheme="http://www.blogger.com/atom/ns#" term="FLXPmultinom function" /><category scheme="http://www.blogger.com/atom/ns#" term="proc fmm" /><category scheme="http://www.blogger.com/atom/ns#" term="FLXMRglmfix function" /><category scheme="http://www.blogger.com/atom/ns#" term="proc glm" /><category scheme="http://www.blogger.com/atom/ns#" term="finite mixture models" /><title>Example 9.5: New stuff in SAS 9.3-- proc FMM</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-j-PJ3er7jRU/Tmoa5euo02I/AAAAAAAADGo/Yn-gGOun3is/s1600/ANCOVAPlot1.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 300px;" src="http://2.bp.blogspot.com/-j-PJ3er7jRU/Tmoa5euo02I/AAAAAAAADGo/Yn-gGOun3is/s400/ANCOVAPlot1.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5650358257293251426" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Finite mixture models (FMMs) can be used in settings where some unmeasured classification separates the observed data into groups with different exposure/outcome relationships.  One familiar example of this is a zero-inflated model, where some observations come from a degenerate distribution with all mass at 0.  In that case the exposure/outcome relationship is less interesting in the degenerate distribution group, but there would be considerable interest in the estimated probability of group membership.  Another possibly familiar setting is the estimation of a continuous density as a mixture of normal distributions.&lt;br /&gt;&lt;br /&gt;More generally, there could be several groups, with "concomitant" covariates predicting group membership.  Each group might have different sets of predictors and outcomes from different distribution families.  
On the other hand, in a "homogeneous" mixture setting, all groups have the same distributional form, but with different parameter values.  If the covariates in the model are the same, this setting is similar to an ordinary regression model where every observed covariate is interacted with the (unobserved) group membership variable.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;SAS 9.3 includes the "experimental" &lt;tt&gt;FMM&lt;/tt&gt; procedure to fit these models.  We're unsure what criteria SAS uses to decide when a procedure is experimental and when it becomes "production", but experimental procedures in SAS/STAT usually do become production eventually.  &lt;br /&gt;&lt;br /&gt;As hinted at above, the generality implied by the models is fairly vast, and the FMM procedure includes a lot of generality.  The most obvious limitation is that it requires independent observations.&lt;br /&gt;&lt;br /&gt;We'll demonstrate with a simulated data set.  We create a variable &lt;tt&gt;x&lt;/tt&gt; that predicts both group membership and an outcome &lt;tt&gt;y&lt;/tt&gt; with different linear regression parameters depending on group.  The mixing probability follows a logistic regression with intercept=-1 and slope=2.  The intercept and slope for the outcome are (0, 1) and (3, 1.2) for groups 0 and 1, respectively.  The resulting data is plotted above using a default plot from &lt;tt&gt;proc glm&lt;/tt&gt; using the actual group membership.  
A histogram and nearest normal density for the residuals from a simple OLS are shown, with R code, below-- they appear more or less normal.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data fmmtest;&lt;br /&gt;do i = 1 to 5000;&lt;br /&gt;  x = normal(0);&lt;br /&gt;  group = (exp(-1 + 2*x)/(1 + exp(-1 + 2*x))) &lt;br /&gt;    gt uniform(0);&lt;br /&gt;  y = (group * 3) + ((1 + group/5) * x) + &lt;br /&gt;    normal(0) * sqrt(1);&lt;br /&gt;  output;&lt;br /&gt;  end;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;title "Fixed k = 2";&lt;br /&gt;proc fmm data = fmmtest;&lt;br /&gt;  model y = x / k=2 equate=scale;&lt;br /&gt;  probmodel x;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;In the &lt;tt&gt;model&lt;/tt&gt; statement, the option &lt;tt&gt;k&lt;/tt&gt; defines how many groups (or "components") are to be included.  There are also options &lt;tt&gt;kmin&lt;/tt&gt; and &lt;tt&gt;kmax&lt;/tt&gt; which will fit models with various numbers of components and report results of one of them based on some model criterion.  The &lt;tt&gt;equate&lt;/tt&gt; option allows the user to force some elements of the component distributions to be equal.  Here we force the residual variances to be equal, since that's the model that generated our data.  The &lt;tt&gt;probmodel&lt;/tt&gt; statement is how the concomitant variables enter the model.  
The default settings model a logistic (or generalized logit) regression using the listed covariates.&lt;br /&gt;&lt;br /&gt;We show only the parameter estimates.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;                                Standard&lt;br /&gt;Component  Effect     Estimate     Error  z Value  Pr &gt; |z|&lt;br /&gt;        1  Intercept    2.9856   0.04856    61.48    &lt;.0001&lt;br /&gt;        1  x            1.2060   0.03927    30.71    &lt;.0001&lt;br /&gt;        2  Intercept  -0.01978   0.02512    -0.79    0.4309&lt;br /&gt;        2  x            0.9889   0.02453    40.32    &lt;.0001&lt;br /&gt;        1  Variance     1.0279   0.02531&lt;br /&gt;        2  Variance     1.0279   0.02531&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;            Parameter Estimates for Mixing Probabilities&lt;br /&gt;                         Standard&lt;br /&gt;Effect       Estimate       Error    z Value    Pr &gt; |z|&lt;br /&gt;Intercept     -1.0018     0.05665     -17.68      &lt;.0001&lt;br /&gt;x              1.9403     0.07413      26.17      &lt;.0001&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The recovery of the true parameters is quite good.  This may be unsurprising given the sample size and the distinctness of the components, but we think it's pretty impressive.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;The &lt;a href="http://cran.r-project.org/web/views/Cluster.html"&gt;CRAN Task View on Cluster Analysis &amp; Finite Mixture Models&lt;/a&gt; provides an overview of packages available for R. 
&lt;br /&gt;&lt;br /&gt;First, we'll make the data and take a look at the simple model diagnostics.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; x = rnorm(5000)&lt;br /&gt;&gt; probgroup1 = exp(-1 + 2*x)/(1 + exp(-1 + 2*x))&lt;br /&gt;&gt; group = ifelse(probgroup1 &gt; runif(5000),1,0)&lt;br /&gt;&gt; y = (group * 3) + ((1 + group/5) * x) + rnorm(5000);&lt;br /&gt;&lt;br /&gt;&gt; resids = residuals(lm(y~x))&lt;br /&gt;&gt; hist(resids, freq = FALSE)&lt;br /&gt;&gt; dvals = seq(from=min(resids), to=max(resids),length=100)&lt;br /&gt;&gt; lines(dvals, dnorm(dvals, mean(resids), sd(resids)))&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The histogram of the residuals from the simple OLS model is shown below.  They appear reasonably normal.&lt;br /&gt;&lt;br /&gt; Ron Pearson &lt;a href="http://exploringdatablog.blogspot.com/2011/08/fitting-mixture-distributions-with-r.html"&gt;recently&lt;/a&gt; showed a simple example using the &lt;tt&gt;mixtools&lt;/tt&gt; package. We tried to replicate the example above using &lt;tt&gt;mixtools&lt;/tt&gt; but were unable to find functionality for concomitant variables.    If you fit the closest model, assuming the mixing probabilities depend on no covariates, this is what happens:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; library(mixtools)&lt;br /&gt;&gt; mixout.mt = regmixEM(y,x,k=2,arbvar = FALSE)&lt;br /&gt;&gt; summary(mixout.mt)&lt;br /&gt;summary of regmixEM object:&lt;br /&gt;  comp 1   comp 2&lt;br /&gt;lambda  0.5275915 0.472408&lt;br /&gt;sigma   1.0553877 1.055388&lt;br /&gt;beta1  -0.0035015 2.237736&lt;br /&gt;beta2   1.5068146 2.004008&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;which is pretty awful.&lt;br /&gt;&lt;br /&gt;Fortunately, the &lt;tt&gt;flexmix&lt;/tt&gt; package, while a bit more difficult to use, offers the needed flexibility.  As a side note, the generality of the package is pretty awe-inspiring.  
Nice work!&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;library(flexmix)&lt;br /&gt;mixout.fm=flexmix(y~x, k=2, model=FLXMRglmfix(y~x, varFix=TRUE),&lt;br /&gt;   concomitant=FLXPmultinom(~ x))&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The &lt;tt&gt;flexmix&lt;/tt&gt; function uses a variety of special objects that are created by other functions the package provides.  Here we use the &lt;tt&gt;FLXMRglmfix&lt;/tt&gt; function to force equal variances across the components and the &lt;tt&gt;FLXPmultinom&lt;/tt&gt; function to define the logistic regression on the covariate &lt;tt&gt;x&lt;/tt&gt;.  The results are:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; parameters(mixout5)&lt;br /&gt;                   Comp.1       Comp.2&lt;br /&gt;coef.(Intercept) 3.000412 -0.008360171&lt;br /&gt;coef.x           1.190454  1.047660263&lt;br /&gt;sigma            0.972306  0.972306015&lt;br /&gt;&gt; parameters(mixout5, which = "concomitant")&lt;br /&gt;            1          2&lt;br /&gt;(Intercept) 0  0.9605367&lt;br /&gt;x           0 -1.9109429&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;which also reproduces reality with impressive accuracy.  
Note that the concomitant variable model apparently predicts membership in the second component, so the signs are reversed from the generating model.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-oC8XBmLMkGU/Tm-L1a0WEdI/AAAAAAAADHE/5nKQaTfEiEY/s1600/histresids.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 327px;" src="http://1.bp.blogspot.com/-oC8XBmLMkGU/Tm-L1a0WEdI/AAAAAAAADHE/5nKQaTfEiEY/s400/histresids.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5651889807221461458" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-7293485842820602839?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=AF7MHZDbdl8:NkJGNYDPpEg:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=AF7MHZDbdl8:NkJGNYDPpEg:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=AF7MHZDbdl8:NkJGNYDPpEg:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=AF7MHZDbdl8:NkJGNYDPpEg:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=AF7MHZDbdl8:NkJGNYDPpEg:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=AF7MHZDbdl8:NkJGNYDPpEg:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=AF7MHZDbdl8:NkJGNYDPpEg:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=AF7MHZDbdl8:NkJGNYDPpEg:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/AF7MHZDbdl8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/7293485842820602839/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/09/example-95-new-stuff-in-sas-93-proc-fmm.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/7293485842820602839?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/7293485842820602839?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/AF7MHZDbdl8/example-95-new-stuff-in-sas-93-proc-fmm.html" title="Example 9.5: New stuff in SAS 9.3-- proc FMM" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-j-PJ3er7jRU/Tmoa5euo02I/AAAAAAAADGo/Yn-gGOun3is/s72-c/ANCOVAPlot1.png" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/09/example-95-new-stuff-in-sas-93-proc-fmm.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEAHQ3czfip7ImA9WhdVFE8.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-6849371745052059255</id><published>2011-09-06T14:12:00.007-04:00</published><updated>2011-09-19T06:05:32.986-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-09-19T06:05:32.986-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="proc mi" /><category 
scheme="http://www.blogger.com/atom/ns#" term="non-monotonic missingness" /><category scheme="http://www.blogger.com/atom/ns#" term="multiple imputation" /><title>Example 9.4: New stuff in SAS 9.3-- MI FCS</title><content type="html">&lt;blockquote&gt;&lt;/blockquote&gt;We begin the new academic year with a series of entries exploring new capabilities of SAS 9.3, and some functionality we haven't previously written about.&lt;br /&gt;&lt;br /&gt;We'll begin with multiple imputation.  Here, SAS has previously been limited to multivariate normal data or to monotonic missing data patterns.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;SAS 9.3 adds the &lt;tt&gt;FCS&lt;/tt&gt; statement to &lt;tt&gt;proc mi&lt;/tt&gt;.  This implements a fully conditional specification imputation method (e.g., van Buuren, S. (2007), "Multiple Imputation of Discrete and Continuous Data by Fully Conditional Specification," Statistical Methods in Medical Research, 16, 219–242.)  Briefly, we begin by imputing all the missing data with a simple method.  Then missing values for each variable are imputed using a model created with the real and current imputed values for the other variables, iterating across the variables several times.&lt;br /&gt;&lt;br /&gt;We replicate the multiple imputation example from the book, section 6.5.  In that example, we used the &lt;tt&gt;mcmc&lt;/tt&gt; statement for imputation: at the time, this was the only method available in SAS when a non-monotonic missingness pattern was present.  
We noted at the time that this was not "strictly appropriate" since the &lt;tt&gt;mcmc&lt;/tt&gt; method assumes multivariate normality, and two of our missing variables were dichotomous.&lt;br /&gt;&lt;pre&gt;filename myhm url "http://www.math.smith.edu/sasr/datasets/helpmiss.csv" lrecl=704;&lt;br /&gt;&lt;br /&gt;proc import replace datafile=myhm out=help dbms=dlm;&lt;br /&gt;delimiter=',';&lt;br /&gt;getnames=yes;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;proc mi data = help nimpute=20 out=helpmi20fcs;&lt;br /&gt;class homeless female;&lt;br /&gt;var i1 homeless female sexrisk indtot mcs pcs;&lt;br /&gt;fcs&lt;br /&gt; logistic (female)&lt;br /&gt; logistic (homeless);&lt;br /&gt;run;&lt;/pre&gt;In the &lt;tt&gt;fcs&lt;/tt&gt; statement, you list the method (&lt;tt&gt;logistic, discrim, reg, regpmm&lt;/tt&gt;) to be used, naming the variable for which the method is to be used in parentheses following the method.  (You can also specify a subset of covariates to be used in the method, using the usual SAS model-building syntax.)  Omitted covariates are imputed using the default &lt;tt&gt;reg&lt;/tt&gt; method.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;ods output parameterestimates=helpmipefcs&lt;br /&gt;covb = helpmicovbfcs;&lt;br /&gt;proc logistic data=helpmi20fcs descending;&lt;br /&gt;by _imputation_;&lt;br /&gt;model homeless=female i1 sexrisk indtot /covb;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;proc mianalyze parms=helpmipefcs covb=helpmicovbfcs;&lt;br /&gt;  modeleffects intercept female i1 sexrisk indtot;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;with the following primary result:&lt;br /&gt;&lt;pre&gt;Parameter    Estimate   Std Error  95% Conf. 
Limits&lt;br /&gt;&lt;br /&gt;intercept   -2.492733    0.591241  -3.65157  -1.33390 &lt;br /&gt;female      -0.245103    0.244029  -0.72339   0.23319&lt;br /&gt;i1           0.023207    0.005610   0.01221   0.03420&lt;br /&gt;sexrisk      0.058642    0.035803  -0.01153   0.12882&lt;br /&gt;indtot       0.047971    0.015745   0.01711   0.07883&lt;br /&gt;&lt;/pre&gt;which is quite similar to our previous results.  Given the small proportion of missing values, this isn't very surprising.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;Several R packages allow imputation for a general pattern of missingness and missing outcome distribution.  A brief summary of missing data tools in R can be found in the &lt;a href="http://cran.r-project.org/web/views/Multivariate.html"&gt;CRAN Task view on Multivariate Statistics&lt;/a&gt;.  We'll return to this topic from the R perspective in a future entry.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-6849371745052059255?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-W3B1MsI7Ok:z1bzggsoJ6I:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-W3B1MsI7Ok:z1bzggsoJ6I:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-W3B1MsI7Ok:z1bzggsoJ6I:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-W3B1MsI7Ok:z1bzggsoJ6I:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-W3B1MsI7Ok:z1bzggsoJ6I:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=-W3B1MsI7Ok:z1bzggsoJ6I:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=-W3B1MsI7Ok:z1bzggsoJ6I:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=-W3B1MsI7Ok:z1bzggsoJ6I:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/-W3B1MsI7Ok" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/6849371745052059255/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/09/example-94-new-stuff-in-sas-93-mi-fcs.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6849371745052059255?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6849371745052059255?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/-W3B1MsI7Ok/example-94-new-stuff-in-sas-93-mi-fcs.html" title="Example 9.4: New stuff in SAS 9.3-- MI FCS" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/09/example-94-new-stuff-in-sas-93-mi-fcs.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DU4GQXw4fyp7ImA9WhdREUk.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-614514173514775662</id><published>2011-07-31T17:32:00.004-04:00</published><updated>2011-07-31T17:32:00.237-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-31T17:32:00.237-04:00</app:edited><title>Taking August off!</title><content type="html">We'll be back with recharged batteries and lots of new entries in September.  
Have a great summer*!&lt;br /&gt;&lt;br /&gt;As usual, please send any questions you have about using SAS or R.&lt;br /&gt;&lt;br /&gt;*Not valid in the southern hemisphere.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-614514173514775662?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=XHIM93iginw:x3QQkFyxrNk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=XHIM93iginw:x3QQkFyxrNk:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=XHIM93iginw:x3QQkFyxrNk:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=XHIM93iginw:x3QQkFyxrNk:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=XHIM93iginw:x3QQkFyxrNk:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=XHIM93iginw:x3QQkFyxrNk:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=XHIM93iginw:x3QQkFyxrNk:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=XHIM93iginw:x3QQkFyxrNk:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/XHIM93iginw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/614514173514775662/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/07/taking-august-off.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/614514173514775662?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/614514173514775662?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/XHIM93iginw/taking-august-off.html" title="Taking August off!" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/07/taking-august-off.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C08AQXw8eSp7ImA9WhdSFk8.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-3320461948713937922</id><published>2011-07-25T15:24:00.005-04:00</published><updated>2011-07-25T15:24:00.271-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-25T15:24:00.271-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="sas7bdat package" /><category scheme="http://www.blogger.com/atom/ns#" term="SAS data sets" /><category scheme="http://www.blogger.com/atom/ns#" term="convert SAS to R" /><category scheme="http://www.blogger.com/atom/ns#" term="Matt Shotwell" /><category scheme="http://www.blogger.com/atom/ns#" term="read.sas7bdat()" 
/><category scheme="http://www.blogger.com/atom/ns#" term="sas7bdat format" /><title>Really useful R package: sas7bdat</title><content type="html">For SAS users, one hassle in trying things in R, let alone migrating, is the difficulty of getting data out of SAS and into R.  In our book (section 1.2.2) and in a blog &lt;a href="http://sas-and-r.blogspot.com/2009/08/example-79-get-data-from-sas-into-r.html"&gt;entry&lt;/a&gt; we've covered getting data out of SAS native data sets.  Unfortunately, for all of these methods, you need a working, licensed version of SAS. &lt;br /&gt;&lt;br /&gt;However &lt;a href="http://biostat.mc.vanderbilt.edu/wiki/main/MattShotwell"&gt;Matt Shotwell&lt;/a&gt; has &lt;a href="http://biostatmatt.com/archives/1468"&gt;reverse-engineered the sas7bdat file format&lt;/a&gt;.  This means that you can now read a SAS data set &lt;i&gt;without a working copy of SAS&lt;/i&gt;.  This is a wonderful thing, and in fact SAS Institute ought to have provided this ability long ago.  The package is experimental, but it worked fine for two small data sets.  Matt tells me that as of 7/2011, the package only works for sas7bdat files generated on 32-bit Windows systems.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;Install the package sas7bdat.  Then use the &lt;tt&gt;read.sas7bdat()&lt;/tt&gt; function.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;library(sas7bdat)&lt;br /&gt;helpfromSAS = read.sas7bdat("http://www.math.smith.edu&lt;br /&gt;/sasr/datasets/help.sas7bdat")&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;(Note that newlines are not allowed in the URL in practice, but formatting for the blog required it.)&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; is.data.frame(helpfromSAS)&lt;br /&gt;[1] TRUE&lt;br /&gt;&gt; summary(helpfromSAS$MCS)&lt;br /&gt;   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
&lt;br /&gt;  6.763  21.680  28.600  31.680  40.940  62.180 &lt;br /&gt;&gt; with(helpfromSAS, summary(SUBSTANCE))&lt;br /&gt;alcohol cocaine  heroin &lt;br /&gt;    177     152     124&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;It's unclear why the variable names are all capitalized.  That didn't happen in another trial, so it must be something about the way the help.sas7bdat data set was constructed.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-3320461948713937922?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=fwFPoUJ20U4:rgcV4Aj8J44:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=fwFPoUJ20U4:rgcV4Aj8J44:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=fwFPoUJ20U4:rgcV4Aj8J44:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=fwFPoUJ20U4:rgcV4Aj8J44:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=fwFPoUJ20U4:rgcV4Aj8J44:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=fwFPoUJ20U4:rgcV4Aj8J44:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=fwFPoUJ20U4:rgcV4Aj8J44:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=fwFPoUJ20U4:rgcV4Aj8J44:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/fwFPoUJ20U4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/3320461948713937922/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/07/really-useful-r-package-sas7bdat.html#comment-form" title="10 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/3320461948713937922?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/3320461948713937922?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/fwFPoUJ20U4/really-useful-r-package-sas7bdat.html" title="Really useful R package: sas7bdat" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><thr:total>10</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/07/really-useful-r-package-sas7bdat.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C08EQX8zeyp7ImA9WhdSEE0.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-3853714244647297332</id><published>2011-07-18T11:10:00.017-04:00</published><updated>2011-07-18T11:10:00.183-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-18T11:10:00.183-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mosaic package" /><category scheme="http://www.blogger.com/atom/ns#" term="chisq.test()" /><category scheme="http://www.blogger.com/atom/ns#" term="association plot" /><category scheme="http://www.blogger.com/atom/ns#" term="expected cell counts" 
/><category scheme="http://www.blogger.com/atom/ns#" term="xchisq.test()" /><category scheme="http://www.blogger.com/atom/ns#" term="observed cell counts" /><category scheme="http://www.blogger.com/atom/ns#" term="mosaic plot" /><category scheme="http://www.blogger.com/atom/ns#" term="Michael Friendly" /><category scheme="http://www.blogger.com/atom/ns#" term="chi-square test" /><category scheme="http://www.blogger.com/atom/ns#" term="teaching statistics" /><category scheme="http://www.blogger.com/atom/ns#" term="Project MOSAIC" /><title>Example 9.3: augmented display of contingency table</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-iS3SWsvXsNk/TgIlQ9M7XBI/AAAAAAAAADQ/HyzZnYtswTQ/s1600/Rplot01.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 327px;" src="http://3.bp.blogspot.com/-iS3SWsvXsNk/TgIlQ9M7XBI/AAAAAAAAADQ/HyzZnYtswTQ/s400/Rplot01.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5621096258148719634" /&gt;&lt;/a&gt;&lt;br /&gt;SAS and R often provide different levels of details from output.  This is particularly true for the descriptive analysis of contingency tables, where SAS makes it easy to display tables with additional quantities (such as the observed cell count).&lt;br /&gt;&lt;br /&gt;The &lt;a href="http://cran.r-project.org/web/packages/mosaic/index.html"&gt;mosaic&lt;/a&gt; package has added functionality to calculate these quantities in R.  
We demonstrate using an example from the HELP dataset.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ds = read.csv("http://www.math.smith.edu/r/data/help.csv")&lt;br /&gt;library(mosaic)&lt;br /&gt;ds$gender = ifelse(ds$female==1, "female", "male")&lt;br /&gt;ds$homeless = ifelse(ds$homeless==1, "homeless", "housed")&lt;br /&gt;tab = xtabs(~ gender + homeless, data=ds)&lt;br /&gt;&gt; tab&lt;br /&gt;        homeless&lt;br /&gt;gender   homeless housed&lt;br /&gt;  female       40     67&lt;br /&gt;  male        169    177&lt;br /&gt;&gt; xchisq.test(tab)&lt;br /&gt;&lt;br /&gt; Pearson's Chi-squared test with Yates' continuity correction&lt;br /&gt;&lt;br /&gt;data:  tab &lt;br /&gt;X-squared = 3.8708, df = 1, p-value = 0.04913&lt;br /&gt;&lt;br /&gt;  40.00    67.00 &lt;br /&gt;( 49.37) ( 57.63)&lt;br /&gt; [1.78]   [1.52] &lt;br /&gt;&lt;-1.33&gt;  &lt; 1.23&gt; &lt;br /&gt;   &lt;br /&gt; 169.00   177.00 &lt;br /&gt;(159.63) (186.37)&lt;br /&gt; [0.55]   [0.47] &lt;br /&gt;&lt; 0.74&gt;  &lt;-0.69&gt; &lt;br /&gt;   &lt;br /&gt;key:&lt;br /&gt; observed&lt;br /&gt; (expected)&lt;br /&gt; [contribution to X-squared]&lt;br /&gt; &lt;residual&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;We see that there is a borderline statistically significant association between gender and homeless status in the HELP study.  We interpret that we see fewer than expected females who are homeless, and more males who are homeless.&lt;br /&gt;&lt;br /&gt;Another idea is to use graphical depictions of the association in this table. One approach is a mosaic plot (note: no relation to &lt;a href="http://mosaic-web.org/"&gt;Project MOSAIC&lt;/a&gt; and the mosaic package).  A &lt;a href="http://www.childrensmercy.org/stats/definitions/mosaic.htm"&gt;mosaic plot&lt;/a&gt; starts as a square with area equal to one.  It is divided into columns based on the prevalence in each of the values for the column variable (in this case, gender).  
Then each bar is divided vertically based on the conditional probability of the other variable within that category.&lt;br /&gt;&lt;br /&gt;Another graphical display of a table is the association plot.  In an association plot, there is also a box for each cell of the table.  The area of the box is proportional to the difference between the observed and expected (assuming no association) frequencies.  In a typical presentation, excess observed counts are black and above the line, while deficient counts are red and below the line.&lt;br /&gt;&lt;br /&gt;Above, we show the mosaic plot (on the left) and association plot (on the right).  Both of these displays demonstrate that there is an association.  The mosaic plot indicates that only about a quarter of the sample is female (indicated by the width of the columns), and that homelessness is present in about half the subjects (area shaded in light grey).  The slight association demonstrated is that there are fewer homeless women than expected (since the horizontal line moves down between the first and second column).  Similarly, for the association plot we note that the observed cell count is less than the expected (indicated in red with values below the line) for the female homeless group.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;par(mfrow=c(1,2))&lt;br /&gt;mosaicplot(tab, color=TRUE, main="mosaic plot")&lt;br /&gt;assocplot(tab)&lt;br /&gt;title("association plot")&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;As in &lt;a href="http://sas-and-r.blogspot.com/search/label/Michael%20Friendly"&gt;Example 8.32&lt;/a&gt;, we find SAS macros for mosaic plots among the contributions of &lt;a href="http://www.datavis.ca/"&gt;Michael Friendly&lt;/a&gt;.  In this complex case, they are somewhat more difficult to access than others.  
The code for the plots themselves can be downloaded &lt;a href="http://www.datavis.ca/mosaics/mosaics.html#sec:Obtain"&gt;here&lt;/a&gt;, while it's useful to also run a &lt;a href="http://www.datavis.ca/sasmac/mosaic.html"&gt;wrapper macro&lt;/a&gt;.  After downloading the files, the following code can be used to make the figure below.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;title 'Install mosaic modules';&lt;br /&gt;* location of the zipped files;&lt;br /&gt;filename mosaic  'c:\ken\sasmacros\mosaics';&lt;br /&gt;* storage location of compiled macros;&lt;br /&gt;libname  mosaic   'c:\ken\sasmacros\mosaics';&lt;br /&gt;&lt;br /&gt;* Code to read in, compile and store the macros;&lt;br /&gt;proc iml ;&lt;br /&gt;   reset storage=mosaic.mosaic;&lt;br /&gt;   %include mosaic(mosaics) ;&lt;br /&gt;   store module=_all_;&lt;br /&gt;   show storage;&lt;br /&gt;quit;&lt;br /&gt;&lt;br /&gt;* Prep: create the table, save the cell counts;&lt;br /&gt;proc freq data = "c:\book\help.sas7bdat";&lt;br /&gt;tables homeless * female / out=outhelp;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;* Read in the wrapper macro;&lt;br /&gt;%include "c:\ken\sasmacros\mosaics\mosaic.sas";&lt;br /&gt;&lt;br /&gt;* Make the plot;&lt;br /&gt;%mosaic(data=outhelp,var = female homeless, &lt;br /&gt;        sort=homeless descending female, space = 1 1);&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The &lt;tt&gt;sort&lt;/tt&gt; and &lt;tt&gt;space&lt;/tt&gt; options make the results more similar to those shown for &lt;tt&gt;mosaicplot()&lt;/tt&gt;.  
In this version, the colors reflect the signs of the residuals.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-WJOllKjIhkw/Th3aMSlFlHI/AAAAAAAADEw/u_pJpWFrg3I/s1600/sasmosaic.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 235px;" src="http://2.bp.blogspot.com/-WJOllKjIhkw/Th3aMSlFlHI/AAAAAAAADEw/u_pJpWFrg3I/s400/sasmosaic.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5628895013963666546" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-3853714244647297332?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=X7Xc9X6XqmI:3H9IsDUnXEc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=X7Xc9X6XqmI:3H9IsDUnXEc:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=X7Xc9X6XqmI:3H9IsDUnXEc:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=X7Xc9X6XqmI:3H9IsDUnXEc:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=X7Xc9X6XqmI:3H9IsDUnXEc:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=X7Xc9X6XqmI:3H9IsDUnXEc:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=X7Xc9X6XqmI:3H9IsDUnXEc:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=X7Xc9X6XqmI:3H9IsDUnXEc:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/X7Xc9X6XqmI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/3853714244647297332/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/07/example-93-augmented-display-of.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/3853714244647297332?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/3853714244647297332?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/X7Xc9X6XqmI/example-93-augmented-display-of.html" title="Example 9.3: augmented display of contingency table" /><author><name>Nick Horton</name><uri>http://www.blogger.com/profile/00242216324355342047</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="25" height="32" src="http://1.bp.blogspot.com/_Y7yEfDrPG4o/SjJwk69Z2tI/AAAAAAAAAAM/KoYswr20tck/S220/nh2.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-iS3SWsvXsNk/TgIlQ9M7XBI/AAAAAAAAADQ/HyzZnYtswTQ/s72-c/Rplot01.png" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/07/example-93-augmented-display-of.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUcGQXg4eyp7ImA9WhdTFE0.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-2992374824250763032</id><published>2011-07-11T13:57:00.012-04:00</published><updated>2011-07-11T13:57:00.633-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-11T13:57:00.633-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="smoothScatter()" /><category 
scheme="http://www.blogger.com/atom/ns#" term="pch" /><category scheme="http://www.blogger.com/atom/ns#" term="proc sgplot" /><category scheme="http://www.blogger.com/atom/ns#" term="proc kde" /><category scheme="http://www.blogger.com/atom/ns#" term="cex" /><category scheme="http://www.blogger.com/atom/ns#" term="markerattrs" /><title>Example 9.2:  Transparency and bivariate KDE</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-_tEykEX97ec/ThTMwnyCKSI/AAAAAAAADBE/v4fJAtjlNiE/s1600/transp.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 300px;" src="http://3.bp.blogspot.com/-_tEykEX97ec/ThTMwnyCKSI/AAAAAAAADBE/v4fJAtjlNiE/s400/transp.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5626346970177218850" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;In &lt;a href="http://sas-and-r.blogspot.com/2011/07/example-91-scatterplots-with-binning.html"&gt;Example 9.1&lt;/a&gt;, we showed a binning approach to plotting bivariate relationships in a large data set.  Here we show more sophisticated approaches: transparent overplotting and formal two-dimensional kernel density estimation.  We use the 10,000 simulated bivariate normals shown in Example 9.1.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;In SAS, transparency can be found in &lt;tt&gt;proc sgplot&lt;/tt&gt;, with results shown above.  The options here are fairly self-explanatory.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc sgplot data=mvnorms;&lt;br /&gt;  scatter x=x1 y=x2 / markerattrs=(symbol=CircleFilled size = .05in) &lt;br /&gt;          transparency=0.85;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The image gives a good sense of the overall density, with the darker (overplotted) areas reflecting more observations.  Overplotting was the problem we sought to avoid with the binning, but here it becomes an advantage.  
&lt;br /&gt;&lt;br /&gt;Another approach is to use bivariate kernel density estimation.  This is perhaps more similar to the binning shown previously, but without the stricture of regular polygons.  It also offers some default values for smoothing, though whether or not these are good default values could be debated.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;proc kde data=mvnorms;&lt;br /&gt;  bivar x1 x2 / plots=contour;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-jDw20vz6xfc/ThTRnUHUogI/AAAAAAAADBk/qPJUVlQq2pE/s1600/SASbivarkde.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 300px;" src="http://4.bp.blogspot.com/-jDw20vz6xfc/ThTRnUHUogI/AAAAAAAADBk/qPJUVlQq2pE/s400/SASbivarkde.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5626352307837116930" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;In R, the basic &lt;tt&gt;plot()&lt;/tt&gt; function appears to include transparency, though you must select a suitably pale color to see it.  The &lt;tt&gt;pch, col&lt;/tt&gt;, and &lt;tt&gt;cex&lt;/tt&gt; parameters govern the shape, color, and size of the plotted symbols, respectively. 
&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;plot(xvals[,1], xvals[,2], pch=19, col="#00000022", cex=0.1)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-YWG8xkhTOSY/ThTP2dZxvII/AAAAAAAADBU/XF_ceCg2GZI/s1600/transpR.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 327px;" src="http://1.bp.blogspot.com/-YWG8xkhTOSY/ThTP2dZxvII/AAAAAAAADBU/XF_ceCg2GZI/s400/transpR.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5626350369005223042" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Bivariate kernel density estimation is available in the &lt;tt&gt;smoothScatter()&lt;/tt&gt; function, which is included in the R distribution as part of the &lt;tt&gt;graphics&lt;/tt&gt; package.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;smoothScatter(xvals[,1], xvals[,2])&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-rTbKlRAco-k/ThTQpYbUzQI/AAAAAAAADBc/FSthDkVIuSE/s1600/Rbivarkde.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 327px;" src="http://4.bp.blogspot.com/-rTbKlRAco-k/ThTQpYbUzQI/AAAAAAAADBc/FSthDkVIuSE/s400/Rbivarkde.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5626351243842866434" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-2992374824250763032?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=lfi-xW_KKJY:JnmlIS32-kI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=lfi-xW_KKJY:JnmlIS32-kI:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=lfi-xW_KKJY:JnmlIS32-kI:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=lfi-xW_KKJY:JnmlIS32-kI:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=lfi-xW_KKJY:JnmlIS32-kI:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=lfi-xW_KKJY:JnmlIS32-kI:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=lfi-xW_KKJY:JnmlIS32-kI:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=lfi-xW_KKJY:JnmlIS32-kI:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/lfi-xW_KKJY" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/2992374824250763032/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/07/example-92-transparency-and-bivariate.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/2992374824250763032?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/2992374824250763032?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/lfi-xW_KKJY/example-92-transparency-and-bivariate.html" title="Example 9.2:  Transparency and bivariate KDE" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-_tEykEX97ec/ThTMwnyCKSI/AAAAAAAADBE/v4fJAtjlNiE/s72-c/transp.jpg" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/07/example-92-transparency-and-bivariate.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Dk4CQXszcCp7ImA9WhZaGUs.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-1777630381843316721</id><published>2011-07-05T14:02:00.006-04:00</published><updated>2011-07-06T11:09:20.588-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-06T11:09:20.588-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="hexagon" /><category 
scheme="http://www.blogger.com/atom/ns#" term="binning" /><category scheme="http://www.blogger.com/atom/ns#" term="multivariate normal" /><category scheme="http://www.blogger.com/atom/ns#" term="hexbin package" /><category scheme="http://www.blogger.com/atom/ns#" term="proc gmap" /><category scheme="http://www.blogger.com/atom/ns#" term="heat map" /><category scheme="http://www.blogger.com/atom/ns#" term="proc simnormal" /><category scheme="http://www.blogger.com/atom/ns#" term="hexbin()" /><category scheme="http://www.blogger.com/atom/ns#" term="mvrnorm()" /><category scheme="http://www.blogger.com/atom/ns#" term="matrix()" /><category scheme="http://www.blogger.com/atom/ns#" term="large datasets" /><category scheme="http://www.blogger.com/atom/ns#" term="MASS library" /><title>Example 9.1: Scatterplots with binning for large datasets</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-4wSTDGoWSaA/TfUg0vqWc-I/AAAAAAAAADA/j-Wa2HkwYeQ/s1600/forblog.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 320px;" src="http://4.bp.blogspot.com/-4wSTDGoWSaA/TfUg0vqWc-I/AAAAAAAAADA/j-Wa2HkwYeQ/s400/forblog.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5617432200733946850" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Scatterplots can get very hard to interpret when displaying large datasets, as points inevitably overplot and can't be individually discerned.  A number of approaches have been crafted to help with this problem.  One approach uses binning.  This approach is also sometimes called a heat map, and can be thought of as a two-dimensional histogram, where shades of the bins take the place of the heights of the bars.  Any regular tesselation of the plane can be used, but there is some attraction to using hexagons.  Why?  
In the &lt;a href="http://cran.r-project.org/web/packages/hexbin/vignettes/hexagon_binning.pdf"&gt;vignettes&lt;/a&gt; for the hexbin package author &lt;a href="http://www.meetup.com/R-Users/members/7965896/"&gt;Nicholas Lewin-Koh&lt;/a&gt; notes:&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;There are many reasons for using hexagons, at least over squares. Hexagons have symmetry of nearest neighbors which is lacking in square bins. Hexagons are the maximum number of sides a polygon can have for a regular tesselation of the plane, so in terms of packing a hexagon is 13% more efficient for covering the plane than squares. This property translates into better sampling efficiency at least for elliptical shapes. Lastly hexagons are visually less biased for displaying densities than other regular tesselations. &lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;On the other hand, it's unclear whether these advantages are relevant here or whether they outweigh the simplicity of the square and the constant x and y values accompanying it.&lt;br /&gt;&lt;br /&gt;In this entry, we demonstrate the use of a binned scatterplot for data from a sample of 10,000 generated bivariate normal random variables (section 1.10.6).&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;In R, we use the &lt;a href="http://cran.r-project.org/web/packages/hexbin/index.html"&gt;hexbin&lt;/a&gt; package to generate our plot, after generating our bivariate normals with correlation approximately 0.52.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;library(MASS)&lt;br /&gt;library(hexbin)&lt;br /&gt;mu = c(1, -1)&lt;br /&gt;Sigma = matrix(c(3, 2,&lt;br /&gt;                 2, 5), nrow=2)&lt;br /&gt;xvals = mvrnorm(10000, mu, Sigma)&lt;br /&gt;Sigma[1,2]/sqrt(Sigma[1,1]*Sigma[2,2])    # correlation&lt;br /&gt;plot(hexbin(xvals[,1], xvals[,2]), xlab="X1", ylab="X2")&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;We're not aware of a SAS procedure to generate a binned scatterplot 
or of previously existing macros to do it.  &lt;a href="http://www.kenkleinman.net/home/"&gt;Ken&lt;/a&gt; wrote a relatively simple macro to do it, which can be found &lt;a href="http://www.kenkleinman.net/home/index.php/sas-and-r-code/sas-macros.html"&gt;here&lt;/a&gt;.  The macro uses &lt;tt&gt;proc gmap&lt;/tt&gt;, and we hope that someone will develop an approach using &lt;tt&gt;proc template&lt;/tt&gt; and &lt;tt&gt;proc sgrender&lt;/tt&gt;, as demonstrated in an &lt;a href="http://support.sas.com/kb/35/156.html"&gt;example&lt;/a&gt; from SAS Institute.&lt;br /&gt;&lt;br /&gt;After running the macro, the following code generates the image shown below.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;data Sigma (type=cov);&lt;br /&gt;infile cards;&lt;br /&gt;input _type_ $ _Name_ $ x1 x2;&lt;br /&gt;cards;&lt;br /&gt;cov x1 3 2&lt;br /&gt;cov x2 2 5&lt;br /&gt;;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;proc simnormal data=Sigma out=mvnorms numreal = 10000;&lt;br /&gt;  var x1 x2;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;%twodhist(data=mvnorms,x=x1,y=x2,nbinsx=30,nbinsy=30,nshades=9);&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/--uLzBni5Dz0/TfoHTL4caOI/AAAAAAAAC3k/QLvvkAEb9zo/s1600/twodhist.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 307px;" src="http://1.bp.blogspot.com/--uLzBni5Dz0/TfoHTL4caOI/AAAAAAAAC3k/QLvvkAEb9zo/s400/twodhist.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5618811511286556898" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;We note that the default number of shades shown in R, and the number chosen here for SAS, seem to exceed the eye's ability to differentiate, especially for the darker shades.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Update&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;An anonymous commenter reported that the SAS code bombed when run.  
I (Ken) added a new version of the code at the link listed above.  I note it here only to emphasize that in either SAS or R, settings or objects in the environment can affect the performance of code.  If you plan to share code, an item to add to your checklist is to run the code in a fresh session.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-1777630381843316721?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=QoLXy1h0A3s:SI3OF7Nr0ZI:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=QoLXy1h0A3s:SI3OF7Nr0ZI:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=QoLXy1h0A3s:SI3OF7Nr0ZI:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=QoLXy1h0A3s:SI3OF7Nr0ZI:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=QoLXy1h0A3s:SI3OF7Nr0ZI:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=QoLXy1h0A3s:SI3OF7Nr0ZI:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=QoLXy1h0A3s:SI3OF7Nr0ZI:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=QoLXy1h0A3s:SI3OF7Nr0ZI:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/QoLXy1h0A3s" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/1777630381843316721/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/07/example-91-scatterplots-with-binning.html#comment-form" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1777630381843316721?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1777630381843316721?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/QoLXy1h0A3s/example-91-scatterplots-with-binning.html" title="Example 9.1: Scatterplots with binning for large datasets" /><author><name>Nick Horton</name><uri>http://www.blogger.com/profile/00242216324355342047</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="25" height="32" src="http://1.bp.blogspot.com/_Y7yEfDrPG4o/SjJwk69Z2tI/AAAAAAAAAAM/KoYswr20tck/S220/nh2.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-4wSTDGoWSaA/TfUg0vqWc-I/AAAAAAAAADA/j-Wa2HkwYeQ/s72-c/forblog.png" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/07/example-91-scatterplots-with-binning.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0QEQ38-eSp7ImA9WhZaFU8.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-1394812797125070184</id><published>2011-07-01T09:50:00.006-04:00</published><updated>2011-07-01T10:08:22.151-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-01T10:08:22.151-04:00</app:edited><title>A third year of entries!</title><content type="html">Contrary to previous reports, we started 
blogging after our book was published, with the conceit that we were adding examples to the book.  Today marks the second anniversary of the book's appearance and of the blog.  To celebrate, we're turning over our calendar, and starting a new volume of entries-- Example 9.1 will appear on July 5th.&lt;br /&gt;&lt;br /&gt;It's very difficult to get an accurate measure of our viewership.  As I write this, Feedburner reports about 650 regular readers, but this omits people who see us on R-bloggers and SAS Community Planet or SAS-X.  As consumers of those aggregators, we assume there are many others who see us without subscribing directly.&lt;br /&gt;&lt;br /&gt;We also get a fair number of views that derive directly from Google searches, which means we must be doing something right.  Google Analytics reports over 100,000 total pageviews, while Feedburner claims 250,000.  &lt;br /&gt;&lt;br /&gt;So far, our most popular entries, according to feedburner are:&lt;br /&gt;Example 7.38: Kaplan-Meier survival estimates&lt;br /&gt;Example 7.39: Nelson-Aalen estimate of survivorship&lt;br /&gt;Example 8.3: Pyramid plots&lt;br /&gt;Example 7.30: Simulate censored survival data&lt;br /&gt;Example 8.5: Bubble plots part 3&lt;br /&gt;&lt;br /&gt;Blogger's internal metrics vary somewhat:&lt;br /&gt;Example 7.35: Propensity score matching&lt;br /&gt;Example 8.7: Hosmer and Lemeshow goodness-of-fit&lt;br /&gt;Example 7.38: Kaplan-Meier survival estimates&lt;br /&gt;Example 7.30: Simulate censored survival data&lt;br /&gt;Example 7.25: Compare draws with distribution&lt;br /&gt;&lt;br /&gt;All of your attention is really gratifying, and we hope we're useful to people-- it's hard work finding new material and collaborating on a new post every week!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-1394812797125070184?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=r8vZlEhmBvU:eYKTv75Jqt4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=r8vZlEhmBvU:eYKTv75Jqt4:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=r8vZlEhmBvU:eYKTv75Jqt4:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=r8vZlEhmBvU:eYKTv75Jqt4:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=r8vZlEhmBvU:eYKTv75Jqt4:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=r8vZlEhmBvU:eYKTv75Jqt4:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=r8vZlEhmBvU:eYKTv75Jqt4:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=r8vZlEhmBvU:eYKTv75Jqt4:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/r8vZlEhmBvU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/1394812797125070184/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/07/third-year-of-entries.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1394812797125070184?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/1394812797125070184?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/r8vZlEhmBvU/third-year-of-entries.html" title="A third year of entries!" /><author><name>Ken Kleinman</name><uri>http://www.blogger.com/profile/09525118721291529157</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="28" height="32" src="http://3.bp.blogspot.com/_zLwIdu2sLKM/SjKezvboI7I/AAAAAAAACOY/GiDRa1D4MVA/S220/kleinman.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/07/third-year-of-entries.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D04EQXg_eCp7ImA9WhZaEUU.&quot;"><id>tag:blogger.com,1999:blog-1275149608391671670.post-6711624620402823613</id><published>2011-06-27T10:45:00.000-04:00</published><updated>2011-06-27T10:45:00.640-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-27T10:45:00.640-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="kurtosis" /><category scheme="http://www.blogger.com/atom/ns#" term="ods system" /><category scheme="http://www.blogger.com/atom/ns#" term="descriptive statistics" /><category scheme="http://www.blogger.com/atom/ns#" term="skewness" /><category scheme="http://www.blogger.com/atom/ns#" 
term="skewness()" /><category scheme="http://www.blogger.com/atom/ns#" term="proc univariate" /><category scheme="http://www.blogger.com/atom/ns#" term="moments package" /><category scheme="http://www.blogger.com/atom/ns#" term="central moments" /><category scheme="http://www.blogger.com/atom/ns#" term="kurtosis()" /><title>Example 8.42: skewness and kurtosis and more moments (oh my!)</title><content type="html">&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-s61ydFuD5nU/TfVz9ZYQ9oI/AAAAAAAAADI/nJ1rSxPbtPY/s1600/kurtosis.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 320px;" src="http://1.bp.blogspot.com/-s61ydFuD5nU/TfVz9ZYQ9oI/AAAAAAAAADI/nJ1rSxPbtPY/s400/kurtosis.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5617523608836437634" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;While &lt;a href="http://en.wikipedia.org/wiki/Skewness"&gt;skewness&lt;/a&gt; and &lt;a href="http://en.wikipedia.org/wiki/Kurtosis"&gt;kurtosis&lt;/a&gt; are not as often calculated and reported as mean and standard deviation, they can be useful at times.  Skewness is the 3rd moment around the mean, and characterizes whether the distribution is symmetric (skewness=0).  Kurtosis is a function of the 4th central moment, and characterizes &lt;i&gt;peakedness&lt;/i&gt;, where the normal distribution has a value of 3 and smaller values correspond to thinner tails (less peakedness).&lt;br /&gt;&lt;br /&gt;Some packages (including &lt;a href="http://support.sas.com/onlinedoc/913/getDoc/en/proc.hlp/a002473332.htm"&gt;SAS&lt;/a&gt;) subtract three from the kurtosis, so that the normal distribution has a kurtosis of 0 (this is sometimes called &lt;i&gt;excess kurtosis&lt;/i&gt;).  
&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R&lt;/b&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;library(moments)&lt;br /&gt;library(lattice)&lt;br /&gt;ds = read.csv("http://www.math.smith.edu/r/data/help.csv")&lt;br /&gt;ds$gender = ifelse(ds$female==1, "female", "male")&lt;br /&gt;densityplot(~ cesd, data=ds, groups=gender, auto.key=TRUE)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;We see that the distribution of CESD scores is skewed with a long left tail, and appears somewhat less peaked than a normal distribution.  This is confirmed by the actual statistics:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&gt; with(ds, tapply(cesd, gender, skewness))&lt;br /&gt;    female       male &lt;br /&gt;-0.4906171 -0.2464390 &lt;br /&gt;&gt; with(ds, tapply(cesd, gender, kurtosis))    # kurtosis&lt;br /&gt;  female     male &lt;br /&gt;2.748968 2.547061 &lt;br /&gt;&gt; with(ds, tapply(cesd, gender, kurtosis))-3  # excess kurtosis&lt;br /&gt;    female       male &lt;br /&gt;-0.2510318 -0.4529394 &lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;SAS&lt;/b&gt;&lt;br /&gt;SAS includes much detail on the moments and other statistics in the output from &lt;tt&gt;proc univariate&lt;/tt&gt;.  As usual, the quantity of output can be off-putting for new users and students. Here we extract the moments we need with the ODS system.  
We also generate kernel density estimates roughly analogous to the &lt;tt&gt;densityplot()&lt;/tt&gt; results shown above.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;ods output moments = cesdmoments;&lt;br /&gt;proc univariate data="c:\book\help.sas7bdat";&lt;br /&gt;  class female;&lt;br /&gt;  var cesd;&lt;br /&gt;  histogram cesd / kernel;&lt;br /&gt;run;&lt;br /&gt;&lt;br /&gt;proc print data=cesdmoments; &lt;br /&gt;  where label1 = "Skewness";&lt;br /&gt;  var female label1 nvalue1 label2 nvalue2;&lt;br /&gt;run;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;With the result:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;Obs   FEMALE    Label1         nValue1    Label2         nValue2&lt;br /&gt;&lt;br /&gt;  4     0      Skewness      -0.247513   Kurtosis      -0.442010&lt;br /&gt; 10     1      Skewness      -0.497620   Kurtosis      -0.204928&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;We note that the default is to produce unbiased (REML) estimates, rather than the biased method of moments estimator produced by the &lt;tt&gt;kurtosis()&lt;/tt&gt; function (and that SAS presents the excess kurtosis).&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-Q35nvdX4Qmc/TgIwBZ00v9I/AAAAAAAAC_s/Q0BP0R4iQDU/s1600/kernel.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 213px;" src="http://2.bp.blogspot.com/-Q35nvdX4Qmc/TgIwBZ00v9I/AAAAAAAAC_s/Q0BP0R4iQDU/s400/kernel.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5621108085582249938" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1275149608391671670-6711624620402823613?l=sas-and-r.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=c_sqtgFHiAw:erUxGrl7hjY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=c_sqtgFHiAw:erUxGrl7hjY:J3aVl1i_38o"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=J3aVl1i_38o" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=c_sqtgFHiAw:erUxGrl7hjY:qj6IDK7rITs"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=qj6IDK7rITs" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=c_sqtgFHiAw:erUxGrl7hjY:63t7Ie-LG7Y"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?d=63t7Ie-LG7Y" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=c_sqtgFHiAw:erUxGrl7hjY:F7zBnMyn0Lo"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=c_sqtgFHiAw:erUxGrl7hjY:F7zBnMyn0Lo" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/SASandR?a=c_sqtgFHiAw:erUxGrl7hjY:gIN9vFwOqvQ"&gt;&lt;img src="http://feeds.feedburner.com/~ff/SASandR?i=c_sqtgFHiAw:erUxGrl7hjY:gIN9vFwOqvQ" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;&lt;img src="http://feeds.feedburner.com/~r/SASandR/~4/c_sqtgFHiAw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://sas-and-r.blogspot.com/feeds/6711624620402823613/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://sas-and-r.blogspot.com/2011/06/example-842-skewness-and-kurtosis-and.html#comment-form" title="5 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6711624620402823613?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1275149608391671670/posts/default/6711624620402823613?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/SASandR/~3/c_sqtgFHiAw/example-842-skewness-and-kurtosis-and.html" title="Example 8.42: skewness and kurtosis and more moments (oh my!)" /><author><name>Nick Horton</name><uri>http://www.blogger.com/profile/00242216324355342047</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="25" height="32" src="http://1.bp.blogspot.com/_Y7yEfDrPG4o/SjJwk69Z2tI/AAAAAAAAAAM/KoYswr20tck/S220/nh2.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-s61ydFuD5nU/TfVz9ZYQ9oI/AAAAAAAAADI/nJ1rSxPbtPY/s72-c/kurtosis.png" height="72" width="72" /><thr:total>5</thr:total><feedburner:origLink>http://sas-and-r.blogspot.com/2011/06/example-842-skewness-and-kurtosis-and.html</feedburner:origLink></entry></feed>

