<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:blogger="http://schemas.google.com/blogger/2008" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;C08ERHo6fSp7ImA9WhBaFE8.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384</id><updated>2013-05-24T11:56:45.415-07:00</updated><category term="Nuisance parameters" /><category term="Seminars" /><category term="Statistics Canada" /><category term="Degrees of freedom" /><category term="Bootstrap" /><category term="Simulation" /><category term="SURE model" /><category term="H-P filter" /><category term="Structural breaks" /><category term="Power" /><category term="Canadian data" /><category term="Hypothesis testing" /><category term="Computing" /><category term="Distributions" /><category term="Courses" /><category term="Trends" /><category term="Information theory" /><category term="Business cycle" /><category term="Mean squared error" /><category term="EViews" /><category term="Confidence intervals" /><category term="History of econometrics" /><category term="Blogs" /><category term="Binomial distribution" /><category term="Cointegration" /><category term="Gretl" /><category term="Monte Carlo" /><category term="Granger causality" /><category term="Economic growth" /><category term="Grad. 
students" /><category term="Dynamic model" /><category term="Quotes" /><category term="Publishing" /><category term="Poisson distribution" /><category term="LIML" /><category term="SHAZAM" /><category term="Goodness of fit" /><category term="Autocorrelation" /><category term="2SLS" /><category term="Multicollinearity" /><category term="GLS" /><category term="Teaching econometrics" /><category term="Consumer demand" /><category term="Heteroskadasticity" /><category term="Careers" /><category term="Asymptotic theory" /><category term="Weak Instruments" /><category term="Bias correction" /><category term="Nobel Prize" /><category term="NZ data" /><category term="Circular data" /><category term="Bayesian inference" /><category term="VECM models" /><category term="Economic statistics" /><category term="Sample selection" /><category term="Statistics" /><category term="macroeconometrics" /><category term="New Zealand" /><category term="Co-authors" /><category term="Instrumental variables" /><category term="Regression models" /><category term="STATA" /><category term="Videos" /><category term="Royal Statistical Society" /><category term="NBER" /><category term="UK data" /><category term="Mathematics" /><category term="Estimation" /><category term="MLE" /><category term="VAR models" /><category term="CPI" /><category term="PPP" /><category term="Miscellaneous" /><category term="Difference-in-differences" /><category term="Freeware" /><category term="Dummy variables" /><category term="Extreme value theory" /><category term="OLS" /><category term="Robust estimation" /><category term="Nonlinear models" /><category term="Simultaneous equations models" /><category term="Specification testing" /><category term="FIML" /><category term="Graphs" /><category term="Cooking" /><category term="Measurement error" /><category term="ARIMA models" /><category term="American Statistical Association" /><category term="Count data" /><category term="Jobs" /><category term="Sheep" /><category 
term="Humour" /><category term="unit roots" /><category term="3SLS" /><category term="Survey data" /><category term="Time series" /><category term="History of statistics" /><category term="Forecasting" /><category term="Conferences" /><category term="Data" /><category term="Continuous-time model" /><category term="Panel data" /><category term="p-values" /><category term="ChiSquare distribution" /><category term="Consistency" /><category term="LDV models" /><category term="Financial econometrics" /><category term="Seasonal adjustment" /><category term="Sports" /><category term="Normal distribution" /><category term="Uniform distribution" /><category term="Personal gripes" /><category term="Statistics NZ" /><category term="R" /><category term="GMM" /><title>Econometrics Beat: Dave Giles' Blog</title><subtitle type="html" /><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://davegiles.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>386</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/blogspot/jjOHE" /><feedburner:info uri="blogspot/jjohe" /><atom10:link 
xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry gd:etag="W/&quot;C08ERHo5eSp7ImA9WhBaFE8.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-1883032100022272599</id><published>2013-05-23T13:31:00.000-07:00</published><updated>2013-05-24T11:56:45.421-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-24T11:56:45.421-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="History of statistics" /><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="Economic statistics" /><category scheme="http://www.blogger.com/atom/ns#" term="Computing" /><title>Actually Computing the Sample Variance!</title><content type="html">&lt;div style="text-align: justify;"&gt;
I always enjoy the posts from John Cook on his &lt;b&gt;&lt;a href="http://www.johndcook.com/blog" target="_blank"&gt;The Endeavour&lt;/a&gt;&lt;/b&gt; blog. John's a knowledgeable guy and there's a lot on his blog that's of interest to econometricians. Take a look for yourself!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Back in 2008, John had &lt;b&gt;&lt;a href="http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/" target="_blank"&gt;a post&lt;/a&gt;&lt;/b&gt; that's relevant to something I've been blogging about recently. It also reminded me of some important issues associated with computation - issues that we used to worry about a great deal in the bad old days of "hand calculations", and computers with short word-lengths and very limited memory.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
One thing that needs to be stressed to students is that the &lt;i&gt;algebraic&lt;/i&gt; formulae that they learn about are not necessarily expressed in the form that's most appropriate &lt;i&gt;computationally&lt;/i&gt;. By "appropriate", I'm referring to both computational accuracy and computational speed. There are actually lots and lots of examples that illustrate the point that I want to make. However, let's just consider the "simple problem" of computing the variance of a sample of data.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;/div&gt;
&lt;a name='more'&gt;&lt;/a&gt;&lt;div style="text-align: justify;"&gt;
Different estimators of the population variance have been the topic of a couple &lt;span style="background-color: white;"&gt;of my recent posts&lt;/span&gt; (&lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/05/whats-variance-of-sample-variance.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt; and&lt;b&gt; &lt;a href="http://davegiles.blogspot.ca/2013/05/variance-estimators-that-minimize-mse.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;). I'll be concentrating here on the usual sample variance formula,&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; s&lt;sup&gt;2&lt;/sup&gt; = [1 / (n - 1)] Σ[(x&lt;sub&gt;i&lt;/sub&gt; - x*)&lt;sup&gt;2&lt;/sup&gt;] , &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;(1)&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
where "n" is the sample size, and x* = (1 / n)Σ(x&lt;sub&gt;i&lt;/sub&gt;) is the simple sample mean.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
However, the focal point of what I'll be talking about is the term, Σ[(x&lt;sub&gt;i&lt;/sub&gt; - x*)&lt;sup&gt;2&lt;/sup&gt;], so my comments apply in varying degrees to other related measures that we use to estimate the population variance, or to describe the variability of a sample.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
So, what's the big deal here? The formula for s&lt;sup&gt;2&lt;/sup&gt; looks straightforward enough. First, you compute the sample average, x*. Then, you take the difference between each sample observation and x*. You square each of these differences. You add up all of the squared differences; and finally, you divide the answer by a number. Nothing to it!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
What about the number of steps that were required to do this? What about the possibility of "rounding errors" as you go through the various steps I've outlined?&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Are these the steps that are &lt;i&gt;actually followed&lt;/i&gt;&amp;nbsp;when you are using a statistical package and you use a command such as:&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; scalar &amp;nbsp;SAMPVAR = @vars(X) &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;(in EViews)&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
or&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;SAMPVAR = var(X) &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; (in R or in gretl) &amp;nbsp;?&lt;br /&gt;
&lt;br /&gt;
Before we answer this question, notice that there's more than one way to write the expression for s&lt;sup&gt;2&lt;/sup&gt;. For example, a mathematically equivalent formula is:&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; s&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;= [1 / (n - 1)] [Σ(x&lt;sub&gt;i&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;)&amp;nbsp;- nx*&lt;sup&gt;2&lt;/sup&gt;] . &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; (2)&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
The formula in (1) is sometimes called "the direct method" for computing s&lt;sup&gt;2&lt;/sup&gt;, and the formula in (2) is sometimes called the "one pass method". In a world of perfect &lt;i&gt;arithmetic&lt;/i&gt;, both methods would yield the same &lt;i&gt;numerical&lt;/i&gt;&amp;nbsp;result.&lt;br /&gt;
&lt;br /&gt;
The "one pass method" used to be popular some years ago when we relied on calculators with very limited memory, but no decent statistical package would use it today. The reason is that it's prone to "catastrophic loss of precision" (to borrow a phrase from Braun and Murdoch, 2007, p.11). Those authors show that if applied to the sample {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, coding the formulae (1) and (2) in R yields the correct answer of 11. However, if you add the value 1.e10 to each sample value, coding formula (2) in R yields the value 0 instead of 11. (Recall that adding a constant doesn't alter the variance, so 11 is still the correct result.) &lt;br /&gt;
&lt;br /&gt;
Not surprisingly, the var( . ) function in R &lt;i&gt;doesn't&lt;/i&gt; use the "one pass method"!&lt;br /&gt;
&lt;br /&gt;
In fact, there are other mathematically equivalent, and useful, expressions for s&lt;sup&gt;2&lt;/sup&gt; that you may not have encountered. For instance, we can write:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;s&lt;sup&gt;2&lt;/sup&gt; = [1 / (n(n -1))] [nΣ(x&lt;sub&gt;i&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) - (Σ(x&lt;sub&gt;i&lt;/sub&gt;))&lt;sup&gt;2&lt;/sup&gt;] &amp;nbsp; &amp;nbsp;. &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; (3)&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
John Cook calls this the "sum of squares method".&lt;br /&gt;
&lt;br /&gt;
Then there's the "corrected two pass algorithm", suggested by &amp;nbsp;Chan &lt;i&gt;et al&lt;/i&gt;. (1983), and favoured by Press &lt;i&gt;et al. &lt;/i&gt;in their various editions of &lt;a href="http://www.mpi-hd.mpg.de/astrophysik/HEA/internal/Numerical_Recipes/f14-1.pdf" style="font-style: italic; font-weight: bold;" target="_blank"&gt;Numerical Recipes&lt;/a&gt;:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;s&lt;sup&gt;2&lt;/sup&gt; = [1 / (n - 1)] {Σ(x&lt;sub&gt;i&lt;/sub&gt; -x*)&lt;sup&gt;2&lt;/sup&gt; - (1 / n) [Σ(x&lt;sub&gt;i&lt;/sub&gt; - x*)]&lt;sup&gt;2&lt;/sup&gt;} . &amp;nbsp; &amp;nbsp; (4)&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;a href="http://www.math.uah.edu/stat/sample/Variance.html" target="_blank"&gt;Another&lt;/a&gt;&lt;/b&gt; algebraically equivalent formula is:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; s&lt;sup&gt;2&lt;/sup&gt; = [1 / (2n(n -1))] ΣΣ(x&lt;sub&gt;i&lt;/sub&gt; - x&lt;sub&gt;j&lt;/sub&gt;)&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; (5)&lt;br /&gt;
&lt;br /&gt;
Now, let's get back to the issue of computational accuracy.&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Using a simple simulation experiment, John Cook &lt;a href="http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/" style="font-weight: bold;" target="_blank"&gt;illustrated&lt;/a&gt;&amp;nbsp;that the "sum of squares" formula is hopelessly unreliable. So, we can rule out the formulae in (2) and (3) when it comes to computation. John also found that the "direct method" is very reliable.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
In fact, it's essentially as reliable as Welford's (1962) method for computing a variance. The latter method involves a simple algorithm, and is often recommended for the robustness of its computational accuracy (&lt;i&gt;e.g&lt;/i&gt;., Knuth, 1997). The algorithm is described in &lt;b&gt;&lt;a href="http://www.johndcook.com/standard_deviation.html" target="_blank"&gt;another post&lt;/a&gt;&lt;/b&gt; by John Cook.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
What about the formulae in (4) and (5)? How good are they from a computational perspective?&lt;br /&gt;
&lt;br /&gt;
I'll leave the answer to this question to those of you who'd like to run a little simulation experiment.&lt;br /&gt;
&lt;br /&gt;
So, the take-away message is very simple - just because it's convenient to write a formula in a particular way, &lt;i&gt;algebraically&lt;/i&gt;, this doesn't necessarily mean that this representation is the best way to &lt;i&gt;code&lt;/i&gt;&amp;nbsp;the calculations. You have to consider computational speed, at least to some extent, and you &lt;i&gt;always&lt;/i&gt;&amp;nbsp;need to be thinking about &lt;i&gt;computational accuracy&lt;/i&gt;.&lt;/div&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;References&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;a href="http://www.amazon.com/First-Course-Statistical-Programming/dp/0521694248/ref=sr_1_1_title_1_pap?s=books&amp;amp;ie=UTF8&amp;amp;qid=1369419000&amp;amp;sr=1-1&amp;amp;keywords=Braun+a+first+course+in" target="_blank"&gt;Braun, W. J. and D. J. Murdoch&lt;/a&gt;&lt;/b&gt;, 2007. &lt;i&gt;A First Course in Statistical Programming With R&lt;/i&gt;, Cambridge University Press, Cambridge.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;b style="text-align: justify;"&gt;&lt;a href="http://www.cs.yale.edu/publications/techreports/tr222.pdf" target="_blank"&gt;Chan, T. F&lt;/a&gt;&lt;/b&gt;&lt;span style="text-align: justify;"&gt;., G. H. Golub, and R. J. LeVeque, 1983. Algorithms for computing the sample variance: Analysis and recommendations. &lt;/span&gt;&lt;i style="text-align: justify;"&gt;American Statistician&lt;/i&gt;&lt;span style="text-align: justify;"&gt;, 37, 242-247.&lt;/span&gt;&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;b&gt;&lt;a href="http://www.amazon.com/gp/product/0201896842/104-5257411-2220725?ie=UTF8&amp;amp;tag=theende-20&amp;amp;linkCode=xm2&amp;amp;camp=1789&amp;amp;creativeASIN=0201896842" target="_blank"&gt;Knuth, D. E&lt;/a&gt;&lt;/b&gt;., 1997. &lt;i&gt;The Art of Computer Programming, Volume 2: Seminumerical Algorithms&lt;/i&gt;, 3rd ed., Addison Wesley, Reading, MA.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;b style="text-align: justify;"&gt;&lt;a href="http://www.amazon.com/Numerical-Recipes-Fortran-Scientific-Computing/dp/052143064X/ref=sr_ob_13?s=books&amp;amp;ie=UTF8&amp;amp;qid=1369419105&amp;amp;sr=1-13" target="_blank"&gt;Press, W. H&lt;/a&gt;&lt;/b&gt;&lt;span style="text-align: justify;"&gt;., S. A. Teukolsky, W. T. Vetterling, and B. P. Flannery, 1992.&amp;nbsp;&lt;/span&gt;&lt;i style="text-align: justify;"&gt;Numerical Recipes in FORTRAN 77: The Art of Scientific Computing&lt;/i&gt;&lt;span style="text-align: justify;"&gt;, 2nd ed., Cambridge University Press, Cambridge.&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;a href="http://amstat.tandfonline.com/doi/abs/10.1080/00401706.1962.10490022#.UZ-vsbXvvIU" target="_blank"&gt;Welford, B. P&lt;/a&gt;&lt;/b&gt;., 1962. Note on a method for calculating corrected sums of squares and products. &lt;i&gt;Technometrics&lt;/i&gt;, 4, 419-420.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="font-style: italic;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/Myak-Ukgkqw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/1883032100022272599/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/actually-computing-sample-variance.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/1883032100022272599?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/1883032100022272599?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/Myak-Ukgkqw/actually-computing-sample-variance.html" title="Actually Computing the Sample Variance!" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/actually-computing-sample-variance.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0YDQ308fSp7ImA9WhBaEks.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-3397782686875600148</id><published>2013-05-22T12:23:00.001-07:00</published><updated>2013-05-22T15:19:32.375-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-22T15:19:32.375-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Regression models" /><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="Mean squared error" /><category scheme="http://www.blogger.com/atom/ns#" term="Bayesian 
inference" /><category scheme="http://www.blogger.com/atom/ns#" term="GLS" /><category scheme="http://www.blogger.com/atom/ns#" term="OLS" /><title>Minimum MSE Estimation of a Regression Model</title><content type="html">&lt;span style="text-align: justify;"&gt;Students of econometrics encounter the Gauss-Markov Theorem (GMT) at a fairly early stage - even if they don't see a formal proof to begin with. This theorem deals with a particular property of the OLS estimator of the coefficient vector, β, in the following linear regression model:&lt;/span&gt;
&lt;br /&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; y = Xβ + ε &amp;nbsp;; &amp;nbsp;ε ~ [0 , σ&lt;sup&gt;2&amp;nbsp;&lt;/sup&gt;I&lt;sub&gt;n&lt;/sub&gt;] ,&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;where X is (n x k), non-random, and of rank k.&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;The GMT states that among all &lt;i&gt;linear&lt;/i&gt; estimators of β that are also &lt;i&gt;unbiased&lt;/i&gt; estimators, the OLS estimator of β is &lt;i&gt;most efficient&lt;/i&gt;. That is, OLS is the &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/05/a-visual-proof-that-ols-is-blu.html" target="_blank"&gt;BLU estimator&lt;/a&gt;&lt;/b&gt; for β.&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;&lt;/span&gt;&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;A "linear" estimator is simply one that can be written as a linear function of the random sample data. Here, these are the observed values of the elements of y. We can write the OLS estimator of β as b = (X'X)&lt;sup&gt;-1&lt;/sup&gt;X'y = Ay; where A = (X'X)&lt;sup&gt;-1&lt;/sup&gt;X' is a non-random matrix. So, each element of b is a linear combination of the elements of y, with weights that &lt;i&gt;aren't random&lt;/i&gt;. It's a linear estimator.&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;div&gt;
&lt;span style="text-align: justify;"&gt;Notice that the GMT holds without having to assume that the errors in the model are Normally distributed. (If they do happen to be Normal, then the OLS estimator of β is "Best Unbiased" - that is, we don't have to restrict ourselves to the family of linear estimators.) Also, if ε has a non-scalar (but known) covariance matrix, then the GLS estimator of β is BLU.&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;When I introduce students to the GMT I usually emphasize that it's a result that's of only limited interest. There are lots of interesting and important estimators that &lt;i&gt;aren't&lt;/i&gt;&amp;nbsp;linear estimators. Moreover, why on earth would we want to constrain ourselves to considering only estimators that are unbiased? Econometricians use non-linear and biased estimators all of the time. For example, most Instrumental Variables and GMM estimators fall into this category.&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;Putting this important point aside for the moment, it's also interesting to ask students to derive the linear minimum MSE estimator of β in the above model. That is, think of the family of estimators of β that are of the form, b = Ay, and determine what choice of A leads to the estimator with the &lt;i&gt;smallest MSE&lt;/i&gt;. Strictly speaking, as b and β are &lt;i&gt;vectors&lt;/i&gt;, we need to think of the MSE &lt;i&gt;matrices&lt;/i&gt;, defined as MSE = V + (Bias Bias'), where V is the covariance matrix of the estimator and Bias is the (k x 1) bias &lt;i&gt;vector&lt;/i&gt;. This quantity can be made scalar by taking trace(MSE).&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;If you want to try and answer this question, you need to know a bit of matrix differential calculus - specifically, you need to know how to differentiate functions of &lt;i&gt;matrices&lt;/i&gt; with respect to the elements of a &lt;i&gt;matrix&lt;/i&gt;. There are plenty of books on this, but the&lt;a href="http://www.tc.umn.edu/~nydic001/docs/unpubs/Schonemann_Trace_Derivatives_Presentation.pdf" target="_blank"&gt; &lt;b&gt;free download&lt;/b&gt;&lt;/a&gt; from Steven Nydick is all you need for this particular problem.&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="text-align: justify;"&gt;However, to make life a little easier, but without altering the message, we can simplify the model to the case where k = 1:&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="text-align: justify;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;y&lt;sub&gt;i&lt;/sub&gt; = βx&lt;sub&gt;i&lt;/sub&gt; + ε&lt;sub&gt;i&lt;/sub&gt; &amp;nbsp; &amp;nbsp; &amp;nbsp;; &amp;nbsp; &amp;nbsp;ε&lt;sub&gt;i&lt;/sub&gt; ~ i.i.d. [0 , σ&lt;sup&gt;2&lt;/sup&gt;]&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;and consider the family of estimators,&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; β* = [a&lt;sub&gt;1&lt;/sub&gt;y&lt;sub&gt;1&lt;/sub&gt; + a&lt;sub&gt;2&lt;/sub&gt;y&lt;sub&gt;2&lt;/sub&gt; + ......... + a&lt;sub&gt;n&lt;/sub&gt;y&lt;sub&gt;n&lt;/sub&gt;] ,&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;where the a&lt;sub&gt;i&amp;nbsp;&lt;/sub&gt;&lt;/span&gt;weights are &lt;i&gt;non-random&lt;/i&gt;.&lt;/div&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;Immediately, E[β*] = Σ(a&lt;sub&gt;i&lt;/sub&gt;βx&lt;sub&gt;i&lt;/sub&gt;) = βΣ(a&lt;sub&gt;i&lt;/sub&gt;x&lt;sub&gt;i&lt;/sub&gt;).&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;So, &amp;nbsp; &amp;nbsp; &amp;nbsp; Bias[β*] = E[β*] - β = β[Σ(a&lt;sub&gt;i&lt;/sub&gt;x&lt;sub&gt;i&lt;/sub&gt;) - 1].&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;In addition, given the i.i.d. assumption for the errors,&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="text-align: justify;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Var.[β*] = Σ(a&lt;sub&gt;i&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;Var.(y&lt;sub&gt;i&lt;/sub&gt;)) = σ&lt;sup&gt;2&lt;/sup&gt;Σ(a&lt;sub&gt;i&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) .&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;So, &amp;nbsp; &amp;nbsp; &amp;nbsp;M = MSE[β*] = σ&lt;sup&gt;2&lt;/sup&gt;Σ(a&lt;sub&gt;i&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) + β&lt;sup&gt;2&lt;/sup&gt;[Σ(a&lt;sub&gt;i&lt;/sub&gt;&lt;/span&gt;&lt;/span&gt;x&lt;sub&gt;i&lt;/sub&gt;)&amp;nbsp;- 1]&lt;sup&gt;2&lt;/sup&gt;. &lt;br /&gt;
&lt;br /&gt;
If we differentiate M, partially, with respect to each of the a&lt;sub&gt;j&lt;/sub&gt;s, and set these derivatives to zero, we get:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;2σ&lt;sup&gt;2&lt;/sup&gt;a&lt;sub&gt;j&lt;/sub&gt; + 2β&lt;sup&gt;2&lt;/sup&gt;x&lt;sub&gt;j&lt;/sub&gt;[Σ(a&lt;sub&gt;i&lt;/sub&gt;x&lt;sub&gt;i&lt;/sub&gt;)] = 2β&lt;sup&gt;2&lt;/sup&gt;x&lt;sub&gt;j&lt;/sub&gt; &amp;nbsp; &amp;nbsp;; &amp;nbsp; j = 1, 2, ....., n. &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;(1)&lt;br /&gt;
&lt;br /&gt;
Then, dividing by 2 and multiplying each side of each of these "n" equations, (1), by x&lt;sub&gt;j&lt;/sub&gt;, we get:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;σ&lt;sup&gt;2&lt;/sup&gt;a&lt;sub&gt;j&lt;/sub&gt;x&lt;sub&gt;j&lt;/sub&gt; + β&lt;sup&gt;2&lt;/sup&gt;x&lt;sub&gt;j&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;[Σ(a&lt;sub&gt;i&lt;/sub&gt;x&lt;sub&gt;i&lt;/sub&gt;)] = β&lt;sup&gt;2&lt;/sup&gt;x&lt;sub&gt;j&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; &amp;nbsp; &amp;nbsp;; &amp;nbsp; &amp;nbsp; j = 1, 2, ....., n .&lt;br /&gt;
&lt;br /&gt;
Now, sum over all j:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;σ&lt;sup&gt;2&lt;/sup&gt;Σ(a&lt;sub&gt;j&lt;/sub&gt;x&lt;sub&gt;j&lt;/sub&gt;) + β&lt;sup&gt;2&lt;/sup&gt;Σ(x&lt;sub&gt;j&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;)Σ(a&lt;sub&gt;i&lt;/sub&gt;x&lt;sub&gt;i&lt;/sub&gt;) = β&lt;sup&gt;2&lt;/sup&gt;Σ(x&lt;sub&gt;j&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) &amp;nbsp; &amp;nbsp;. &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;(2)&lt;br /&gt;
&lt;br /&gt;
Similarly, dividing each equation in (1) by 2, multiplying each side by y&lt;sub&gt;j&lt;/sub&gt;, and summing over all j, we get:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; σ&lt;sup&gt;2&lt;/sup&gt;Σ(a&lt;sub&gt;j&lt;/sub&gt;y&lt;sub&gt;j&lt;/sub&gt;) + β&lt;sup&gt;2&lt;/sup&gt;Σ(x&lt;sub&gt;j&lt;/sub&gt;y&lt;sub&gt;j&lt;/sub&gt;)Σ(a&lt;sub&gt;i&lt;/sub&gt;x&lt;sub&gt;i&lt;/sub&gt;) = β&lt;sup&gt;2&lt;/sup&gt;Σ(x&lt;sub&gt;j&lt;/sub&gt;y&lt;sub&gt;j&lt;/sub&gt;). &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;(3)&lt;br /&gt;
&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;
Notice that (3) can be re-written as:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; σ&lt;sup&gt;2&lt;/sup&gt;β* +&amp;nbsp;&lt;span style="text-align: start;"&gt;&amp;nbsp;β&lt;sup&gt;2&lt;/sup&gt;Σ(x&lt;sub&gt;j&lt;/sub&gt;y&lt;sub&gt;j&lt;/sub&gt;)Σ(a&lt;sub&gt;i&lt;/sub&gt;x&lt;sub&gt;i&lt;/sub&gt;) = β&lt;sup&gt;2&lt;/sup&gt;Σ(x&lt;sub&gt;j&lt;/sub&gt;y&lt;sub&gt;j&lt;/sub&gt;) . &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;(4)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: start;"&gt;
It's now a simple matter to solve equations (2) and (4) for Σ(a&lt;sub&gt;i&lt;/sub&gt;x&lt;sub&gt;i&lt;/sub&gt;) and β*. The solution for the latter is:&lt;/div&gt;
&lt;div style="text-align: start;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: start;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;β* = {β&lt;sup&gt;2&lt;/sup&gt; / [(σ&lt;sup&gt;2&lt;/sup&gt; / Σ(x&lt;sub&gt;i&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;)) + β&lt;sup&gt;2&lt;/sup&gt;] } b ,&lt;/div&gt;
&lt;div style="text-align: start;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: start;"&gt;
where b = [Σ(x&lt;sub&gt;i&lt;/sub&gt;y&lt;sub&gt;i&lt;/sub&gt;) / Σ(x&lt;sub&gt;i&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;)] is the OLS estimator of β.&lt;/div&gt;
&lt;div style="text-align: start;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: start;"&gt;
There are three important things to notice about β*:&lt;/div&gt;
&lt;div style="text-align: start;"&gt;
&lt;ol&gt;
&lt;li&gt;It isn't really an "estimator" because it's a function of the unknown parameters, β and σ&lt;sup&gt;2&lt;/sup&gt;. It can't actually be used!&lt;/li&gt;
&lt;li&gt;|β*| &amp;lt; |b| . This MMSE "estimator" of β "shrinks" the OLS estimator towards the origin.&lt;/li&gt;
&lt;li&gt;The "estimator", β*, is both non-linear and biased.&lt;/li&gt;
&lt;/ol&gt;
&lt;/div&gt;
&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;Non-linear, biased, shrinkage estimators - ones that are genuine estimators and &lt;i&gt;don't&lt;/i&gt;&amp;nbsp;involve the unknown parameters -&amp;nbsp;are often used in regression analysis. Examples are the Stein, James-Stein, Ridge, and Bayes estimators. The last of these can be especially appealing, as Bayes estimators allow us to shrink the OLS estimator towards a point that reflects our prior beliefs - &lt;i&gt;not necessarily towards the origin&lt;/i&gt;.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;
&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;So, we can now see why trying to "free up" one of the conditions associated with the Gauss-Markov Theorem, by considering estimators that are linear, but not necessarily unbiased, really doesn't lead us very far if we have in mind that we want to minimize MSE. This also serves as &lt;a href="http://davegiles.blogspot.ca/2013/05/variance-estimators-that-minimize-mse.html" style="font-weight: bold;" target="_blank"&gt;another example &lt;/a&gt;of a situation&lt;b&gt;&amp;nbsp;&lt;/b&gt;where the MMSE estimator isn't feasible (computable).&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;&amp;nbsp;&lt;/span&gt;
&lt;br /&gt;
&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;© 2013, David E. Giles&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;span style="text-align: justify;"&gt;&lt;span style="text-align: justify;"&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/cEIeK8bc8t4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/3397782686875600148/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/minimum-mse-estimation-of-regression.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3397782686875600148?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3397782686875600148?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/cEIeK8bc8t4/minimum-mse-estimation-of-regression.html" title="Minimum MSE Estimation of a Regression Model" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/minimum-mse-estimation-of-regression.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DE4GQH4yfyp7ImA9WhBaEk4.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-7904834353777863258</id><published>2013-05-22T08:35:00.001-07:00</published><updated>2013-05-22T08:35:21.097-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-22T08:35:21.097-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="EViews" /><title>EViews Tutorials</title><content type="html">&lt;div style="text-align: justify;"&gt;
If you're a student who is just learning to use the&lt;b&gt;&lt;a href="http://www.eviews.com/home.html" target="_blank"&gt; EViews&lt;/a&gt;&lt;/b&gt; econometrics package, the tutorials that IHS (the supplier of EViews) has made available should be very helpful. You'll find them &lt;b&gt;&lt;a href="http://www.eviews.com/Learning/index.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
There are 13 tutorials at this time, ranging from "EViews basics" to "Forecasting".&lt;/div&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;span style="font-family: inherit;"&gt;&lt;span style="font-family: inherit;"&gt;"The tutorials are split into self-contained sessions, although we recommend that new users of EViews work their way through the tutorials one by one.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;span style="font-family: inherit;"&gt;
&lt;/span&gt;&lt;/blockquote&gt;
&lt;blockquote class="tr_bq" style="text-align: justify;"&gt;
&lt;span style="font-family: inherit;"&gt;Each tutorial is accompanied by data files so that you may follow the tutorials in your own copy of EViews. The data files are available in the&amp;nbsp;&lt;em&gt;Supporting Files&lt;/em&gt;&amp;nbsp;side bar of each tutorial. Each tutorial is available in Microsoft Powerpoint® format, along with the data files, bundled together in a Zip file, in the&amp;nbsp;&lt;em&gt;Download Package&lt;/em&gt;&amp;nbsp;area of the side bar of each tutorial.&lt;/span&gt;&amp;nbsp;&lt;/blockquote&gt;
&lt;blockquote class="tr_bq" style="text-align: justify;"&gt;
&lt;span style="font-family: inherit;"&gt;You should note that the tutorials are written based on EViews 8, however the vast majority of material covered in them is applicable to earlier versions of EViews too."&lt;/span&gt;&lt;/blockquote&gt;
&lt;div style="text-align: justify;"&gt;
Certainly, these tutorials won't tell you everything you'll want to know, &amp;nbsp;but they're a good start.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/obytUaZIM1E" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/7904834353777863258/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/eviews-tutorials.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/7904834353777863258?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/7904834353777863258?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/obytUaZIM1E/eviews-tutorials.html" title="EViews Tutorials" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/eviews-tutorials.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08GSXo-cCp7ImA9WhBaEUU.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-2847275317740663132</id><published>2013-05-21T16:08:00.002-07:00</published><updated>2013-05-21T19:30:28.458-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-21T19:30:28.458-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Poisson distribution" /><category scheme="http://www.blogger.com/atom/ns#" term="ChiSquare distribution" /><category scheme="http://www.blogger.com/atom/ns#" term="Uniform distribution" /><category scheme="http://www.blogger.com/atom/ns#" term="Normal distribution" /><category scheme="http://www.blogger.com/atom/ns#" 
term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="Mean squared error" /><category scheme="http://www.blogger.com/atom/ns#" term="Economic statistics" /><category scheme="http://www.blogger.com/atom/ns#" term="Estimation" /><title>Variance Estimators That Minimize MSE</title><content type="html">&lt;div style="text-align: justify;"&gt;
In this post I'm going to look at alternative estimators for the variance of a population. The following discussion builds on &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/05/whats-variance-of-sample-variance.html" target="_blank"&gt;a recent post&lt;/a&gt;&lt;/b&gt;, and once again it's really directed at students. Well, for the most part.&lt;br /&gt;
&lt;br /&gt;
Actually, some of the results relating to populations that are non-Normal probably won't be familiar to a lot of readers. In fact, I can't think of a reference for where these results have been assembled in this way previously. So, I think there's some novelty here. But we'll get to that in due course.&lt;br /&gt;
&lt;br /&gt;
I can just imagine you smacking your lips in anticipation!&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;To get things started, let's suppose that we're using simple random sampling to get our n data-points, and that this sample is being drawn from a population that's Normal, with a (finite) mean of μ and a (finite) variance of σ&lt;sup&gt;2&lt;/sup&gt;. Let x* denote the sample average: x* = (1 / n)Σx&lt;sub&gt;i&lt;/sub&gt;, where the range of summation here (and everywhere below) is from 1 to n.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Let's consider the &lt;i&gt;family &lt;/i&gt;of estimators of σ&lt;sup&gt;2&lt;/sup&gt;:&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; = (1 / k)Σ[(x&lt;sub&gt;i&lt;/sub&gt; - x*)&lt;sup&gt;2&lt;/sup&gt;],&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
where "k" is a positive number. Now, to be very clear, &lt;i&gt;I'm not suggesting that we should necessarily restrict our attention to estimators that happen to be in this family&lt;/i&gt; - especially when we move away from the case where the population is Normal. I'll come back to this point towards the end of this post.&lt;br /&gt;
&lt;br /&gt;
Clearly, if k = (n - 1), we just have the usual unbiased estimator for σ&lt;sup&gt;2&lt;/sup&gt;, which for simplicity we'll call s&lt;sup&gt;2&lt;/sup&gt;. If k = n, we have the mean squared deviation of the sample, &amp;nbsp;s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2 &lt;/sup&gt;, which is a downward-biased estimator of σ&lt;sup&gt;2&lt;/sup&gt;.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
However, we all know that &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/search/label/Bias%20correction" target="_blank"&gt;unbiasedness&lt;/a&gt;&lt;/b&gt; isn't everything!&lt;br /&gt;
&lt;br /&gt;
Often, we look at our potential estimators and evaluate them in the context of some sort of loss function. If this loss function is quadratic, then the expected loss (or "risk") of an estimator is its Mean Squared Error (MSE). You'll recall that the MSE of an estimator is just the sum of its variance and the square of its bias.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Let's compare the unbiased estimator, s&lt;sup&gt;2&lt;/sup&gt;, and the biased estimator, s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;, in terms of MSE.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
Because we're using simple random sampling from a Normal population, we know that the statistic c = [(n - 1)s&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;/ σ&lt;sup&gt;2&lt;/sup&gt;] follows a Chi-square distribution with (n - 1) degrees of freedom. So, s&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;= (σ&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;/ (n - 1))χ&lt;sup&gt;2&lt;/sup&gt;&lt;sub&gt;(n - 1)&lt;/sub&gt;. We also know that the mean of a Chi-square random variable equals its degrees of freedom; and its variance is twice those degrees of freedom. So, E[s&lt;sup&gt;2&lt;/sup&gt;] = σ&lt;sup&gt;2&lt;/sup&gt;, and Var.(s&lt;sup&gt;2&lt;/sup&gt;) = 2σ&lt;sup&gt;4&lt;/sup&gt;&amp;nbsp;/ (n - 1).&lt;br /&gt;
&lt;br /&gt;
The first of these two results also holds if the population is non-Normal, but the second result doesn't hold, as I discussed in&amp;nbsp;&lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/05/whats-variance-of-sample-variance.html" target="_blank"&gt;this earlier post&lt;/a&gt;&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
Next, noting that s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; = (n - 1)s&lt;sup&gt;2&lt;/sup&gt; / n, it follows that:&lt;br /&gt;
&lt;br /&gt;
&lt;div&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; E[s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] = [(n - 1) / n]σ&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;;&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; Bias[s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] = E[s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] - σ&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;= - (σ&lt;sup&gt;2&amp;nbsp;&lt;/sup&gt;/ n)&lt;br /&gt;
&lt;br /&gt;
and&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp;Var.[s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] = 2σ&lt;sup&gt;4&lt;/sup&gt;(n - 1) / n&lt;sup&gt;2&lt;/sup&gt;.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
So, the MSE of s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;is given by the expression,&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp;MSE(s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) = Var.[s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] + (Bias[s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;])&lt;sup&gt;2&lt;/sup&gt;&lt;span style="background-color: white;"&gt;&amp;nbsp;= σ&lt;sup&gt;4&lt;/sup&gt;&amp;nbsp;(2n - 1) / n&lt;sup&gt;2&lt;/sup&gt;.&lt;/span&gt;&lt;/div&gt;
&lt;br /&gt;
Because s&lt;sup&gt;2&lt;/sup&gt; is unbiased, its MSE is just its variance, so MSE(s&lt;sup&gt;2&lt;/sup&gt;) = 2σ&lt;sup&gt;4&lt;/sup&gt; / (n - 1). Noting that&lt;span style="background-color: white;"&gt; MSE(s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) = [(n - 1) / n] MSE(s&lt;sup&gt;2&lt;/sup&gt;) -&lt;/span&gt; (σ&lt;sup&gt;4&lt;/sup&gt;&amp;nbsp;/ n&lt;sup&gt;2&lt;/sup&gt;), we see immediately that MSE(s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) &amp;lt; MSE(s&lt;sup&gt;2&lt;/sup&gt;), for any finite sample size, n. This can be seen in the following chart, drawn for σ&lt;sup&gt;2&amp;nbsp;&lt;/sup&gt;= 1. (Of course, the two estimators, and their MSEs coincide when the sample size is infinitely large.)&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-G7Cj2UZa-y0/UZv8u5m2C6I/AAAAAAAAA-Y/ERF_ePl0Tps/s1600/graph02.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="340" src="http://2.bp.blogspot.com/-G7Cj2UZa-y0/UZv8u5m2C6I/AAAAAAAAA-Y/ERF_ePl0Tps/s400/graph02.gif" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Although s&lt;sub&gt;n&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; dominates s&lt;sup&gt;2&lt;/sup&gt;, in terms of having smaller MSE for all possible sample values, and all (finite) sample sizes, it's still not the best we can do within the family of estimators we're considering! Let's go back to this class of estimators and ask, "what value of k will lead to the estimator with the &lt;i&gt;smallest possible&lt;/i&gt;&amp;nbsp;MSE for all members of this class?"
&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
We can answer this question by using a bit of simple calculus. We'll write out &amp;nbsp;the expression for the MSE of s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;, and it will be some function of "k". Then we'll differentiate this function with respect to "k", set the derivative to zero, and then solve for the value of k (say k*). We should then check the sign of the second derivative to make sure that k* actually minimizes the MSE, rather than maximizes it!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
So, here goes ........&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Notice that we can write a typical member of our family of estimators as&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; = (1 / k)Σ[(x&lt;sub&gt;i&lt;/sub&gt; - x*)&lt;sup&gt;2&lt;/sup&gt;] = [(n - 1) / k]s&lt;sup&gt;2&lt;/sup&gt; .&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
So, using the results that E[s&lt;sup&gt;2&lt;/sup&gt;] = σ&lt;sup&gt;2&lt;/sup&gt;, and Var.(s&lt;sup&gt;2&lt;/sup&gt;) = 2σ&lt;sup&gt;4&lt;/sup&gt;&amp;nbsp;/ (n - 1), we get:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; E[s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] = [(n - 1) / k]σ&lt;sup&gt;2&lt;/sup&gt; ;&lt;br /&gt;
&amp;nbsp; &amp;nbsp; Bias[s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] = E[s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] - σ&lt;sup&gt;2&lt;/sup&gt; = [(n - 1 - k) / k]σ&lt;sup&gt;2&lt;/sup&gt;;&lt;br /&gt;
and&lt;br /&gt;
&amp;nbsp; &amp;nbsp; Var.[s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] = 2σ&lt;sup&gt;4&lt;/sup&gt;(n - 1) / k&lt;sup&gt;2&lt;/sup&gt;.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
The MSE of s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; is given by the expression,&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp;M = MSE(s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) = Var.[s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] + (Bias[s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;])&lt;sup&gt;2&lt;/sup&gt; = (σ&lt;sup&gt;4&lt;/sup&gt; /k&lt;sup&gt;2&lt;/sup&gt;)[2(n - 1) + (n - 1 - k)&lt;sup&gt;2&lt;/sup&gt;].&lt;br /&gt;
&lt;br /&gt;
Differentiating M with respect to "k", and setting this derivative to zero, yields the solution, k* = (n + 1). You can easily check that k* &lt;i&gt;minimizes&lt;/i&gt;&amp;nbsp;(not maximizes) M.&lt;br /&gt;
&lt;br /&gt;
So, &lt;i&gt;within this family&lt;/i&gt; that we've been considering, the minimum MSE (MMSE) estimator of σ&lt;sup&gt;2&lt;/sup&gt; is the estimator,&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; s&lt;sub&gt;n+1&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; = (1 / (n + 1))Σ[(x&lt;sub&gt;i&lt;/sub&gt; - x*)&lt;sup&gt;2&lt;/sup&gt;] .&lt;br /&gt;
&lt;br /&gt;
This is certainly a well-known result. It also extends naturally to the situation where we're estimating the variance of the (Normally distributed) error term in a linear regression model. In that case the MMSE estimator of this variance is (1 / (n - p + 2))Σe&lt;sub&gt;i&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;, where e&lt;sub&gt;i&lt;/sub&gt; is the i&lt;sup&gt;th&lt;/sup&gt; OLS residual, and p is the number of coefficients in the model.&lt;br /&gt;
&lt;br /&gt;
So far, so good!&lt;br /&gt;
&lt;br /&gt;
Now, let's connect with the earlier post that I mentioned above, and see how all of this works out if we have a population that's &lt;i&gt;non-Normal&lt;/i&gt;. We'll retain the simple random sampling, though. As was discussed in &lt;a href="http://davegiles.blogspot.ca/2013/05/whats-variance-of-sample-variance.html" style="font-weight: bold;" target="_blank"&gt;that post&lt;/a&gt;, in general the variance of s&lt;sup&gt;2&lt;/sup&gt; is given by:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Var.[s&lt;sup&gt;2&lt;/sup&gt;] = (1 / n)[μ&lt;sub&gt;4&lt;/sub&gt; - (n - 3)μ&lt;sub&gt;2&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; / (n - 1)] &amp;nbsp;= V, say.&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
Here, μ&lt;sub&gt;2&lt;/sub&gt; and μ&lt;sub&gt;4&lt;/sub&gt; are the second and fourth central moments of the population distribution. Recall that μ&lt;sub&gt;2&lt;/sub&gt; is the population variance, and for the result immediately above to hold the first four moments of the distribution must exist.&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
Let's extend this variance expression to members of the family, s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;. Then we'll work out the expression for the MSE of such estimators for a non-normal population. Finally, this will allow us to derive the MMSE estimator in this family for &lt;i&gt;any&lt;/i&gt;&amp;nbsp;population distribution - not just for the &lt;i&gt;Normal&lt;/i&gt; population that we dealt with earlier in this post.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
Once again, we'll begin by using the fact that we can write:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;= (1 / k)Σ[(x&lt;sub&gt;i&lt;/sub&gt;&amp;nbsp;- x*)&lt;sup&gt;2&lt;/sup&gt;] = [(n - 1) / k]s&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;.&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
The estimator, s&lt;sup&gt;2&lt;/sup&gt;, is still unbiased for σ&lt;sup&gt;2&lt;/sup&gt; even in the non-Normal case, so we still have the results:&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; E[s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] = [(n - 1) / k]σ&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;; and &amp;nbsp;Bias[s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] =&amp;nbsp;[(n - 1 - k) / k]σ&lt;sup&gt;2 &lt;/sup&gt;= &amp;nbsp;[(n - 1 - k) / k]μ&lt;sub&gt;2&lt;/sub&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
as before.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Also,&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Var.[s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;] = [(n - 1) / k]&lt;sup&gt;2&lt;/sup&gt; Var.[s&lt;sup&gt;2&lt;/sup&gt;] = [(n - 1) / k]&lt;sup&gt;2&lt;/sup&gt;(1 / n)[μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;- (n - 3)μ&lt;sub&gt;2&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;/ (n - 1)] ,&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
and so the MSE of s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; is given by:&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;MSE(s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) = &amp;nbsp;[(n - 1 - k) / k]&lt;sup&gt;2&lt;/sup&gt;μ&lt;sub&gt;2&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; + V[(n - 1) / k]&lt;sup&gt;2&lt;/sup&gt;, &lt;br /&gt;
&lt;br /&gt;
where V = Var.[s&lt;sup&gt;2&lt;/sup&gt;] is defined above.&lt;br /&gt;
&lt;br /&gt;
Going through the calculus once again, &lt;span style="color: red;"&gt;it's easy to show&lt;/span&gt; (&lt;i&gt;I used to hate that statement in textbooks&lt;/i&gt;) that the value of "k" for which the MSE is &lt;i&gt;minimized &lt;/i&gt;is:&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; k** = (n - 1)(V&amp;nbsp;+ μ&lt;sub&gt;2&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;) / μ&lt;sub&gt;2&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;,&lt;br /&gt;
&lt;br /&gt;
where V = (1 / n)[μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;- (n - 3)μ&lt;sub&gt;2&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;/ (n - 1)].&lt;br /&gt;
&lt;span style="color: red;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="color: red;"&gt;Check: &lt;/span&gt;For the &lt;i&gt;Normal&lt;/i&gt; distribution, μ&lt;sub&gt;4&lt;/sub&gt; = 3μ&lt;sub&gt;2&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;, and so k** = (n + 1) = k*, as before.&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
Having gone to all of this effort, let's finish up by illustrating the optimal k** values for a small selection of other population distributions:&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;span style="color: red;"&gt;&lt;i&gt;Uniform, continuous on&amp;nbsp;&lt;/i&gt;[&lt;i&gt;a , b&lt;/i&gt;]&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
μ&lt;sub&gt;2&lt;/sub&gt;&amp;nbsp;= (b - a)&lt;sup&gt;2&amp;nbsp;&lt;/sup&gt;/ 12 &amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;= &amp;nbsp;(b - a)&lt;sup&gt;4&lt;/sup&gt; / 80&lt;br /&gt;
k** = (n - 2) + (3 / n) + 9(n - 1) / (5n)&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;&lt;b&gt;&lt;span style="color: red;"&gt;Standard Student's-t, with v degrees of freedom&lt;/span&gt;&lt;/b&gt;&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
μ&lt;sub&gt;2&lt;/sub&gt;&amp;nbsp;= &amp;nbsp; v / (v - 2) &amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;= &amp;nbsp; 3v&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;/ [(v - 2)(v - 4)]&lt;br /&gt;
&amp;nbsp;k** = (n - 2) + (3 / n) +3(n - 1)(v - 2) /[n(v - 4)] &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; ; &amp;nbsp; for v &amp;gt; 4&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;&lt;b&gt;&lt;span style="color: red;"&gt;χ&lt;sup&gt;2&lt;/sup&gt;, with v degrees of freedom&lt;/span&gt;&lt;/b&gt;&lt;/i&gt;&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;
μ&lt;sub&gt;2&lt;/sub&gt;&amp;nbsp;= 2v &amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;= 12v(v + 4)&lt;br /&gt;
k** = &amp;nbsp;(n - 2) + (3 / n) + 3(n - 1)(v + 4) / (nv)&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;&lt;b&gt;&lt;span style="color: red;"&gt;Exponential, with mean θ&lt;/span&gt;&lt;/b&gt;&lt;/i&gt;&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;
μ&lt;sub&gt;2&lt;/sub&gt;&amp;nbsp;= θ&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;&amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;= 9θ&lt;sup&gt;4 &amp;nbsp;&lt;/sup&gt;&lt;br /&gt;
k** = (n&lt;sup&gt;2&lt;/sup&gt; + 7n - 6) / n&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;&lt;b&gt;&lt;span style="color: red;"&gt;Poisson, with parameter λ&lt;/span&gt;&lt;/b&gt;&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
μ&lt;sub&gt;2&lt;/sub&gt;&amp;nbsp;= λ &amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;= λ(3λ + 1) &lt;br /&gt;
k** = (n - 2) + (3 / n) + (n - 1)(3λ + 1) / (nλ)&lt;br /&gt;
&lt;br /&gt;
Now, a final word of &lt;i&gt;caution&lt;/i&gt;.&lt;br /&gt;
&lt;br /&gt;
Yes, setting k = k** in the case of each of these non-Normal populations, and then estimating the variance by using the statistic, &amp;nbsp;s&lt;sub&gt;k&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;= (1 / k)Σ[(x&lt;sub&gt;i&lt;/sub&gt;&amp;nbsp;- x*)&lt;sup&gt;2&lt;/sup&gt;], will ensure that your "estimator" is MMSE &lt;i&gt;within this particular family of estimators&lt;/i&gt;. However, this doesn't mean to say that it's the "best", or even a feasible, estimator to use.&lt;br /&gt;
&lt;br /&gt;
For instance, consider the last example where the population is Poisson. In that case, the population mean and variance are both &amp;nbsp;λ. The MLE for &amp;nbsp;λ is the sample average, x*. Therefore, x* is also the MLE for the population variance. Actually, x* is the "minimum variance unbiased" (MVUE) estimator of λ. The statistic s&lt;sup&gt;2&lt;/sup&gt; is also an unbiased estimator of λ, but it is inefficient relative to x*. &amp;nbsp;So x* dominates s&lt;sup&gt;2&lt;/sup&gt; in terms of MSE. See&amp;nbsp;&lt;a href="http://qspace.qu.edu.qa/bitstream/handle/10576/10632/A%20rich%20learning%20lesson%20using%20the%20Poisson%20distribution%20.pdf?sequence=1" style="font-weight: bold;" target="_blank"&gt;here&lt;/a&gt;&amp;nbsp;for a nice discussion.&lt;br /&gt;
&lt;br /&gt;
If we were to try and implement our MMSE estimator of the &lt;i&gt;variance&lt;/i&gt; in this case, we'd be trying to estimate λ. However, k** is a function of λ. We'd need to know the population variance in order to obtain the MMSE estimator of that parameter! You can see that the same issue applies to the Student's-t and χ&lt;sup&gt;2&lt;/sup&gt; examples given above but it's not an issue with the other two examples.&lt;br /&gt;
&lt;br /&gt;
Sometimes, MMSE estimators simply aren't "feasible". They're functions of the unknown parameters we're trying to estimate. They're not really estimators, at all!&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;span style="text-align: justify;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles
&lt;/div&gt;
&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/u0nwFYNsdU8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/2847275317740663132/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/variance-estimators-that-minimize-mse.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/2847275317740663132?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/2847275317740663132?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/u0nwFYNsdU8/variance-estimators-that-minimize-mse.html" title="Variance Estimators That Minimize MSE" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-G7Cj2UZa-y0/UZv8u5m2C6I/AAAAAAAAA-Y/ERF_ePl0Tps/s72-c/graph02.gif" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/variance-estimators-that-minimize-mse.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DUACRno9fip7ImA9WhBbGUQ.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-672406649034315181</id><published>2013-05-19T14:09:00.003-07:00</published><updated>2013-05-19T14:09:27.466-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-19T14:09:27.466-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category 
scheme="http://www.blogger.com/atom/ns#" term="Grad. students" /><title>Camp(s) Econometrics</title><content type="html">&lt;div style="text-align: justify;"&gt;
The&lt;b&gt; &lt;a href="http://www.maxwell.syr.edu/cpr/events/cpr_camp_econometrics/Camp_Econometrics_VIII/" target="_blank"&gt;New York Camp Econometrics VIII&lt;/a&gt;&lt;/b&gt; was held in Bolton Landing, NY, last month. I recall Badi Baltagi (one of the Camp Econometrics organisers) telling me about this great annual event a few years ago. The &lt;b&gt;&lt;a href="http://www.utexas.edu/cola/depts/economics/events/25016" target="_blank"&gt;Texas Econometrics 2013&lt;/a&gt;&lt;/b&gt; was held in Lost Pines back in February. This was the 18&lt;sup&gt;th&lt;/sup&gt; Camp for the group in Texas.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
I also seem to recall that there used to be another regular Camp Econometrics in Southern California some years ago. If my neurons are still firing in the right order, I believe that Denis Aigner was one of the leaders of that venture.&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Back to the NY Camp:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
"This event is a gathering of econometricians and empirical economists whose successful goal is to: (1) Bring together a group of econometricians/empirical economists and guests of host universities to discuss issues in econometrics, both applied and theoretical; (2) Present papers for comments by participants; (3) Stimulate student interest in econometrics; (4) Help students develop their technical presentation skills by encouraging the students of host universities to participate in the meetings and present papers."&lt;/blockquote&gt;
Events like the Camp(s) Econometrics, and &lt;b&gt;&lt;a href="http://www.econometricgame.com/" target="_blank"&gt;The Econometric Game&lt;/a&gt;&lt;/b&gt;, in the Netherlands, really are great ventures!&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/zujbQ8qy1lQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/672406649034315181/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/camps-econometrics.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/672406649034315181?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/672406649034315181?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/zujbQ8qy1lQ/camps-econometrics.html" title="Camp(s) Econometrics" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/camps-econometrics.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEAEQ30zeip7ImA9WhBbGU4.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-3341237080139233435</id><published>2013-05-18T20:28:00.001-07:00</published><updated>2013-05-18T21:11:42.382-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-18T21:11:42.382-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="Cooking" /><title>Cookbook Econometrics - Reprise</title><content type="html">&lt;div style="text-align: justify;"&gt;
A few days ago I was looking at my copy of &lt;i&gt;Econometric Foundations&lt;/i&gt;, written by Ron Mittelhammer, George Judge, and Doug Miller. It's an &lt;b&gt;&lt;a href="http://www.amazon.com/Econometric-Foundations-Pack-CD-ROM-Mittelhammer/dp/0521623944/ref=sr_1_1?s=books&amp;amp;ie=UTF8&amp;amp;qid=1368893607&amp;amp;sr=1-1&amp;amp;keywords=econometric+foundations" target="_blank"&gt;excellent book&lt;/a&gt;&lt;/b&gt;, by the way.&lt;/div&gt;
&lt;br /&gt;
I noticed, for the first time, that on p.xxviii of the Preface they have the following to say, under the heading of "A Comment":&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote class="tr_bq" style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;&lt;br /&gt;"Many view econometrics as a potpourri or bag of tricks, and the cookbook metaphor for econometrics textbooks has become commonplace. Unfortunately, this philosophy can produce analysts who know a list of econometric recipes but who have insufficient understanding of which techniques to apply in a given situation or how to interpret the results of an application properly. As the inventory of econometric procedures has grown, the importance of understanding when it is appropriate to apply each econometric procedure, as well as knowing the appropriate interpretation of the results, has grown more than proportionately. The number of reference works that describe the growing inventory has expanded &lt;i&gt;pari passu&lt;/i&gt;. These reference works will be accessible to the well-trained analyst who has mastered the basic philosophy and principles on which econometrics is founded. However, analysts who have done little more than memorize the recipes in conceptual econometric cookbooks will find the growing literature on new econometric methods impenetrable. Our goal is for you to be able to determine or create the econometric procedures that are applicable to your problem and then be able to apply them empirically and interpret the results appropriately. This is what this book is about, and this is what we think modern graduate econometrics instruction should be about."&lt;/blockquote&gt;
I don't know Doug Miller personally, but I do know and respect &amp;nbsp;Ron and George, and so I wasn't surprised to find myself punching the air and &lt;strike&gt;shouting&lt;/strike&gt; saying, "YES!"&lt;br /&gt;
&lt;br /&gt;
I had similar things to say in &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2011/05/cookbook-econometrics.html" target="_blank"&gt;a post&lt;/a&gt;&lt;/b&gt;&amp;nbsp;a couple of years ago, and I won't repeat them all here.&lt;br /&gt;
&lt;br /&gt;
Yes, I enjoy helping readers by providing "how-to-do-it" posts (such as &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2011/04/testing-for-granger-causality.html" target="_blank"&gt;this one&lt;/a&gt;&lt;/b&gt;&amp;nbsp;on testing for Granger causality). Judging by the feedback, these posts appear to be widely appreciated - thanks! However,&amp;nbsp;&amp;nbsp;I'm just as committed to fostering a clear understanding of:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;The assumptions that are needed to get our standard econometric results&lt;/li&gt;
&lt;li&gt;The connections between many of the "tools" that we use&lt;/li&gt;
&lt;li&gt;How to approach problems we haven't met before - in a sensible way&lt;/li&gt;
&lt;li&gt;Why some econometric tools work better than others&lt;/li&gt;
&lt;li&gt;The history and intuition that provide the foundation for much of what we do in econometrics&lt;/li&gt;
&lt;li&gt;&lt;strike&gt;Motherhood and apple pie&lt;/strike&gt;&lt;/li&gt;
&lt;li&gt;The crucial importance of data quality&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
O.K. - enough harping on for now!&lt;/div&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/05IfYnrUZKw" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/3341237080139233435/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/cookbook-econometrics-reprise.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3341237080139233435?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3341237080139233435?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/05IfYnrUZKw/cookbook-econometrics-reprise.html" title="Cookbook Econometrics - Reprise" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/cookbook-econometrics-reprise.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkADQ3Y_eyp7ImA9WhBbGUQ.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-7381473472292455733</id><published>2013-05-18T07:50:00.002-07:00</published><updated>2013-05-19T12:12:52.843-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-19T12:12:52.843-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="macroeconometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="New Zealand" /><category scheme="http://www.blogger.com/atom/ns#" term="Sheep" /><category scheme="http://www.blogger.com/atom/ns#" term="History of econometrics" /><category 
scheme="http://www.blogger.com/atom/ns#" term="Grad. students" /><title>I Know What You Did Last Summer!</title><content type="html">&lt;div style="text-align: justify;"&gt;
O.K., I know that I stole that title! It was absolutely blatant.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
The other day, some colleagues and I were discussing the issue of students (including our own offspring), and their summer jobs - or no jobs, as the case may be. I'm not passing judgement here in what follows, by the way.&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
When I was a student I had to earn enough money over the summer to live off for the rest of the year. That's just the way it was. Period! My parents were great - I could live at home over the summer at no cost to me. But that was it. They lived in a &lt;u&gt;very&lt;/u&gt; small rural town in New Zealand, a long way from where I attended university. The upside of this was that, being a rural area (in the mid/late 1960's through to the early 1970's), there were some seriously good work opportunities.&lt;br /&gt;
&lt;br /&gt;
So, what did I do each summer?&lt;br /&gt;
&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;Well, I did lots of different things, actually. They ranged from career-related, to "I don't care how tough this is - I just need the bucks!"&lt;br /&gt;
&lt;br /&gt;
Let's look at some of these jobs. They were interesting, to say the least:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Summers of '66, '67 &amp;amp; '68: Sheep-shearing gang &amp;amp; harvesting hay and grain&lt;/li&gt;
&lt;li&gt;Summer of '69: Actuarial office of a major life insurance company&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Summer of '70: Research Section of the N.Z. central bank - the Reserve Bank of New Zealand&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
In retrospect, I was very lucky. I got to experience some work situations that were way outside what most students get to see.&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Didn't get to Woodstock - not because I was making hay, but because I was in class - remember, I was in the Southern Hemisphere. The "Summer of Love" was my "Winter of Woe".&lt;/li&gt;
&lt;li&gt;Decided that an actuarial life wasn't for me - financially, a bad decision!&lt;/li&gt;
&lt;li&gt;Got to be one of 3 people working on the construction of the first version of the Reserve Bank's macroeconometric model. You'd be right if you guessed that I found that really exciting!&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;div&gt;
That last summer job led to a scholarship for my Masters-level studies, and ultimately to a very rewarding period at the RBNZ before I opted for an academic life in Australia.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;
You just never know where that summer job is going to lead to!&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/8a-2X3lDvnU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/7381473472292455733/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/i-know-what-you-did-last-summer.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/7381473472292455733?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/7381473472292455733?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/8a-2X3lDvnU/i-know-what-you-did-last-summer.html" title="I Know What You Did Last Summer!" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/i-know-what-you-did-last-summer.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08CR3wyeCp7ImA9WhBaEUU.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-365684784210771425</id><published>2013-05-17T09:44:00.001-07:00</published><updated>2013-05-21T19:31:06.290-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-21T19:31:06.290-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Poisson distribution" /><category scheme="http://www.blogger.com/atom/ns#" term="ChiSquare distribution" /><category scheme="http://www.blogger.com/atom/ns#" term="Uniform distribution" /><category scheme="http://www.blogger.com/atom/ns#" term="Normal distribution" 
/><category scheme="http://www.blogger.com/atom/ns#" term="Degrees of freedom" /><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="Economic statistics" /><category scheme="http://www.blogger.com/atom/ns#" term="Estimation" /><title>What's the Variance of a Sample Variance?</title><content type="html">&lt;div style="text-align: justify;"&gt;
This post is really pitched at students who are taking a course or two in introductory economic statistics. It relates to a couple of estimators of the variance of a population that we all meet in such courses - plus another one that you might not have met. In addition, I'll be emphasising the fact that some "standard" results depend crucially on certain assumptions. That's not surprising - but it's not always made clear by instructors and textbooks.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;To begin with, let's consider a standard problem. We have a population that is Normal, with a mean of μ and a variance of σ&lt;sup&gt;2&lt;/sup&gt;. We take a sample of size n, using &lt;i&gt;simple random sampling&lt;/i&gt;. Then we form the simple arithmetic mean of the sample values: x* = (1 / n)Σx&lt;sub&gt;i&lt;/sub&gt; , where the range of summation (here and everywhere below) is from 1 to n.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Under my assumptions, we know that the sampling distribution of x* is N[μ , (σ&lt;sup&gt;2&lt;/sup&gt; / n)]. The normality of the sampling distribution follows from the Normality of the population, and the fact that x* is a linear function of the data. The variance of the sampling distribution stated above is correct only because simple random sampling has been used.&lt;br /&gt;
&lt;br /&gt;
Now, let's get to what I'm really interested in here - estimating σ&lt;sup&gt;2&lt;/sup&gt;. We all learn that the mean squared deviation of the sample, σ&lt;sup&gt;*2&lt;/sup&gt; = (1 / n)Σ[(x&lt;sub&gt;i&lt;/sub&gt; - x*)&lt;sup&gt;2&lt;/sup&gt;], is a (downward-)&amp;nbsp;&lt;i&gt;biased&lt;/i&gt; estimator of σ&lt;sup&gt;2&lt;/sup&gt;. If we allow for the fact that we've actually lost one degree of freedom by estimating μ using x*, then an &lt;i&gt;unbiased&lt;/i&gt; estimator of σ&lt;sup&gt;2&lt;/sup&gt; is s&lt;sup&gt;2&lt;/sup&gt; = (1 / (n - 1))Σ[(x&lt;sub&gt;i&lt;/sub&gt; - x*)&lt;sup&gt;2&lt;/sup&gt;].&lt;br /&gt;
&lt;br /&gt;
O.K., now what does the sampling distribution of s&lt;sup&gt;2&lt;/sup&gt; look like?&lt;br /&gt;
&lt;br /&gt;
Well, under the assumptions I've made, including the Normality of the population, s&lt;sup&gt;2&lt;/sup&gt; has a sampling distribution that is &lt;i&gt;proportional to&lt;/i&gt;&amp;nbsp;a Chi-square distribution. More specifically, the statistic, c = [(n - 1)s&lt;sup&gt;2&lt;/sup&gt; / σ&lt;sup&gt;2&lt;/sup&gt;] is Chi-square with (n - 1) degrees of freedom.&lt;br /&gt;
&lt;br /&gt;
[As an aside, s&lt;sup&gt;2&lt;/sup&gt; and x* are independently distributed &lt;i&gt;if and only if&lt;/i&gt;&amp;nbsp;the population is Normal. The "only if" part of this statement is due to the Irish statistician, Geary - see &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2012/03/irish-economic-statistician.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;.]&lt;br /&gt;
&lt;br /&gt;
So, we now know something, indirectly, about the sampling distribution of s&lt;sup&gt;2&lt;/sup&gt;, and we know that E[s&lt;sup&gt;2&lt;/sup&gt;] = σ&lt;sup&gt;2&lt;/sup&gt;. What is the variance of s&lt;sup&gt;2&lt;/sup&gt;?&lt;br /&gt;
&lt;br /&gt;
Because we're assuming a Normal population, implying that the statistic I've called "c" follows a Chi-square distribution, we can use the result that the variance of a Chi-square random variable equals twice its degrees of freedom.&lt;br /&gt;
&lt;br /&gt;
Re-arranging the formula for "c", we can write: s&lt;sup&gt;2&lt;/sup&gt; = cσ&lt;sup&gt;2&lt;/sup&gt; / (n - 1).&lt;br /&gt;
&lt;br /&gt;
Then, Var.(s&lt;sup&gt;2&lt;/sup&gt;) = {[σ&lt;sup&gt;2&lt;/sup&gt; / (n - 1)]&lt;sup&gt;2&amp;nbsp;&lt;/sup&gt;Var.(c)} = {[σ&lt;sup&gt;4&lt;/sup&gt; / (n - 1)&lt;sup&gt;2&lt;/sup&gt;]2(n - 1)} = 2σ&lt;sup&gt;4&lt;/sup&gt; / (n -1).&lt;br /&gt;
&lt;br /&gt;
[As another aside, the mean of a Chi-square random variable equals its degrees of freedom, so applying this result to "c" and re-arranging, we immediately get the result that E[s&lt;sup&gt;2&lt;/sup&gt;] = σ&lt;sup&gt;2&lt;/sup&gt;. However, we know this already, and this result holds even if the data are non-Normal.]&lt;br /&gt;
&lt;br /&gt;
Now, this is as far as things usually go in an introductory economic statistics course. To sum up:&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;E[s&lt;sup&gt;2&lt;/sup&gt;] = σ&lt;sup&gt;2&lt;/sup&gt;&lt;/li&gt;
&lt;li&gt;c = [(n - 1)s&lt;sup&gt;2&lt;/sup&gt; / σ&lt;sup&gt;2&lt;/sup&gt;] ~ χ&lt;sup&gt;2&lt;/sup&gt;&lt;sub&gt;(n - 1)&lt;/sub&gt;&lt;/li&gt;
&lt;li&gt;Var.[s&lt;sup&gt;2&lt;/sup&gt;] = 2σ&lt;sup&gt;4&lt;/sup&gt; / (n -1)&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
Notice that Var.(s&lt;sup&gt;2&lt;/sup&gt;) vanishes when n grows very large. This, together with the first result above, implies that s&lt;sup&gt;2&lt;/sup&gt; is a (mean-square) &lt;b&gt;consistent&lt;/b&gt; estimator of σ&lt;sup&gt;2&lt;/sup&gt;.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
Unfortunately, students often don't realize that the second and third of these results rely on both simple random sampling and the Normality of the population.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
A thoughtful student will notice that the first result holds even if the data are non-Normal, and will ask, "what's the &lt;i&gt;variance&lt;/i&gt; of s&lt;sup&gt;2&lt;/sup&gt; if the population &lt;i&gt;isn't&lt;/i&gt; Normal?" That's a good question!&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
To answer it, let's introduce an important concept - the "moments" of a probability distribution. Let X be a random variable. Then E[X&lt;sup&gt;k&lt;/sup&gt;] is called the k&lt;sup&gt;th&lt;/sup&gt; "raw moment" (or, moment about zero) of the distribution of X. &amp;nbsp;(Here, "k" is a positive integer, but more generally we can allow k to be negative, or a fraction.) Let's denote the k&lt;sup&gt;th&lt;/sup&gt; such moment by μ'&lt;sub&gt;k&lt;/sub&gt;. So, the first raw moment is just the population mean. That is, μ'&lt;sub&gt;1&lt;/sub&gt; = μ.&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Then consider the quantities, μ&lt;sub&gt;k&lt;/sub&gt; = E[(X - μ)&lt;sup&gt;k&lt;/sup&gt;], for k = 1, 2, 3,.......... We call these the "centered moments" of the distribution of X. You'll notice that μ&lt;sub&gt;2&lt;/sub&gt; is just the population variance. The third and fourth centered moments are used (together with μ&lt;sub&gt;2&lt;/sub&gt;) to construct measures of skewness and kurtosis, but that's another story.&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
By the way, there's an important detail. The expectations involved in the construction of the moments require forming an integral. If that integral &lt;i&gt;diverges&lt;/i&gt;, the corresponding moment isn't defined. For instance, the k&lt;sup&gt;th&lt;/sup&gt; moment for a Student's-t distribution with v degrees of freedom exists only if v &amp;gt; k. In the case of the Cauchy distribution (which is just a Student's-t distribution with v = 1), &lt;i&gt;none&lt;/i&gt;&amp;nbsp;of the moments exist!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Alright - back to the question in hand! What is the variance of s&lt;sup&gt;2&lt;/sup&gt; if the population is non-Normal? &lt;b&gt;&lt;a href="http://www.amstat.org/sections/srms/proceedings/y2008/Files/300992.pdf" target="_blank"&gt;The answer&lt;/a&gt;&lt;/b&gt;, in the case of &lt;i&gt;simple random sampling&lt;/i&gt;, is:&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Var.(s&lt;sup&gt;2&lt;/sup&gt;) = (1 / n)[μ&lt;sub&gt;4&lt;/sub&gt; - μ&lt;sub&gt;2&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;(n - 3) / (n -1)] .&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
If the population is Normal, then μ&lt;sub&gt;4&lt;/sub&gt; = 3σ&lt;sup&gt;4&lt;/sup&gt;, and μ&lt;sub&gt;2&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; = σ&lt;sup&gt;4&lt;/sup&gt;. So, we get &amp;nbsp;Var.(s&lt;sup&gt;2&lt;/sup&gt;) = 2σ&lt;sup&gt;4&lt;/sup&gt; / (n - 1), in this case.&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
Notice that this more general expression for Var.(s&lt;sup&gt;2&lt;/sup&gt;) also vanishes as n grows. So, a pair of &lt;i&gt;sufficient&lt;/i&gt;&amp;nbsp;conditions for the mean-square consistency of s&lt;sup&gt;2&lt;/sup&gt; (as an estimator of σ&lt;sup&gt;2&lt;/sup&gt;) is:&lt;br /&gt;
&lt;ol&gt;
&lt;li&gt;The data are obtained using simple random sampling; &amp;amp;&lt;/li&gt;
&lt;li&gt;At least the first 4 moments of the population distribution exist.&lt;/li&gt;
&lt;/ol&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;div style="text-align: justify;"&gt;
We can easily work out the expressions for Var.(s&lt;sup&gt;2&lt;/sup&gt;) in the case where the population follows some other distributions that you may have heard about. Here are just a few illustrative results:&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;span style="color: red;"&gt;&lt;i&gt;Uniform, continuous on &lt;/i&gt;[&lt;i&gt;a , b&lt;/i&gt;]&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;
&lt;br /&gt;
μ&lt;sub&gt;2&lt;/sub&gt; = (b - a)&lt;sup&gt;2&amp;nbsp;&lt;/sup&gt;/ 12 &amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt; = &amp;nbsp;(9 / 5)&lt;br /&gt;
Var.(s&lt;sup&gt;2&lt;/sup&gt;) = [1296(n - 1) - 5(n - 3)(b - a)&lt;sup&gt;4&lt;/sup&gt;] / [720n(n - 1)]&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;&lt;b&gt;&lt;span style="color: red;"&gt;Standard Student's-t, with v degrees of freedom&lt;/span&gt;&lt;/b&gt;&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
μ&lt;sub&gt;2&lt;/sub&gt;&amp;nbsp;= &amp;nbsp; v / (v - 2) &amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;= &amp;nbsp; 3v&lt;sup&gt;2&lt;/sup&gt; / [(v - 2)(v - 4)]&lt;br /&gt;
Var.(s&lt;sup&gt;2&lt;/sup&gt;) = [2v&lt;sup&gt;2&lt;/sup&gt;(2v - 5)] / [n(v - 2)&lt;sup&gt;2&lt;/sup&gt;(v - 4)] &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; ; &amp;nbsp; for v &amp;gt; 4&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;&lt;b&gt;&lt;span style="color: red;"&gt;χ&lt;sup&gt;2&lt;/sup&gt;, with v degrees of freedom&lt;/span&gt;&lt;/b&gt;&lt;/i&gt;&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;
&lt;br /&gt;
μ&lt;sub&gt;2&lt;/sub&gt;&amp;nbsp;= 2v &amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;= 3(v + 4) / v &lt;br /&gt;
Var.(s&lt;sup&gt;2&lt;/sup&gt;) = [3(n - 1)(v + 4) - 4(n - 3)v&lt;sup&gt;3&lt;/sup&gt;] / [n(n - 1)v]&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;&lt;b&gt;&lt;span style="color: red;"&gt;Exponential, with mean θ&lt;/span&gt;&lt;/b&gt;&lt;/i&gt;&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;
&lt;br /&gt;
μ&lt;sub&gt;2&lt;/sub&gt;&amp;nbsp;= θ&lt;sup&gt;2&lt;/sup&gt; &amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;= 9θ&lt;sup&gt;4 &amp;nbsp;&lt;/sup&gt;&lt;br /&gt;
Var.(s&lt;sup&gt;2&lt;/sup&gt;) = [2(4n - 3)θ&lt;sup&gt;4&lt;/sup&gt;] / [n(n - 1)]&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;&lt;b&gt;&lt;span style="color: red;"&gt;Poisson, with parameter λ&lt;/span&gt;&lt;/b&gt;&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
μ&lt;sub&gt;2&lt;/sub&gt;&amp;nbsp;= λ &amp;nbsp;; &amp;nbsp;μ&lt;sub&gt;4&lt;/sub&gt;&amp;nbsp;= λ(3λ + 1) &lt;br /&gt;
Var.(s&lt;sup&gt;2&lt;/sup&gt;) = 2λ&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;/ (n - 1) + (λ / n)&lt;br /&gt;
&lt;br /&gt;
Keep in mind that in each of the cases, the sampling distribution of &amp;nbsp;c = [(n - 1)s&lt;sup&gt;2&lt;/sup&gt; / σ&lt;sup&gt;2&lt;/sup&gt;] will no longer be a χ&lt;sup&gt;2&lt;/sup&gt; distribution! Given our assumption of simple random sampling, you should be able to convince yourself that the &lt;i&gt;asymptotic&lt;/i&gt; sampling distribution of "c" will be Normal.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;References&lt;/b&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;&lt;a href="http://www.amstat.org/sections/srms/proceedings/y2008/Files/300992.pdf" target="_blank"&gt;Cho, E. &amp;amp; M. J. Cho&lt;/a&gt;&lt;/b&gt;, 2008. Variance of sample variance. &lt;i&gt;Proceedings of the 2008 Joint Statistical Meetings, Section on Survey Research Methods&lt;/i&gt;, American Statistical Association, Washington DC, 1291-1293.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;/div&gt;
&lt;a href="http://www.jstor.org/stable/2983669"&gt;&lt;b&gt;Geary, R. C.&lt;/b&gt;&lt;/a&gt;&amp;nbsp;(1936). The distribution of the Student's ratio for the non-normal samples.&amp;nbsp;&lt;em&gt;Supplement to the&amp;nbsp;Journal of the&amp;nbsp;Royal Statistical Society&lt;/em&gt;, 3, 178-184.
&lt;br /&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/5j9Wb5PwsXk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/365684784210771425/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/whats-variance-of-sample-variance.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/365684784210771425?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/365684784210771425?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/5j9Wb5PwsXk/whats-variance-of-sample-variance.html" title="What's the Variance of a Sample Variance?" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/whats-variance-of-sample-variance.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEYDRXY8fyp7ImA9WhBbFko.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-6906458279462099778</id><published>2013-05-15T19:41:00.001-07:00</published><updated>2013-05-15T19:42:54.877-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-15T19:42:54.877-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Blogs" /><title>Top Economics Blogs</title><content type="html">Nice to be on &lt;b&gt;&lt;a href="http://blog.inomics.com/top-economics-blogs/" target="_blank"&gt;the list&lt;/a&gt;&lt;/b&gt; - thanks, &lt;a href="http://www.inomics.com/" 
style="font-weight: bold;" target="_blank"&gt;INOMICS&lt;/a&gt;!&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/2XuWhsHF4NM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/6906458279462099778/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/top-economics-blogs.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/6906458279462099778?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/6906458279462099778?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/2XuWhsHF4NM/top-economics-blogs.html" title="Top Economics Blogs" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/top-economics-blogs.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEIDSHw_eip7ImA9WhBbFEw.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-5517121982112166386</id><published>2013-05-12T14:57:00.000-07:00</published><updated>2013-05-12T20:42:59.242-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-12T20:42:59.242-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Simultaneous equations models" /><category scheme="http://www.blogger.com/atom/ns#" term="2SLS" /><category scheme="http://www.blogger.com/atom/ns#" term="FIML" /><category scheme="http://www.blogger.com/atom/ns#" term="GMM" /><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" 
/><category scheme="http://www.blogger.com/atom/ns#" term="LIML" /><category scheme="http://www.blogger.com/atom/ns#" term="History of econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="Estimation" /><category scheme="http://www.blogger.com/atom/ns#" term="3SLS" /><title>What's Your Favourite Estimator?</title><content type="html">&lt;div style="text-align: justify;"&gt;
It's interesting to dwell on the popularity of different estimators that econometricians use. Some estimators are "in vogue" for a period, and then give way to others as new developments come along. Different topics have captured the attention of theoreticians and practitioners alike at different times in history.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Here's a &lt;b&gt;&lt;a href="http://books.google.com/ngrams/graph?content=2SLS%2C3SLS%2CLIML%2CFIML&amp;amp;year_start=1960&amp;amp;year_end=2008&amp;amp;corpus=15&amp;amp;smoothing=3&amp;amp;share=" target="_blank"&gt;Google Ngram&lt;/a&gt;&amp;nbsp;&lt;/b&gt;showing&amp;nbsp;the extent to which some familiar estimators for simultaneous equations models have been mentioned in books since 1960:&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-O4Jex24hSy4/UZANYMI7RWI/AAAAAAAAA9Y/dn7wgI2kHrc/s1600/capture2.GIF" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="170" src="http://2.bp.blogspot.com/-O4Jex24hSy4/UZANYMI7RWI/AAAAAAAAA9Y/dn7wgI2kHrc/s640/capture2.GIF" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
Not too surprisingly, good old &lt;a href="http://books.google.com/ngrams/graph?content=OLS&amp;amp;year_start=1960&amp;amp;year_end=2008&amp;amp;corpus=15&amp;amp;smoothing=3&amp;amp;share=" style="font-weight: bold;" target="_blank"&gt;OLS&lt;/a&gt;&amp;nbsp;just goes on and on:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-3p368qOAneM/UZAOoN56vMI/AAAAAAAAA9k/mmYKTTbWHP4/s1600/capture2.GIF" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="168" src="http://4.bp.blogspot.com/-3p368qOAneM/UZAOoN56vMI/AAAAAAAAA9k/mmYKTTbWHP4/s640/capture2.GIF" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
I was going to include the&amp;nbsp;&lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/05/finite-sample-properties-of-gmm.html" target="_blank"&gt;GMM&lt;/a&gt;&amp;nbsp;&lt;/b&gt;estimator in these plots, but this acronym has meanings other than the obvious one that comes to mind. So, the results would have been misleading. To be safe, let's use the full phrase &lt;a href="http://books.google.com/ngrams/graph?content=generalized+method+of+moments%2CGeneralized+Method+of+Moments&amp;amp;year_start=1976&amp;amp;year_end=2008&amp;amp;corpus=15&amp;amp;smoothing=3&amp;amp;share=" style="font-weight: bold;" target="_blank"&gt;Generalized Method of Moments&lt;/a&gt;&amp;nbsp;and allow for case sensitivity:&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-S1jglkgSOOc/UZAhKY8vrBI/AAAAAAAAA90/j4-4c6LnCxs/s1600/capture2.GIF" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="168" src="http://2.bp.blogspot.com/-S1jglkgSOOc/UZAhKY8vrBI/AAAAAAAAA90/j4-4c6LnCxs/s640/capture2.GIF" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Interestingly, the phrase appeared in some books before the publication of Hansen's classic 1982 paper.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/Z0Wf0iaDAT0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/5517121982112166386/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/whats-your-favourite-estimator.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/5517121982112166386?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/5517121982112166386?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/Z0Wf0iaDAT0/whats-your-favourite-estimator.html" title="What's Your Favourite Estimator?" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-O4Jex24hSy4/UZANYMI7RWI/AAAAAAAAA9Y/dn7wgI2kHrc/s72-c/capture2.GIF" height="72" width="72" /><thr:total>2</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/whats-your-favourite-estimator.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkUHSHwyeCp7ImA9WhBbE0U.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-1294991912090498941</id><published>2013-05-12T09:58:00.000-07:00</published><updated>2013-05-12T10:37:19.290-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-12T10:37:19.290-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Time series" /><category scheme="http://www.blogger.com/atom/ns#" term="Seasonal 
adjustment" /><category scheme="http://www.blogger.com/atom/ns#" term="Data" /><title>Flowers for Mom - From Quandl</title><content type="html">&lt;div style="text-align: justify;"&gt;
Today being Mothers' Day in many parts of the world, I thought that flowers would be appropriate. Well, a price index for (Gardens, Plants, and) Flowers. Specifically, a harmonized price index for these goods for 27 European Union countries.&lt;br /&gt;
&lt;br /&gt;
I retrieved the monthly data for the period January 2006 to March 2013 from &lt;b&gt;&lt;a href="http://quandl.com/"&gt;Quandl.com&lt;/a&gt;&lt;/b&gt;&amp;nbsp;- a really nice resource that I &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/04/in-praise-of-quandl.html" target="_blank"&gt;posted about&lt;/a&gt;&lt;/b&gt; recently. &amp;nbsp;As well as downloading the data in various formats, reading the data from R, &lt;i&gt;etc&lt;/i&gt;., you can also &lt;b&gt;&lt;a href="http://www.quandl.com/graph-embed-is-ready?utm_source=quandl&amp;amp;utm_medium=sitebanner&amp;amp;utm_campaign=q_graph_embed" target="_blank"&gt;embed&lt;/a&gt;&lt;/b&gt;&amp;nbsp;an interactive chart of the data directly into a document such as this one, and make the data visible to viewers. (&lt;i&gt;Just click on the graph&lt;/i&gt;.) For instance:&lt;br /&gt;
&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;a href="http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/CP0933EU27M086NEST-Harmonized-Index-of-Consumer-Prices-Gardens-Plants-and-Flowers-for-European-Union-27-countries?utm_source=quandl&amp;amp;utm_medium=graph" target="blank"&gt;&lt;img alt="Graph of Harmonized Index of Consumer Prices: Gardens, Plants, and Flowers for European Union (27 countries)" height="360" src="http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/CP0933EU27M086NEST-Harmonized-Index-of-Consumer-Prices-Gardens-Plants-and-Flowers-for-European-Union-27-countries.png?dataset[trim_start]=2008-01-01&amp;amp;dataset[trim_end]=2013-03-01&amp;amp;dataset[width]=853&amp;amp;dataset[height]=480&amp;amp;dataset[visible_columns]=0&amp;amp;dataset[y_axis_min]=102.69&amp;amp;dataset[y_axis_max]=118.38&amp;amp;dataset[y_axis_dual_min]=null&amp;amp;dataset[y_axis_dual_max]=null&amp;amp;dataset[graph_title]=Harmonized%20Index%20of%20Consumer%20Prices%3A%20Gardens%2C%20Plants%2C%20and%20Flowers%20for%20European%20Union%20(27%20countries)&amp;amp;dataset[graph_source]=Federal%20Reserve%20Economic%20Data" width="640" /&gt;&lt;/a&gt;

&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
As this price is &lt;i&gt;not&lt;/i&gt;&amp;nbsp;seasonally adjusted, but obviously has a pronounced seasonal pattern, I thought I'd use this post to illustrate the basics of seasonal adjustment. What I'm going to do is take you through the steps associated with a bare-bones version of the ratio-to-moving-average method. This is something that I show students if I'm teaching an introductory descriptive economic statistics course.&lt;br /&gt;
&lt;br /&gt;
Then, I'll show you how close the results are to the ones that you get if you seasonally adjust the data using the full-blown Census X-13 method that is employed by most statistical agencies world-wide.&lt;br /&gt;
&lt;br /&gt;
Let the price index at time 't' be denoted by P&lt;sub&gt;t&lt;/sub&gt;, and let's assume that this series is made up of the &lt;i&gt;product&lt;/i&gt; of trend (T&lt;sub&gt;t&lt;/sub&gt;), cycle (C&lt;sub&gt;t&lt;/sub&gt;), seasonal (S&lt;sub&gt;t&lt;/sub&gt;), and irregular (I&lt;sub&gt;t&lt;/sub&gt;) components. (In fact, when I use the X13 method and allow it to decide whether the series is multiplicative or additive in its components, it selects the multiplicative model I'm using here.)&lt;br /&gt;
&lt;br /&gt;
So, taking natural logarithms, we have:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; ln(P&lt;sub&gt;t&lt;/sub&gt;) = ln(T&lt;sub&gt;t&lt;/sub&gt;) + ln(C&lt;sub&gt;t&lt;/sub&gt;) + ln(S&lt;sub&gt;t&lt;/sub&gt;) + ln(I&lt;sub&gt;t&lt;/sub&gt;) .&lt;br /&gt;
&lt;br /&gt;
The rudimentary ratio-to-moving-average method involves the following steps. The column labels refer to the associated Excel workbook and csv file that are available on the&amp;nbsp;&lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/p/data.html" target="_blank"&gt;data&lt;/a&gt;&lt;/b&gt;&amp;nbsp;page of this blog:&lt;br /&gt;
&lt;br /&gt;
&lt;ol&gt;
&lt;li&gt;First, we take an unweighted 12-month arithmetic moving average of the ln(P&lt;sub&gt;t&lt;/sub&gt;) data. (Column D.) Averaging the data smooths the series. A 12-month average should smooth out the seasonal movements, as well as any irregular movements.&lt;/li&gt;
&lt;li&gt;There's a slight problem - it's to do with the "timing" of the observations in Column D. It arises because there's an even number (12) of months in a year. Conceptually, the first figure in Column D (4.48255) should be located halfway between the June and July months if it's to be at the middle of the year. Right now, it's half a month out of alignment. This problem also arises if we're seasonally adjusting quarterly time-series data, because 4 is an even number too. (There's also an even &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2011/04/how-many-weeks-are-there-in-year.html" target="_blank"&gt;number of weeks in a year&lt;/a&gt;&amp;nbsp;&lt;/b&gt;- but I digress!)&lt;/li&gt;
&lt;li&gt;To re-align the data, we now take an unweighted 2-period arithmetic moving average of the numbers in Column D. The results appear in Column E. Averaging two "out-of-alignment" numbers shifts them by half a month, and bingo, they're now lined up with the appropriate dates! We call the resulting series the "Centered Moving Average".&lt;/li&gt;
&lt;li&gt;If we've done things properly there should be 6 values "missing" at the start of the series in Column E, and 6 missing at the end. (Two and two, if we had quarterly data.)&amp;nbsp;&lt;/li&gt;
&lt;li&gt;What have we achieved by this? Well, the data in Column E represent what is left of the ln(P&lt;sub&gt;t&lt;/sub&gt;) series after we've smoothed away the Seasonal and Irregular components. That is, they represent the combined ln(Trend) and ln(Cycle) components.&lt;/li&gt;
&lt;li&gt;Next, we subtract the Centered Moving Average series (Column E) from the ln(P&lt;sub&gt;t&lt;/sub&gt;) data in Column C. This gives us (in Column F) the ln(Seasonal) and ln(Irregular) components.&lt;/li&gt;
&lt;li&gt;Then, we take the arithmetic mean of all of the July month values in Column F. This gives us a single, common "seasonal factor" for that month. We then do the same with all of the August month values in that column, and so on. The results appear in Column G. Notice that we're implicitly assuming that the seasonal factors are going to be stable over time.&lt;/li&gt;
&lt;li&gt;We're nearly there! If we add up the 12 seasonal factors they should sum to zero over the full year. Seasonality, by definition, is an intra-year phenomenon. Let's CHECK if we have this result. You can see in the workbook that they actually sum to 0.00178098. Not bad, but not good enough!&lt;/li&gt;
&lt;li&gt;We apportion this discrepancy across the 12 seasonal factors by subtracting (0.00178098/12) from each of the numbers in Column G. The results, which are the final seasonal factors, appear in Column H. Notice that these factors are repeated, year after year.&lt;/li&gt;
&lt;li&gt;We can now seasonally adjust the ln(P&lt;sub&gt;t&lt;/sub&gt;) series. We subtract these seasonal factors from the data in Column C. The results appear in Column I.&lt;/li&gt;
&lt;li&gt;Taking the exponential of the Column I series gives us the seasonally adjusted series for P&lt;sub&gt;t&lt;/sub&gt; itself, as in Column J.&lt;/li&gt;
Notice that if I hadn't taken the logarithm of P&lt;sub&gt;t&lt;/sub&gt; before starting the adjustment process, I could have achieved the same results by using &lt;i&gt;geometric averages&lt;/i&gt; in place of &lt;i&gt;arithmetic&lt;/i&gt; &lt;i&gt;averages&lt;/i&gt;, everywhere above; and by &lt;i&gt;dividing&lt;/i&gt;, rather than &lt;i&gt;subtracting&lt;/i&gt;, everywhere. In particular, at step 6 we would have isolated the "Seasonal and Irregular" components from the "Trend and Cycle" components by taking the &lt;u&gt;Ratio&lt;/u&gt; of P&lt;sub&gt;t&lt;/sub&gt; &lt;u&gt;to&lt;/u&gt; the centered &lt;u&gt;moving&lt;/u&gt; &lt;u&gt;average&lt;/u&gt;&amp;nbsp;series. Hence the name of this seasonal adjustment method.
&lt;/ol&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
&lt;div style="text-align: justify;"&gt;
Next, I used EViews to apply the X-13 seasonal adjustment method to P&lt;sub&gt;t&lt;/sub&gt;. Once you are viewing the series, you select "Proc", and then "Seasonal Adjustment", and go from there. Here's the original price index and its seasonally adjusted counterpart: &amp;nbsp;&lt;/div&gt;
&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-Ug51mlAzqZ8/UY6do5kLXdI/AAAAAAAAA8U/a9ZIbC4zlMI/s1600/graph01.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="474" src="http://2.bp.blogspot.com/-Ug51mlAzqZ8/UY6do5kLXdI/AAAAAAAAA8U/a9ZIbC4zlMI/s640/graph01.gif" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: justify;"&gt;
The EViews workfile that I used is on the &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/p/code.html" target="_blank"&gt;code&lt;/a&gt; &lt;/b&gt;page for this blog. In that file there are actually &lt;i&gt;three&lt;/i&gt;&amp;nbsp;seasonally adjusted versions of the price index. The series called "X13" will be self-explanatory; the series called "Manual" was obtained using the basic steps outlined above, as shown in the Excel workbook; and the series called "RMA" was obtained using the "moving average" option under the "seasonal adjustment" procedure in EViews. The latter series should be &lt;i&gt;almost identical&lt;/i&gt;&amp;nbsp;to "Manual", and indeed it is. More on this below.&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: justify;"&gt;
Now, how similar is my rudimentary seasonally adjusted series, "Manual", to the one obtained using X-13? Here's a scatter-plot of the two series - it's virtually a 45-degree line.&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-PLEW9gx0sOQ/UY6ex_2ur9I/AAAAAAAAA8g/rzKLoVNXJw4/s1600/graph02.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="620" src="http://2.bp.blogspot.com/-PLEW9gx0sOQ/UY6ex_2ur9I/AAAAAAAAA8g/rzKLoVNXJw4/s640/graph02.gif" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: justify;"&gt;
This is confirmed by the following OLS regression, and simple correlations:&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-1CLSH-LC1DA/UY6e8TbdP1I/AAAAAAAAA8o/bD55jWeZLy0/s1600/Capture2.GIF" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="465" src="http://1.bp.blogspot.com/-1CLSH-LC1DA/UY6e8TbdP1I/AAAAAAAAA8o/bD55jWeZLy0/s640/Capture2.GIF" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
You can see that there is almost a perfect correlation between my seasonally adjusted series, and both "RMA" and "X13":&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-FRhHGb38zNI/UY6dU7GXrPI/AAAAAAAAA8M/8bBZ8doVdFE/s1600/capture1.GIF" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="130" src="http://4.bp.blogspot.com/-FRhHGb38zNI/UY6dU7GXrPI/AAAAAAAAA8M/8bBZ8doVdFE/s400/capture1.GIF" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
The rudimentary ratio-to-moving-average seasonal adjustment procedure that I went through above isn't always going to provide results that are this close to the ones that you get when you use X-13. There are several reasons for this:&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;X-13 can allow for outliers in the data. Our basic method ignores this possibility.&lt;/li&gt;
&lt;li&gt;X-13 can allow for a seasonal pattern that "evolves" over the cycle, or over time. Our basic method assumes a stable seasonal pattern.&lt;/li&gt;
&lt;li&gt;X-13 can allow for the fact that different months have different numbers of "trading days", and for "holiday effects", such as the moving dates for Easter. These effects are ignored in our basic method.&lt;/li&gt;
&lt;li&gt;X-13 can deal with "end-point" effects that arise at the beginning and end of the sample, where values can't be computed for the centered moving averages. Our basic method doesn't take this into account.&lt;/li&gt;
&lt;/ul&gt;
&lt;div style="text-align: justify;"&gt;
However, in many cases, very similar seasonally adjusted series &lt;i&gt;are&lt;/i&gt;&amp;nbsp;obtained. This is very comforting for those of us who teach this material. You can take students through the rudimentary steps that I've outlined, and they can generate very convincing seasonally adjusted time-series.&lt;br /&gt;
&lt;br /&gt;
Oh yes - don't forget those flowers for Mothers' Day!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/H9mGnfXHlaQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/1294991912090498941/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/flowers-for-mom-from-quandl.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/1294991912090498941?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/1294991912090498941?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/H9mGnfXHlaQ/flowers-for-mom-from-quandl.html" title="Flowers for Mom - From Quandl" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-Ug51mlAzqZ8/UY6do5kLXdI/AAAAAAAAA8U/a9ZIbC4zlMI/s72-c/graph01.gif" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/flowers-for-mom-from-quandl.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkQHR3s5fip7ImA9WhBbEk8.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-976328311422572121</id><published>2013-05-10T14:12:00.000-07:00</published><updated>2013-05-10T14:12:16.526-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-10T14:12:16.526-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Asymptotic theory" /><category scheme="http://www.blogger.com/atom/ns#" term="Hypothesis testing" 
/><category scheme="http://www.blogger.com/atom/ns#" term="Circular data" /><title>New Paper Published</title><content type="html">&lt;div style="text-align: justify;"&gt;
A paper of mine appears in the latest issue of the &lt;a href="http://chjs.soche.cl/index.php?option=com_content&amp;amp;view=article&amp;amp;id=173&amp;amp;Itemid=56" style="font-style: italic;" target="_blank"&gt;&lt;b&gt;Chilean Journal of Statistics&lt;/b&gt;&lt;/a&gt;. The paper is titled, "Exact asymptotic goodness-of-fit testing for discrete circular data with applications".&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
I've posted previously about this general topic, &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2012/03/goodness-of-fit-testing-with-discrete.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;, &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2012/08/goodness-of-fit-testing-with-discrete.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt; and &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2012/12/eggnog-with-econometrics-flavour.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/-fd-u2_QrOI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/976328311422572121/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/new-paper-published.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/976328311422572121?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/976328311422572121?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/-fd-u2_QrOI/new-paper-published.html" title="New Paper Published" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/new-paper-published.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0EAQXczeyp7ImA9WhBbEUw.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-5595889854286590745</id><published>2013-05-09T10:14:00.000-07:00</published><updated>2013-05-09T10:14:00.983-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-09T10:14:00.983-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Blogs" /><category scheme="http://www.blogger.com/atom/ns#" term="R" /><category scheme="http://www.blogger.com/atom/ns#" term="Grad. students" /><title>R is His Friend</title><content type="html">&lt;div style="text-align: justify;"&gt;
Marcus Beck has a nice (&amp;amp; relatively new) blog called &lt;a href="http://beckmw.wordpress.com/" style="font-weight: bold;" target="_blank"&gt;R is My Friend&lt;/a&gt;. You can guess that his posts relate to the use of R.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
I particularly liked &lt;b&gt;&lt;a href="http://beckmw.wordpress.com/2012/12/" target="_blank"&gt;his piece&lt;/a&gt;&lt;/b&gt; on the use of the &lt;a href="http://cran.r-project.org/web/packages/XML/index.html" target="_blank"&gt;XML&lt;/a&gt;&lt;b&gt;&amp;nbsp;&lt;/b&gt;package in R to mine data from the internet; and &lt;b&gt;&lt;a href="http://beckmw.wordpress.com/2013/04/29/poor-mans-integration-a-simulated-visualization-approach/" target="_blank"&gt;his post&lt;/a&gt;&lt;/b&gt; on using the &lt;i&gt;integrate&lt;/i&gt; function in R, even when the antiderivative has no closed-form solution.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Grad. student readers will also like his post, &lt;a href="http://beckmw.wordpress.com/2013/04/15/how-long-is-the-average-dissertation/" style="font-weight: bold;" target="_blank"&gt;How Long is the Average Dissertation&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
My own take on a related question can be found &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2011/02/how-long-should-my-thesis-be.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/8g7wP9PJ5vo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/5595889854286590745/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/r-is-his-friend.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/5595889854286590745?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/5595889854286590745?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/8g7wP9PJ5vo/r-is-his-friend.html" title="R is His Friend" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/r-is-his-friend.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU8CQH84fyp7ImA9WhBbEUw.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-8116206077919326898</id><published>2013-05-08T11:52:00.002-07:00</published><updated>2013-05-09T08:37:41.137-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-09T08:37:41.137-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Asymptotic theory" /><category scheme="http://www.blogger.com/atom/ns#" term="Specification testing" /><category scheme="http://www.blogger.com/atom/ns#" term="Heteroskadasticity" /><category scheme="http://www.blogger.com/atom/ns#" term="Nonlinear models" /><category scheme="http://www.blogger.com/atom/ns#" term="STATA" 
/><category scheme="http://www.blogger.com/atom/ns#" term="EViews" /><title>Robust Standard Errors for Nonlinear Models</title><content type="html">&lt;div style="text-align: justify;"&gt;
&lt;span style="font-family: inherit;"&gt;André Richter wrote to me from Germany, commenting on the reporting of robust standard errors in the context of nonlinear models such as Logit and Probit. He said he'd been led to believe that this doesn't make much sense. I told him that I agree, and that this is another of my "pet peeves"!&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;
Yes, I do get grumpy about some of the things I see so-called "applied econometricians" doing all of the time. For instance, see my&amp;nbsp;&lt;/span&gt;&lt;a href="http://davegiles.blogspot.ca/2011/05/gripe-of-day.html" style="font-weight: bold;" target="_blank"&gt;Gripe of the Day&lt;/a&gt;&amp;nbsp;post back in 2011. Sometimes I feel as if I could produce a post with that title almost every day!&lt;/div&gt;
&lt;br /&gt;
&lt;span style="font-family: inherit;"&gt;&lt;span style="font-family: inherit;"&gt;Anyway, let's get back to André's point.&lt;/span&gt;&lt;br /&gt;
&lt;/span&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;&lt;span style="font-family: inherit;"&gt;The following facts are widely known (&lt;i&gt;e.g&lt;/i&gt;., check any recent edition of Greene's text) and it's hard to believe that anyone could get through a grad. level course in econometrics and not be aware of them:&lt;/span&gt;&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;In the case of a &lt;i&gt;linear&lt;/i&gt;&amp;nbsp;regression model, heteroskedastic errors render the OLS estimator, b, of the &lt;i&gt;coefficient vector&lt;/i&gt;, beta, inefficient. However, this estimator is still unbiased and weakly consistent.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;In this same &lt;i&gt;linear&lt;/i&gt; model, and still using OLS, the usual estimator of the covariance matrix of b is an &lt;i&gt;inconsistent&lt;/i&gt;&amp;nbsp;estimator of the true covariance matrix of b. Consequently, if the standard errors of the elements of b are computed in the usual way, they will be &lt;i&gt;inconsistent &lt;/i&gt;estimators of the true standard deviations of the elements of b.&lt;/li&gt;
&lt;li&gt;For this reason, we often use White's "heteroskedasticity consistent" estimator for the covariance matrix of b, if the presence of heteroskedastic errors is suspected.&lt;/li&gt;
&lt;li&gt;This covariance estimator is still consistent, even if the errors are actually &lt;i&gt;homoskedastic&lt;/i&gt;.&lt;/li&gt;
&lt;li&gt;In the case of the linear regression model, this makes sense. Whether the errors are homoskedastic or heteroskedastic, &lt;u&gt;both&lt;/u&gt; the OLS coefficient estimators and White's standard errors are consistent.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
Moreover, in the case of a model that is&amp;nbsp;&lt;i&gt;nonlinear&lt;/i&gt; in the parameters:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;The MLE of the &lt;i&gt;parameter vector&lt;/i&gt; is biased and &lt;i&gt;inconsistent&lt;/i&gt; if the errors are heteroskedastic (unless the likelihood function is modified to correctly take into account the precise form of heteroskedasticity).&lt;/li&gt;
&lt;li&gt;This stands in stark contrast to the situation above, for the &lt;i&gt;linear&lt;/i&gt; model.&lt;/li&gt;
&lt;li&gt;The MLE of the asymptotic covariance matrix of the MLE of the parameter vector is also inconsistent, as in the case of the linear model.&lt;/li&gt;
&lt;li&gt;Obvious examples of this are Logit and Probit models, which are nonlinear in the parameters, and are usually estimated by MLE.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
I've made this point in at least one &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2011/05/gripe-of-day.html" target="_blank"&gt;previous post&lt;/a&gt;&lt;/b&gt;. The results relating to nonlinear models are really well-known, and this is why it's extremely important to test for model mis-specification (such as heteroskedasticity) when estimating models such as Logit, Probit, Tobit, &lt;i&gt;etc&lt;/i&gt;. Then, if need be, the model can be modified to take the heteroskedasticity into account before we estimate the parameters. For more information on such tests, and the associated references, see &lt;b&gt;&lt;a href="http://web.uvic.ca/~dgiles/downloads/binary_choice/index.html" target="_blank"&gt;this page&lt;/a&gt;&lt;/b&gt;&amp;nbsp;on my professional website.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
Unfortunately, it's unusual to see "applied econometricians" pay any attention to this! They tend to just do one of two things. They either&lt;/div&gt;
&lt;div&gt;
&lt;ol&gt;
&lt;li&gt;use Logit or Probit, but report the "heteroskedasticity-consistent" standard errors that their favourite econometrics package conveniently (&lt;i&gt;but misleadingly&lt;/i&gt;) computes for them. This involves a covariance estimator along the lines of White's "sandwich estimator". Or, they&lt;/li&gt;
&lt;li&gt;estimate a "linear probability model" (&lt;i&gt;i.e&lt;/i&gt;., just use OLS, even though the dependent variable is a binary dummy variable), and report the "het.-consistent standard errors".&lt;/li&gt;
&lt;/ol&gt;
&lt;div&gt;
If they follow approach 2, these folks defend themselves by saying that "you get essentially the same estimated marginal effects if you use OLS as opposed to Probit or Logit." I've said my piece about this attitude previously (&lt;a href="http://davegiles.blogspot.ca/2011/09/econometrics-and-one-way-streets.html" style="font-weight: bold;" target="_blank"&gt;here&lt;/a&gt;, &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2012/06/another-gripe-about-linear-probability.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;, &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2012/06/yet-another-reason-for-avoiding-linear.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;,&amp;nbsp;and &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2012/07/more-comments-on-use-of-lpm.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;), and I won't go over it again here.&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
My concern right now is with approach 1 above.&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
The "robust" standard errors are being reported to cover the possibility that the model's errors may be heteroskedastic. But if that's the case, the parameter estimates are &lt;i&gt;inconsistent&lt;/i&gt;. What use is a consistent standard error when the point estimate is inconsistent? Not much!!&lt;/div&gt;
&lt;div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div&gt;
This point is laid out pretty clearly in&amp;nbsp;&lt;span style="font-family: inherit;"&gt;Greene (2012, pp. 692-693), for example. Here's what he has to say:&lt;/span&gt;&lt;/div&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;span style="font-family: inherit;"&gt;"...the probit (Q-) maximum likelihood estimator is &lt;i&gt;not&lt;/i&gt;&amp;nbsp;consistent in the presence of any form of heteroscedasticity, unmeasured heterogeneity, omitted variables (even if they are orthogonal to the included ones), nonlinearity of the form of the index, or an error in the distributional assumption [ with some narrow exceptions as described by Ruud (198)]. Thus, in almost any case, the sandwich estimator provides an appropriate asymptotic covariance matrix &lt;u&gt;for an estimator that is biased in an unknown direction.&lt;/u&gt;" (My underlining; DG.) "White raises this issue explicitly, although it seems to receive very little attention in the literature.".........."His very useful result is that if the QMLE converges to a probability limit, then the sandwich estimator can, under certain circumstances, be used to estimate the asymptotic covariance matrix of that estimator. But there is no guarantee that the QMLE &lt;i&gt;will&lt;/i&gt;&amp;nbsp;converge to anything interesting or useful. Simply computing a robust covariance matrix for an otherwise inconsistent estimator does not give it redemption. Consequently, the virtue of a robust covariance matrix in this setting is unclear."&lt;/span&gt;&lt;/blockquote&gt;
Back in July 2006, on the &lt;b&gt;&lt;a href="https://stat.ethz.ch/pipermail/r-help/2006-July/108722.html" target="_blank"&gt;R Help feed&lt;/a&gt;&lt;/b&gt;, Robert Duval had this to say:&lt;br /&gt;
&lt;blockquote class="tr_bq"&gt;
"This discussion leads to another point which is more subtle, but more important...&lt;/blockquote&gt;
&lt;blockquote class="tr_bq"&gt;
You can always get Huber-White (a.k.a robust) estimators of the standard errors even in non-linear models like the logistic regression. However, if you believe your errors do not satisfy the standard assumptions of the model, then you should not be running that model as this might lead to biased parameter estimates.&lt;/blockquote&gt;
&lt;blockquote class="tr_bq"&gt;
For instance, in the linear regression model you have consistent parameter estimates independently of whether the errors are heteroskedastic or not. However, in the case of non-linear models it is usually the case that heteroskedasticity will lead to biased parameter estimates (unless you fix it explicitly somehow).&lt;/blockquote&gt;
&lt;blockquote class="tr_bq"&gt;
Stata is famous for providing Huber-White std. errors in most of their regression estimates, whether linear or non-linear. But this is nonsensical in the non-linear models since in these cases you would be consistently estimating the standard errors of inconsistent parameters.&lt;/blockquote&gt;
&lt;blockquote class="tr_bq"&gt;
This point and potential solutions to this problem is nicely discussed in Wooldrige's Econometric Analysis of Cross Section and Panel Data."&lt;/blockquote&gt;
&lt;/div&gt;
&lt;span style="font-family: inherit;"&gt;&lt;i&gt;&lt;b&gt;Amen to that&lt;/b&gt;&lt;/i&gt;&lt;b&gt;!&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="font-family: inherit;"&gt;Regrettably, it's not just STATA that encourages questionable practices in this respect. These same options are also available in EViews, for example.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;
&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;b&gt;Reference&lt;/b&gt;&lt;br /&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-family: inherit;"&gt;&lt;a href="http://people.stern.nyu.edu/wgreene/Text/econometricanalysis.htm" target="_blank"&gt;&lt;b&gt;Greene, W. H&lt;/b&gt;&lt;/a&gt;&lt;b&gt;.&lt;/b&gt;, 2012. &lt;i&gt;Econometric Analysis&lt;/i&gt;. Prentice Hall, Upper Saddle River, NJ.&lt;/span&gt;&lt;br /&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
&lt;span style="font-family: inherit;"&gt;© 2013, David E. Giles&lt;/span&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/bewp90bgidU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/8116206077919326898/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/robust-standard-errors-for-nonlinear.html#comment-form" title="23 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/8116206077919326898?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/8116206077919326898?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/bewp90bgidU/robust-standard-errors-for-nonlinear.html" title="Robust Standard Errors for Nonlinear Models" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>23</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/robust-standard-errors-for-nonlinear.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkYGQ3c5cSp7ImA9WhBUGUQ.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-7821305526418970881</id><published>2013-05-07T23:22:00.000-07:00</published><updated>2013-05-07T23:22:02.929-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-07T23:22:02.929-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="macroeconometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="New Zealand" /><category scheme="http://www.blogger.com/atom/ns#" term="History of econometrics" /><category 
scheme="http://www.blogger.com/atom/ns#" term="Computing" /><title>Turn on the Economy</title><content type="html">&lt;div style="text-align: justify;"&gt;
"Turn on the economy". That's one of the invitations issued to (web) visitors to the &lt;b&gt;&lt;a href="http://www.rbnzmuseum.govt.nz/" target="_blank"&gt;museum&lt;/a&gt;&lt;/b&gt; of New Zealand's central bank - &lt;b&gt;&lt;a href="http://www.rbnz.govt.nz/" target="_blank"&gt;The Reserve Bank of New Zealand&lt;/a&gt;&lt;/b&gt;. Accepting this invitation will allow you to see a virtual version of Bill Phillips' famous &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2011/10/moniacal-economist.html" target="_blank"&gt;MONIAC computer&lt;/a&gt;&lt;/b&gt; at work, and to "play" with the economy yourself.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Doesn't that appeal to you?&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Bill Phillips - "&lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/05/the-indiana-jones-of-economics.html" target="_blank"&gt;the Indiana Jones of Economics&lt;/a&gt;&lt;/b&gt;" - gave us "the Phillips Curve", of course. However, the &lt;a href="http://www.rbnz.govt.nz/about/museum/3121411.pdf" style="font-weight: bold;" target="_blank"&gt;MONIAC computer&lt;/a&gt;&amp;nbsp;was a revolutionary device that Bill used to demonstrate economic stabilization policy.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
If you happen to be visiting New Zealand's capital city - Wellington - you can &lt;b&gt;&lt;a href="http://www.rbnzmuseum.govt.nz/visitus/FindUs.aspx" target="_blank"&gt;visit&lt;/a&gt;&lt;/b&gt; the Bank's (physical) museum, and see the &lt;b&gt;&lt;a href="http://www.rbnz.govt.nz/research/search/article.asp?id=4006" target="_blank"&gt;MONIAC&lt;/a&gt;&lt;/b&gt; "in the flesh".&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/tCoGfJfRfw8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/7821305526418970881/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/turn-on-economy.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/7821305526418970881?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/7821305526418970881?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/tCoGfJfRfw8/turn-on-economy.html" title="Turn on the Economy" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/turn-on-economy.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEIDQXY_cSp7ImA9WhBUGUQ.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-2842136452421161401</id><published>2013-05-07T20:59:00.002-07:00</published><updated>2013-05-07T22:56:10.849-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-07T22:56:10.849-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Continuous-time model" /><category scheme="http://www.blogger.com/atom/ns#" term="macroeconometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="New Zealand" /><category scheme="http://www.blogger.com/atom/ns#" term="History of econometrics" /><title>The Indiana Jones of Economics</title><content 
type="html">&lt;div style="text-align: justify;"&gt;
All students of economics have heard about The Phillips Curve in one of its forms or another. The Phillips Curve is named after A. W. H. (Bill) Phillips, a remarkable New Zealander who made a number of fundamental contributions. His work, undertaken largely at the London School of Economics, dealt with stabilization policy, and modelling in continuous time, to name just two topics.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Bill Phillips was quite a character, and his varied life has been amply documented in various places. &lt;b&gt;&lt;a href="http://en.wikipedia.org/wiki/William_Phillips_(economist)" target="_blank"&gt;His entry&lt;/a&gt;&lt;/b&gt; in Wikipedia is a useful starting point, and the &lt;b&gt;&lt;a href="http://www.eastonbh.ac.nz/1978/11/a_w_h_phillips_/" target="_blank"&gt;memorial piece&lt;/a&gt; &lt;/b&gt;written in 1978 by one of my former teachers, Brian Easton, is also a "must read" item.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Some time ago, I wrote about Bill in a post titled, "&lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2011/10/moniacal-economist.html" target="_blank"&gt;A Moniacal Economist&lt;/a&gt;&lt;/b&gt;", in reference to his famous MONIAC machines. These were hydraulic analogue computers that could be used to demonstrate the workings of the macro-economy.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
In February of this year, BBC Radio aired a piece about Bill Phillips, titled "The Indiana Jones of Economics". In this 14-minute broadcast, Tim Harford provides an interesting commentary on Bill's life and contributions to our discipline. You can download the broadcast - it's Episode 4, 6 February 2013 - &lt;b&gt;&lt;a href="http://www.bbc.co.uk/podcasts/series/thpop" target="_blank"&gt;from here&lt;/a&gt;&lt;/b&gt;.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
More about Bill Phillips at a later date..............&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/_tG0vIGRGxQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/2842136452421161401/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/the-indiana-jones-of-economics.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/2842136452421161401?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/2842136452421161401?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/_tG0vIGRGxQ/the-indiana-jones-of-economics.html" title="The Indiana Jones of Economics" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/the-indiana-jones-of-economics.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUUEQXY5eyp7ImA9WhBUGEU.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-3195752571004520499</id><published>2013-05-06T16:10:00.000-07:00</published><updated>2013-05-06T16:33:20.823-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-06T16:33:20.823-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Instrumental variables" /><category scheme="http://www.blogger.com/atom/ns#" term="Forecasting" /><category scheme="http://www.blogger.com/atom/ns#" term="CPI" /><category scheme="http://www.blogger.com/atom/ns#" term="Weak Instruments" /><category 
scheme="http://www.blogger.com/atom/ns#" term="Regression models" /><category scheme="http://www.blogger.com/atom/ns#" term="Granger causality" /><title>My Recent Reading</title><content type="html">Here are some of the papers that I have been reading in the past few days:&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;&lt;b&gt;&lt;a href="http://research.barcelonagse.eu/tmp/working_papers/682.pdf" target="_blank"&gt;Majid M. Al-Sadoon&lt;/a&gt;&lt;/b&gt;, 2013. Geometric and long run aspects of Granger causality. Discussion Paper, Barcelona Graduate School of Economics.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;&lt;a href="http://papers.tinbergen.nl/13047.pdf" target="_blank"&gt;David Ardia &amp;amp; Lennart Hoogerheide&lt;/a&gt;&lt;/b&gt;, 2013. GARCH models for daily stock returns: Impact of estimation frequency on value-at-risk and expected shortfall forecasts. Tinbergen Institute Discussion Paper.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;&lt;a href="http://www.sciencedirect.com/science/article/pii/S0304407612002205" target="_blank"&gt;Otilia Boldea &amp;amp; Alastair R. Hall&lt;/a&gt;&lt;/b&gt;, 2013. Estimation and inference in unstable nonlinear least squares models. &lt;i&gt;Journal of Econometrics&lt;/i&gt;, 172, 158-167.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;&amp;nbsp;&lt;b&gt;&lt;a href="http://ideas.repec.org/p/hst/ghsdps/gd12-295.html" target="_blank"&gt;Kazuhito Higa&lt;/a&gt;&lt;/b&gt;, 2013. Estimating upward bias in the Japanese CPI using Engel's law. Working Paper, Hitotsubashi University.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;&lt;a href="http://pe.cemi.rssi.ru/pe_2013_1_117-131.pdf" target="_blank"&gt;Anna Mikusheva&lt;/a&gt;&lt;/b&gt;, 2013. Survey on statistical inferences in weakly identified instrumental variables models.&amp;nbsp;&lt;i&gt;Applied Econometrics&lt;/i&gt;, 29, 117-131.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/me3FHcl73yc" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/3195752571004520499/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/my-recent-reading.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3195752571004520499?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3195752571004520499?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/me3FHcl73yc/my-recent-reading.html" title="My Recent Reading" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/my-recent-reading.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkYNRXg5fSp7ImA9WhBUGEU.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-4017638075221254131</id><published>2013-05-06T15:42:00.005-07:00</published><updated>2013-05-06T15:43:14.625-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-06T15:43:14.625-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><title>Econometrics Lectures on YouTube</title><content type="html">&lt;div style="text-align: justify;"&gt;
I'm always keeping my eyes open for new or different resources that I can integrate into my Economic Statistics and Econometrics courses. For example, as I've mentioned before in previous posts (&lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2011/08/interactive-statistics-wolframs-cdf.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt; and &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/04/a-first-encounter-with-monte-carlo.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;), I've been really pleased with what I've been able to achieve with Wolfram's cdf files.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
In my undergrad. Statistical Inference course I also refer the students to some of the excellent mini-lectures by &lt;a href="http://www.keithbower.com/" style="font-weight: bold;" target="_blank"&gt;Keith Bower&lt;/a&gt;. I find his presentations to be clear and (very importantly) accurate.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
If you check out YouTube you'll find a number of video presentations relating to the teaching of econometrics. To be honest, many of them don't particularly impress me. Maybe I'm just hard to please!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
There are some exceptions to this, though, including David Hendry's 2011 lecture on&amp;nbsp;&lt;a href="http://www.youtube.com/watch?v=We12iDwwYEE" style="font-weight: bold;" target="_blank"&gt;Teaching Undergraduate Economics at Oxford&lt;/a&gt;, and the great series of videos of &lt;b&gt;&lt;a href="http://pages.uoregon.edu/mthoma/" target="_blank"&gt;Mark Thoma&lt;/a&gt;&lt;/b&gt; in action &lt;a href="http://www.youtube.com/MarkThoma" style="font-weight: bold;" target="_blank"&gt;in the classroom&lt;/a&gt;.&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/GKBwKXk7swU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/4017638075221254131/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/econometrics-lectures-on-youtube.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/4017638075221254131?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/4017638075221254131?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/GKBwKXk7swU/econometrics-lectures-on-youtube.html" title="Econometrics Lectures on YouTube" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/econometrics-lectures-on-youtube.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D04GSHo7eyp7ImA9WhBUGEo.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-3291959254987374542</id><published>2013-05-06T14:32:00.001-07:00</published><updated>2013-05-06T14:32:09.403-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-06T14:32:09.403-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="PPP" /><category scheme="http://www.blogger.com/atom/ns#" term="Data" /><title>Burgernomics</title><content type="html">&lt;div style="text-align: justify;"&gt;
Looking through the papers that are "in press" at &lt;i&gt;&lt;b&gt;&lt;a href="http://www.journals.elsevier.com/economics-letters/" target="_blank"&gt;Economics Letters&lt;/a&gt;&lt;/b&gt;&lt;/i&gt; today, I came across a paper by Anthony Landry, titled "&lt;b&gt;&lt;a href="http://ideas.repec.org/p/fip/feddgw/95.html" target="_blank"&gt;Borders and Big Macs&lt;/a&gt;&lt;/b&gt;". (The link is to the working paper version.) Here's the abstract:&lt;/div&gt;
&lt;blockquote class="tr_bq" style="text-align: justify;"&gt;
"I provide new estimates of border frictions for 14 countries using local, national, and international Big Mac prices. I ﬁnd that borders generally introduce only small price wedges, far smaller than those observed across New York City neighboring locations."&lt;/blockquote&gt;
This led me to wonder just how many academic papers have been written using the well-known "Big Mac Index" (&lt;b&gt;&lt;a href="http://www.economist.com/blogs/graphicdetail/2013/01/daily-chart-18" target="_blank"&gt;BMI&lt;/a&gt;&lt;/b&gt;) that's published annually by &lt;i&gt;&lt;b&gt;&lt;a href="http://www.economist.com/" target="_blank"&gt;The Economist&lt;/a&gt;&lt;/b&gt;&lt;/i&gt; magazine. I don't know the exact answer, but there are 39 listed on RePEc's &lt;b&gt;&lt;a href="http://ideas.repec.org/" target="_blank"&gt;IDEAS&lt;/a&gt;&lt;/b&gt; site.&lt;br /&gt;
&lt;br /&gt;
That's a lot of burgers!&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/XhxTdxvcoYo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/3291959254987374542/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/burgernomics.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3291959254987374542?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3291959254987374542?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/XhxTdxvcoYo/burgernomics.html" title="Burgernomics" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/burgernomics.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C04ERXY4cSp7ImA9WhBUGEs.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-5048508824028156027</id><published>2013-05-06T09:48:00.000-07:00</published><updated>2013-05-06T10:38:24.839-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-06T10:38:24.839-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="SHAZAM" /><category scheme="http://www.blogger.com/atom/ns#" term="Humour" /><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="History of econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="Computing" /><category 
scheme="http://www.blogger.com/atom/ns#" term="OLS" /><title>A Visual Proof That OLS is BLU</title><content type="html">&lt;div style="text-align: justify;"&gt;
Back in the day (as they say), we had monochrome monitors on our P.C.'s. Do you remember the ghastly green or weird amber colours? Then, one bright day everything became multi-coloured! This is not just me reminiscing - this is leading up to an innovative proof of the Gauss-Markov Theorem. Honestly!&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
In a &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/05/the-frequent-regressors-club.html" target="_blank"&gt;post yesterday&lt;/a&gt;&lt;/b&gt;, I mentioned Ken White's sense of humour - that's Ken White, "The &lt;b&gt;&lt;a href="http://econometrics.com/" target="_blank"&gt;SHAZAM&lt;/a&gt;&lt;/b&gt; Man", as my kids used to affectionately call him. On one of his many visits in the late 1980's, Ken offered to give a talk to a group of students about using the SHAZAM econometrics package. (We had no money for software at the time, but thanks to Ken's outstanding generosity we always had the latest version of his package for everyone to use.)&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
There were (and probably still are) some hidden tricks in SHAZAM. For instance, if you entered the command "USER KEN", then all sorts of extra things were available to you. For instance, the subsequent commands became case-sensitive. Another command that wasn't widely known was "SET COLOR", which could be rescinded with "SET NOCOLOR". With this option toggled on, the background colour on the monitor changed as certain SHAZAM commands were executed.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
I must admit, this got a little hard on the eyes after a while, which might account for my declining vision! However, you have to remember that we didn't have the internet to amuse us, and it really didn't take too much to get us excited - technologically speaking! So, if you issued the command, "READ X", to indicate that a series of data was to be &lt;i&gt;&lt;span style="color: red;"&gt;read&lt;/span&gt;&lt;/i&gt;, the background colour on the monitor turned &lt;i&gt;&lt;span style="color: red;"&gt;red&lt;/span&gt;.&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
So, Ken got started on his promo-talk about SHAZAM. The students got right into it. Then he said, "You know, the great thing about SHAZAM is that unlike the competing packages, you can actually use it to prove some important theorems." Now the class got visibly excited!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
"For instance", said Ken. "Let's prove the Gauss-Markov Theorem". He keyed in the command &lt;b&gt;OLS Y &amp;nbsp;X&lt;/b&gt;. As the results began to appear on the screen, the background colour of the monitor turned very bright &lt;span style="color: blue;"&gt;blue &lt;/span&gt;in response to the use of the OLS command. The colour was so bad that you could hardly see the results.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Dead-pan, Ken said, "And that, my friends, is how you prove that the OLS estimator is &lt;span style="color: blue;"&gt;BLU&lt;/span&gt;!"&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/WBGLg8xAr_4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/5048508824028156027/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/a-visual-proof-that-ols-is-blu.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/5048508824028156027?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/5048508824028156027?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/WBGLg8xAr_4/a-visual-proof-that-ols-is-blu.html" title="A Visual Proof That OLS is BLU" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/a-visual-proof-that-ols-is-blu.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C04AQnc8eyp7ImA9WhBUGEs.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-6008135171313870029</id><published>2013-05-05T12:07:00.002-07:00</published><updated>2013-05-06T10:39:03.973-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-06T10:39:03.973-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="SHAZAM" /><category scheme="http://www.blogger.com/atom/ns#" term="Humour" /><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="History of econometrics" /><category 
scheme="http://www.blogger.com/atom/ns#" term="Computing" /><title>The Frequent Regressor Club</title><content type="html">&lt;div style="text-align: justify;"&gt;
My friend, Ken White, developed the &lt;b&gt;&lt;a href="http://econometrics.com/" target="_blank"&gt;SHAZAM&lt;/a&gt;&lt;/b&gt; econometrics package in 1977. Ken's a funny guy - that's to say, he has a &lt;i&gt;great&lt;/i&gt; sense of humour.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
On one of his many visits to Christchurch, New Zealand (when I was living there, many years ago) he gave me a wooden die that he'd had an artisan carve at the local Arts Centre in Christchurch. On each of the six faces he'd had the guy put the name of an econometrics/statistics package - TSP, LIMDEP, GAUSS, RATS, and SHAZAM. Yes, I know that's only 5 names. The thing was, SHAZAM appeared on &lt;i&gt;two&lt;/i&gt;&amp;nbsp;of the faces! The idea was to roll the die to decide which package to use in your lab. class. I still have the die - much to the occasional bemusement of students who see it on my desk.&lt;br /&gt;
&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;&lt;br /&gt;
Ken also kindly gave me a very early edition of the &lt;i&gt;Captain Marvel&lt;/i&gt; comic - the one where the &lt;b&gt;&lt;a href="http://en.wikipedia.org/wiki/Shazam_(character)" target="_blank"&gt;meaning of the acronym,&lt;/a&gt;&lt;/b&gt; SHAZAM, is revealed to the readers. That comic is still in my office too. There's going to be a heck of a garage sale in the department when I retire!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Some of you will recall the manuals that went with earlier versions of SHAZAM. There was an "Appendix" at the back, titled &lt;i&gt;Frequent Regressor Club&lt;/i&gt;. At that time, Ken travelled A LOT. He was an avid collector of frequent flyer points with United Airlines. He had so many that he hardly knew what to do with them. This was the inspiration for the &lt;i&gt;Frequent Regressor Club&lt;/i&gt;.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Readers of the manual were told that they could collect points for every OLS regression that they ran. They just had to mail a copy of the printout to Ken. (This was pre-PC days.) Nonlinear regressions earned extra points; and step-wise regressions weren't allowed!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
If you earned enough points you could apparently get a free upgrade of SHAZAM. It was just a spoof, but Ken was constantly stunned by the number of people who sent him their printouts!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Those were the days!&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/xXNuQfxxFJM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/6008135171313870029/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/the-frequent-regressors-club.html#comment-form" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/6008135171313870029?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/6008135171313870029?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/xXNuQfxxFJM/the-frequent-regressors-club.html" title="The Frequent Regressor Club" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/the-frequent-regressors-club.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0MNQngyfCp7ImA9WhBUFkU.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-5098347125660693055</id><published>2013-05-04T08:31:00.002-07:00</published><updated>2013-05-04T08:31:33.694-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-04T08:31:33.694-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Hypothesis testing" /><category scheme="http://www.blogger.com/atom/ns#" term="Granger causality" /><title>Granger Causality Testing Done Properly</title><content type="html">&lt;div style="text-align: justify;"&gt;
I enjoy following David Stern's &lt;i style="font-weight: bold;"&gt;&lt;a href="http://stochastictrend.blogspot.ca/" target="_blank"&gt;Stochastic Trend&lt;/a&gt; &lt;/i&gt;blog.&amp;nbsp;David is Research Director at the &lt;b&gt;&lt;a href="https://crawford.anu.edu.au/" target="_blank"&gt;Crawford School of Public Policy&lt;/a&gt;&lt;/b&gt; at the Australian National University. He's an energy and environmental economist who does some really interesting work - not my field at all, but I always enjoy reading what he has to say.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
In his latest &lt;b&gt;&lt;a href="http://stochastictrend.blogspot.ca/2013/05/update.html" target="_blank"&gt;blog post&lt;/a&gt;&lt;/b&gt;, David links to a recent paper that he's co-authored with Robert Kaufmann, from Boston University. &lt;b&gt;&lt;a href="http://econapps-in-climatology.webs.com/Stern&amp;amp;Kaufmann_Guelph.pdf" target="_blank"&gt;The paper&lt;/a&gt;&lt;/b&gt;&amp;nbsp;is titled, "Robust Granger Causality Testing of the Effect of Natural and Anthropogenic Radiative Forcings on Global Temperature".&amp;nbsp;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
As I said, this isn't my field. However, if you want to see an example of &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/01/granger-causality.html" target="_blank"&gt;Granger causality&lt;/a&gt;&lt;/b&gt; testing done well, you should take a look at this well-written paper.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Nice one!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/6x7r8HoeGj0" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/5098347125660693055/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/granger-causality-testing-done-properly.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/5098347125660693055?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/5098347125660693055?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/6x7r8HoeGj0/granger-causality-testing-done-properly.html" title="Granger Causality Testing Done Properly" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/granger-causality-testing-done-properly.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C08NR38-fSp7ImA9WhBUFkg.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-6982299593876292092</id><published>2013-05-03T20:37:00.000-07:00</published><updated>2013-05-04T00:18:16.155-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-04T00:18:16.155-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Goodness of fit" /><category scheme="http://www.blogger.com/atom/ns#" term="Regression models" /><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" 
term="Hypothesis testing" /><category scheme="http://www.blogger.com/atom/ns#" term="OLS" /><title>When Will the Adjusted R-Squared Increase?</title><content type="html">&lt;div style="text-align: justify;"&gt;
The coefficient of determination (R&lt;sup&gt;2&lt;/sup&gt;) and t-statistics have been the subjects of two of my posts in recent days (&lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/05/good-old-r-squared.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt; and &lt;b&gt;&lt;a href="http://davegiles.blogspot.ca/2013/05/when-can-regression-coefficients-change.html" target="_blank"&gt;here&lt;/a&gt;&lt;/b&gt;). There's another related result that a lot of students don't seem to get taught. This one is to do with the behaviour of the "adjusted" R&lt;sup&gt;2&lt;/sup&gt; when variables are added to or deleted from an OLS regression model.&lt;br /&gt;
&lt;br /&gt;
We all know, and it's trivial to prove, that the addition of &lt;i&gt;any&lt;/i&gt;&amp;nbsp;variable to such a regression model cannot decrease the R&lt;sup&gt;2&lt;/sup&gt; value. In fact, R&lt;sup&gt;2&lt;/sup&gt; will increase with such an addition to the model in general, and it will stay the same if the added variable is totally uncorrelated with the other variables in the model. Conversely, deleting &lt;i&gt;any&lt;/i&gt;&amp;nbsp;regressor from an OLS regression model will &lt;i&gt;reduce&lt;/i&gt;&amp;nbsp;the value of R&lt;sup&gt;2&lt;/sup&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;a name='more'&gt;&lt;/a&gt;Indeed, this is precisely why various "adjusted" R&lt;sup&gt;2&lt;/sup&gt; measures have been suggested over the years. You can boost the goodness-of-fit of the model by throwing anything into the regression, whether it makes economic sense or not.&lt;br /&gt;
&lt;br /&gt;
The adjusted R&lt;sup&gt;2&lt;/sup&gt; that we typically use involves "correcting" both the numerator and denominator sums of squares (in the usual R&lt;sup&gt;2&lt;/sup&gt; formula) for the appropriate degrees of freedom. If the sample size is "n", and the model includes "k" regressors (including the intercept) this adjusted R&lt;sup&gt;2&lt;/sup&gt; can be expressed as:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; R&lt;sub&gt;A&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; = 1 - [(n - 1) / (n - k)][1 - R&lt;sup&gt;2&lt;/sup&gt;] ,&lt;br /&gt;
&lt;br /&gt;
and it can be shown that R&lt;sub&gt;A&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; ≤ R&lt;sup&gt;2&lt;/sup&gt; ≤ 1. The &lt;i&gt;adjusted&lt;/i&gt; R&lt;sup&gt;2&lt;/sup&gt; can take negative values, and this will occur if and only if R&lt;sup&gt;2&lt;/sup&gt; &amp;lt; [(k - 1) / (n - 1)].&lt;br /&gt;
&lt;br /&gt;
Now, what can we say about the behaviour of R&lt;sub&gt;A&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; if we add regressors or delete them from the model? The adjusted R&lt;sup&gt;2&lt;/sup&gt; may increase or decrease (or stay the same) when we do this, and there are some simple conditions that determine which will occur.&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;The first result is that adding a regressor will increase (decrease) R&lt;sub&gt;A&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; depending on whether the &lt;u&gt;absolute value&lt;/u&gt; of the t-statistic associated with that regressor is greater (less) than one in value. R&lt;sub&gt;A&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; is unchanged if that absolute t-statistic is exactly equal to one.&lt;/i&gt;&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;
If you drop a regressor from the model, the converse of the above result applies. Dropping a regressor amounts to imposing a (zero) restriction on its coefficient. If you square the t-statistic, you get an F-statistic, and it's exactly the F-statistic for testing if the single linear restriction is valid. Not surprisingly, then, there's a more general result than the one given above - one that applies to a situation where several regressors are simultaneously added to or dropped from the model.&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;Adding a group of regressors to the model will increase (decrease) R&lt;sub&gt;A&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt; depending on whether the F-statistic for testing that their coefficients are all zero is greater (less) than one in value.&amp;nbsp;R&lt;sub&gt;A&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;&amp;nbsp;is unchanged if that &amp;nbsp;F-statistic is exactly equal to one.&lt;/i&gt;&lt;br /&gt;
&lt;i&gt;&lt;br /&gt;&lt;/i&gt;
So, you can increase the adjusted coefficient of determination by adding regressors that are statistically insignificant, but the situation isn't quite as bad as with the usual (unadjusted) R&lt;sup&gt;2&lt;/sup&gt;.&lt;br /&gt;
&lt;br /&gt;
Finally, the second result given above generalizes to the case where we are considering &lt;i&gt;any &lt;/i&gt;set of linear restrictions on the regression coefficients - not just zero restrictions.&lt;br /&gt;
&lt;br /&gt;
&lt;div&gt;
In summary, and not too surprisingly, the behaviour of the adjusted coefficient of determination as we add or delete regressors is quite systematic. If you're a student who's hoping that &lt;i&gt;deleting&lt;/i&gt; a regressor with a t-statistic of 1.1 will &lt;i&gt;increase&lt;/i&gt; the value of R&lt;sub&gt;A&lt;/sub&gt;&lt;sup&gt;2&lt;/sup&gt;, think again!&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles
&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/7SCENQfrd7A" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/6982299593876292092/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/when-will-adjusted-r-squared-increase.html#comment-form" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/6982299593876292092?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/6982299593876292092?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/7SCENQfrd7A/when-will-adjusted-r-squared-increase.html" title="When Will the Adjusted R-Squared Increase?" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/when-will-adjusted-r-squared-increase.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0IMSHg6eip7ImA9WhBUFkk.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-6934108825323674442</id><published>2013-05-03T10:25:00.000-07:00</published><updated>2013-05-03T21:26:29.612-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-03T21:26:29.612-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="Data" /><title>Mark Thoma on "Replication"</title><content type="html">&lt;div style="text-align: justify;"&gt;
Yesterday, in his &lt;b&gt;&lt;a href="http://economistsview.typepad.com/economistsview/" target="_blank"&gt;Economist's View&lt;/a&gt;&lt;/b&gt; blog, &lt;b&gt;&lt;a href="http://pages.uoregon.edu/mthoma/" target="_blank"&gt;Mark Thoma&lt;/a&gt;&lt;/b&gt; discussed the importance of &lt;b&gt;&lt;a href="http://economistsview.typepad.com/economistsview/2013/05/economics-needs-replication.html" target="_blank"&gt;replicating results&lt;/a&gt;&lt;/b&gt; in empirical economics. He's absolutely right, of course.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
I'll leave you to read what he had to say, but I especially liked his closing passage:&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;blockquote class="tr_bq"&gt;
&lt;span style="font-family: inherit;"&gt;"One place where replication occurs regularly is assignments in graduate classes. I routinely ask students to replicate papers as part of their coursework. Even if they don't find explicit errors (and most of the time they don't), it almost always raises good questions about the research (why this choice, this model, what if you relax this assumption, there's a better way to do this,here's the next question to ask, etc., etc.). So replication does occur routinely in economics, and it is very valuable, but it is not a formal part of the profession the way it should be, and much of the replication is done by people (students) who generally assume that if they can't replicate something, it is probably their error. We have a lot of work to do on the replication front, and I want to encourage efforts like this."&lt;/span&gt;&lt;/blockquote&gt;
At least one of my colleagues also assigns replication exercises in this way, and I really should do the same. Fortunately, more journals are either recommending or requiring that data-sets be made available as a condition of publication. The &lt;b&gt;&lt;i&gt;&lt;a href="http://econ.queensu.ca/jae/" target="_blank"&gt;Journal of Applied Econometrics&lt;/a&gt;&lt;/i&gt;&lt;/b&gt; is one such journal, and we've recently been pushing in that direction with the &lt;i&gt;&lt;b&gt;&lt;a href="http://ideas.repec.org/s/taf/jitecd.html" target="_blank"&gt;Journal of International Trade &amp;amp; Economic Development&lt;/a&gt;&lt;/b&gt;&lt;/i&gt;.&lt;br /&gt;
&lt;br /&gt;
This should become part of our culture.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/r9PnYsOh-Zs" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/6934108825323674442/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/mark-thoma-on-replication.html#comment-form" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/6934108825323674442?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/6934108825323674442?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/r9PnYsOh-Zs/mark-thoma-on-replication.html" title="Mark Thoma on &quot;Replication&quot;" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/mark-thoma-on-replication.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A04FSHk6fCp7ImA9WhBUFUQ.&quot;"><id>tag:blogger.com,1999:blog-2198942534740642384.post-3958574395205072657</id><published>2013-05-03T09:51:00.003-07:00</published><updated>2013-05-03T09:51:59.714-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-05-03T09:51:59.714-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Instrumental variables" /><category scheme="http://www.blogger.com/atom/ns#" term="Regression models" /><category scheme="http://www.blogger.com/atom/ns#" term="Teaching econometrics" /><category scheme="http://www.blogger.com/atom/ns#" term="OLS" /><title>When Can Regression 
Coefficients Change Sign?</title><content type="html">&lt;div style="text-align: justify;"&gt;
Let's suppose that you've been running regressions happily all morning. It's a sunny day, but what could be better than enjoying some honest-to-goodness econometrics? Suddenly, you notice that one of the estimated coefficients in your model has a sign that's the opposite to what you were expecting (from your vast knowledge of the underlying economics). Shock! Horror!&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Well, it's really good that you're on the look-out for that sort of thing. Congratulations! However, something has to be done about this problem.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Being young, with good eyesight, you also happen to spot something else that's interesting. One of the &lt;i&gt;other&lt;/i&gt; estimated coefficients has a very low t-statistic. You have a brilliant idea! If you delete the variable associated with the very small t-value, maybe the "wrong" sign on the first coefficient will be reversed. Is this possible?&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;/div&gt;
&lt;a name='more'&gt;&lt;/a&gt;&lt;br /&gt;
Sadly, no, it's not going to be that simple.&lt;br /&gt;
&lt;br /&gt;
Let's make sure that what I'm talking about is quite clear. Here are the results for a hypothetical regression, estimated by OLS, and with t-statistics in parentheses:&lt;br /&gt;
&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; y = 0.43 + 1.45X&lt;sub&gt;1&lt;/sub&gt; - 0.89X&lt;sub&gt;2&lt;/sub&gt; + residual &amp;nbsp; &amp;nbsp;; &amp;nbsp; n = 34 ; &amp;nbsp; R&lt;sup&gt;2&lt;/sup&gt; = 0.89&lt;br /&gt;
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; (1.45) &amp;nbsp; (0.66) &amp;nbsp; &amp;nbsp;(-1.83)&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: justify;"&gt;
You were expecting a &lt;i&gt;positive &lt;/i&gt;&amp;nbsp;coefficient for X&lt;sub&gt;2&lt;/sub&gt;. If you drop X&lt;sub&gt;1&lt;/sub&gt; from the regression, and re-estimate the model by OLS, could the sign of the coefficient of X&lt;sub&gt;2&lt;/sub&gt; become positive? No - that's impossible.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
Leamer (1975) proved that such a change in sign &amp;nbsp;&lt;i&gt;cannot occur&lt;/i&gt; if the absolute value of the t-statistic for the variable you're deleting is &lt;i&gt;less&lt;/i&gt;&amp;nbsp;than the absolute value of the t-statistic for the variable whose sign you're interested in. That's the situation we have in the example above.&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
On the other hand, if we were to delete X&lt;sub&gt;2&lt;/sub&gt; from the above model, it's &lt;i&gt;possible&lt;/i&gt;&amp;nbsp;for the sign of the coefficient for X&lt;sub&gt;1&lt;/sub&gt; to change.&lt;br /&gt;
&lt;br /&gt;
A number of extensions/generalizations of this result are also available, including:&lt;br /&gt;
&lt;br /&gt;
&lt;ul&gt;
&lt;li&gt;Leamer's necessary condition, stated above, was extended to include a sufficient condition by Visco (1978).&lt;/li&gt;
&lt;li&gt;Leamer's result was re-stated in a somewhat simpler form by Oksanen (1987).&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Leamer's result was generalized by McAleer &lt;i&gt;et al&lt;/i&gt;. (1986) to apply to cases where the deleted variables are combined in arbitrary linear combinations. See, also, Visco (1988).&lt;/li&gt;
&lt;li&gt;All of the above results were shown by Giles (1989) to hold if the model is estimated by &lt;i&gt;any Instrumental Variables estimator&lt;/i&gt;, rather than by OLS.&lt;/li&gt;
&lt;/ul&gt;
&lt;div&gt;
This last extension is based on the algebraic relationship between the OLS and (generalized) I.V. estimators, and the results on restricted I.V. estimation given by Giles (1982).&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;i&gt;By the way - HT to a former student of mine, Darren Gibbs, who asked some interesting questions that led me to write the 1989 paper referenced below. I never teach a course without learning something new!&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;b&gt;References&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;&lt;/b&gt;
&lt;br /&gt;
Giles, D. E. A., 1982. Instrumental variables estimation with linear restrictions. &lt;i&gt;Sankhya: The Indian Journal of Statistics&lt;/i&gt;, B, 44, 343-350.&lt;br /&gt;
&lt;br /&gt;
&lt;div&gt;
Giles, D.E.A., 1989. Coefficient sign changes when restricting regression models under instrumental &amp;nbsp;variables Estimation.&amp;nbsp;&lt;i&gt;Oxford Bulletin of Economics and Statistics&lt;/i&gt;, 51, 465-467.&lt;/div&gt;
&lt;div&gt;
&lt;/div&gt;
&lt;br /&gt;
Leamer, E. E., 1975. A result on the sign of restricted least-squares estimates. &lt;i&gt;Journal of Econometrics&lt;/i&gt;, 3, 387-390.&lt;br /&gt;
&lt;br /&gt;
McAleer, M., A. Pagan, &amp;amp; I. Visco, 1986. A further result on the sign of restricted least-squares estimates. &lt;i&gt;Journal of Econometrics&lt;/i&gt;, 32, 287-290.&lt;br /&gt;
&lt;br /&gt;
Oksanen, E. H., 1987. On sign changes upon deletion of a variable in linear regression analysis. &lt;i&gt;Oxford Bulletin of Economics and Statistics&lt;/i&gt;, 49, 227-229.&lt;br /&gt;
&lt;br /&gt;
Visco, I., 1978. On obtaining the right sign of a coefficient estimate by omitting a variable from the regression. &lt;i&gt;Journal of Econometrics&lt;/i&gt;, 7, 115-117.&lt;br /&gt;
&lt;br /&gt;
Visco, I., 1988. Again on sign changes upon deletion of a variable from a linear regression. &lt;i&gt;Oxford Bulletin of Economics and Statistics&lt;/i&gt;, 50, 225-227.&lt;br /&gt;
&lt;div style="font-weight: bold;"&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div style="text-align: justify;"&gt;
&lt;br /&gt;
&lt;br /&gt;&lt;/div&gt;
&lt;div style="text-align: center;"&gt;
© 2013, David E. Giles&lt;/div&gt;
&lt;img src="http://feeds.feedburner.com/~r/blogspot/jjOHE/~4/OwE-935JLzQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://davegiles.blogspot.com/feeds/3958574395205072657/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://davegiles.blogspot.com/2013/05/when-can-regression-coefficients-change.html#comment-form" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3958574395205072657?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/2198942534740642384/posts/default/3958574395205072657?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/jjOHE/~3/OwE-935JLzQ/when-can-regression-coefficients-change.html" title="When Can Regression Coefficients Change Sign?" /><author><name>Dave Giles</name><uri>http://www.blogger.com/profile/05389606956062019445</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="31" src="http://4.bp.blogspot.com/-jWo0jl4VqKA/TVlhA-ht3xI/AAAAAAAAAAM/00hY8YOzvQY/s220/giles.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://davegiles.blogspot.com/2013/05/when-can-regression-coefficients-change.html</feedburner:origLink></entry></feed>
