GLS-R gru=ret30 <-read.table("C:/data/grunfeldInve.txt", header=T) summary(gru) Year Firm Invest Firmvalue PlaEqSto Min. :1935 Min. :1 Min. : 12.93 Min. : 191.5 Min. : 0.8 1st Qu.:1940 1st Qu.:2 1st Qu.: 55.27 1st Qu.: 718.6 1st Qu.: 85.7 Median :1945 Median :3 Median : 140.10 Median :1682.3 Median : 205.3 Mean :1945 Mean :3 Mean : 248.96 Mean :1922.2 Mean : 311.1 3rd Qu.:1949 3rd Qu.:4 3rd Qu.: 419.18 3rd Qu.:2310.0 3rd Qu.: 343.1 Max. :1954 Max. :5 Max. :1486.70 Max. :6241.7 Max. :2226.3 reg=lm(Invest~Firmvalue+PlaEqSto, data=gru) summary(reg) library(car) hccm(reg) #this gives heteroscedasticity corrected std errors durbin.watson(reg) #assumes one lag durbin.watson(reg, max.lag=4) #gives 4 lags #following is described but does not work durbin.watson(reg, max.lag=4, alternative=c("two.sided", "positive", "negative")) library(nlme) greg=gls(Invest~Firmvalue+PlaEqSto, data=gru, groups=Firm Ex7.3 erng <-read.table("C:/dmck/earningsdat.asc", header=T) reg1=lm(y~d1+d2+d3-1,data=erng) #-1 forced the regr line thru origin since all are dummy summary(reg1) library(car) qq.plot(reg1) u=resid(reg1) u2=u^2 #Hetero-corrected OLS wt=1/ sqrt(u2) regw=lm(y~d1+d2+d3-1,data=erng, weight=wt) #if u compare the t values, they have gone down #efficiency has gone up summary(regw) qq.plot(regw) #quantile plot shows that density is less like Student's t than before #Ex7.3 in DMCK book reg2=lm(u2~d1+d2+d3-1,data=erng) summary(reg2) #durbin.watson(reg1) does not work for lack of memory acf(u, plot=F, lag.max=4) #prints autocorrelations # 0 1 2 3 4 #1.000 0.991 0.983 0.974 0.966 library(nlme) greg1=gls(y~d1+d2+d3-1,data=erng, corAR1) #does not work greg1=gls(y~d1+d2+d3-1,data=erng)#works but gives same as OLS cs1=corAR1(0.991) greg1=gls(y~d1+d2+d3-1,data=erng, cs1) #does not work ## Willam H. Greene, Econometric Analysis, 2nd Ed. ## Chapter 15 ## load data set, p. 411, Table 15.1 data(Investment) ## fit linear model, p. 412, Table 15.2 fm <- lm(RealInv ~ RealGNP + RealInt, data = Investment) summary(fm) ## visualize residuals, p. 412, Figure 15.1 plot(ts(residuals(fm), start = 1964), type = "b", pch = 19, ylim = c(-35, 35), ylab = "Residuals") sigma <- sqrt(sum(residuals(fm)^2)/fm$df.residual) ## maybe used df = 26 instead of 16 ?? abline(h = c(-2, 0, 2) * sigma, lty = 2) # above draws confidence band at + or - two times sigma and middle line at zero #lty means line type 2 vcovHC(fm, type="const") kernHAC(fm) vcovHAC(fm) kernHAC(fm) wt=weightsAndrews(fm) fmw=lm(RealInv ~ RealGNP + RealInt, data = Investment, weight=wt) if(require(lmtest)) { ## Newey-West covariances, Example 15.3 coeftest(fm, vcov = NeweyWest(fm, lag = 4)) ## Note, that the following is equivalent: coeftest(fm, vcov = kernHAC(fm, kernel = "Bartlett", bw = 5, prewhite = FALSE, adjust = FALSE)) ## Durbin-Watson test, p. 424, Example 15.4 dwtest(fm) ## Breusch-Godfrey test, p. 427, Example 15.6 #Gujarati data compensation=c(3396, 3787, 4013, 4014, 4146, 4241, 4387, 4538, 4843) productivity=c(9355, 8584, 7962, 8275, 8389, 9418, 9795, 10281, 11750) reg1=lm(compensation~productivity) summary(reg1) u=resid(reg1) u2=u^2 reg2=lm(I(log(u2))~I(log(productivity))) summary(reg2) #Since t values of this regression are small Rsq= 0.04872 only # Park test for hetero says that there is no hetero #Glejser test reg2=lm(I(abs(u))~productivity) summary(reg2) #very low R-square and t-values means no hetero by Glejser test reg2=lm(I(abs(u))~I(sqrt(productivity))) summary(reg2) #very low R-square and t-values means no hetero by Glejser test reg2=lm(I(abs(u))~I(1/productivity)) summary(reg2) #very low R-square and t-values means no hetero by Glejser test reg2=lm(I(abs(u))~I(1/ sqrt(productivity))) summary(reg2) #now NONlinear least squares for Glejser test #very low R-square and t-values means no hetero by Glejser test reg2=nls(formula=(I(abs(u))~sqrt(beta1+beta2*productivity)), start=list(beta1=.5,beta2=1)) summary(reg2) #very low R-square and t-values means no hetero by Glejser test reg2=nls(formula=(I(abs(u))~sqrt(beta1+beta2*I(productivity^2))), start=list(beta1=.5,beta2=1)) summary(reg2) #very low R-square and t-values means no hetero by Glejser test #cy <-read.table("C:/data/consumptionincomeGuj.txt", header=T) consum=c(55, 65, 70, 80, 79, 84, 98, 95, 90, 75, 74, 110, 113, 125, 108, 115, 140, 120, 145, 130, 152, 144, 175, 180, 135, 140, 178, 191, 137, 189) income=c(80, 100, 85, 110, 120, 115, 130, 140, 125, 90, 105, 160, 150, 165, 145, 180, 225, 200, 240, 185, 220, 210, 245, 260, 190, 205, 265, 270, 230, 250) reg1=lm(consum~income) summary(reg1) u=resid(reg1) u2=u^2 acf(u, plot=F, lag.max=4) #prints autocorrelations oo=order(income) consum2=consum[oo]#re-order that data w.r.t income variable income2=sort(income) reg1=lm(consum2~income2) summary(reg1) u=resid(reg1) u2=u^2 acu=acf(u, plot=F, lag.max=4) #prints autocorrelations print(acu) n=length(u) ut=u[2:n] utm1=u[1: (n-1)] cor.test(ut,utm1) #formal test for first order serial correlation #We can use toeplitz to construct the AR1 error covariance matrix #let us write a function to do this for us AR1cov=function(r,n) #function to create a covariance matrix with AR1 errors using toeplitz # input r=AR1 correlation coefficient, n =size of matrix (number of rows) {aa=0: (n-1); ar1s=r^aa; AR1cov=toeplitz(ar1s) list(AR1cov=AR1cov)# note we repeat the output name here } ar1c=AR1cov(cor(ut,utm1),n)$AR1cov #note the dollar and name to extract output print(ar1c[1:4,1:4]) consum3=consum2 #initialize place to keep the revised y and x of regression income3=income2 rho=0.18 #plausible smallest value of AR1 coeff. of interest while (rho<0.31) { omrhos=sqrt(1-rho^2) consum3[1]=omrhos*consum2[1] #adjustment to first observation of y income3[1]=omrhos*income2[1] #adjust first observation of x t=2 while (t<=n){ consum3[t]=consum2[t]-rho*consum2[t-1] #pseudo difference remaining ones income3[t]=income2[t]-rho*income2[t-1] t=t+1 } #end loop on t rss=sum(resid(lm(consum3~income3))^2) print(c("rho, resid sum of sq",rho,rss)) rho=rho+0.001 } #end rho loop #rho=0.202 seems to be have smallest rss #But minimizing resid sum of sq is NOT the same maximizing likelihood # or maximizing restricted likelihood #because the likelihood has a determinant of the error covariance matrix #Also, the ML or REML can directly maximize lkhd w.r.t. rho and regr coeff's GLS estimation with ML or REML library("nlme") fmGLS <- gls(consum2~income2, correlation = corAR1(), method = "ML") summary(fmGLS) fmGLS <- gls(consum2~income2, correlation = corAR1(), method = "REML") summary(fmGLS) fmGLS <- gls(consum2~income2, correlation = corARMA(p=1,q=0), method = "ML") summary(fmGLS) #verify that this gives same answer as AR1 fmGLS <- gls(consum2~income2, correlation = corARMA(p=2,q=0), method = "ML") summary(fmGLS) help(corClasses) #this lists all Available correlation structures: # corAR1: autoregressive process of order 1. # corARMA: autoregressive moving average process, with arbitrary orders # for the autoregressive and moving average components. # corCAR1: continuous autoregressive process (AR(1) process for a # continuous time covariate). #corCompSymm: compound symmetry structure corresponding to a constant # correlation. hetero Testing #Goldfeld Quandt test drop middle 4 observations # after ranking data w.r.t. income reg1=lm(consum2~income2) summary(reg1) reg1=lm(I(consum2[1:13])~I(income2[1:13])) summary(reg1) u1=resid(reg1) rss1=sum(u1^2) n=length(consum2) reg2=lm(I(consum2[18:n])~I(income2[18:n])) summary(reg2) u2=resid(reg2) rss2=sum(u2^2) #test stat has second regr in numerator lam=(rss2/reg2$df)/ (rss1/reg1$df) lam crit=qf(0.95, reg2$df, reg1$df) #this gives upper tail of F distribution at 5% crit #critical value for F test if(lam<=crit) print("Reject Homoscedasticity") if(lam>crit) print("Accept Homoscedasticity") #Breusch-Pagan-Godfrey test Auxiliary regress (u-hat-sq/ resid mean sq) # on Z regressors and half of this resid ss is Chisq with m-1 df #this is asymp test sensitive to normality assumption library(lmtest) bptest(consum~income) #studentized Breusch-Pagan test #BP = 5.2722, df = 1, p-value = 0.02167 agrees with Guj text compensation=c(3396, 3787, 4013, 4014, 4146, 4241, 4387, 4538, 4843) employmentsize=c(1,2,3,4,5,6,7,8,9) sdwages=c(743.70, 851.40, 727.80, 805.06, 929.90,1080.60,1243.20,1307.70,1112.50) y=compensation/sdwages x=employmentsize/sdwages reg1=lm(y~I(1/sdwages)+x-1) summary(reg1) > AR1cov(0.3,4) [,1] [,2] [,3] [,4] [1,] 1.000 0.30 0.09 0.027 [2,] 0.300 1.00 0.30 0.090 [3,] 0.090 0.30 1.00 0.300 [4,] 0.027 0.09 0.30 1.000