###----------------------------------------------------------------------------
### Detecting multicollinearity with Longley Data (Table 10.8)
###----------------------------------------------------------------------------
### Fits Y ~ X1 + ... + X5 by OLS, then inspects the covariates through their
### pairwise correlations, the eigenvalues of X'X (condition number), and the
### variance inflation factor (VIF) of each slope coefficient.

## Load the data
## NOTE(review): read.table() splits fields on whitespace by default. If
## "Table_10_8.csv" is really comma-separated, this needs sep = "," (or
## read.csv()) -- confirm against the data file.
longley <- read.table("Table_10_8.csv", header = TRUE)

## Simple regression
longleyLM <- lm(Y ~ X1 + X2 + X3 + X4 + X5, data = longley)
summary(longleyLM) # High R2 but few variables are significant; what was the
                   # problem?

## Check what went wrong with the covariates
## NOTE(review): assumes columns 3 to 7 of the file are X1, ..., X5 in that
## order, since the VIF step below pairs column j of X with the (j + 1)-th
## coefficient of the fit -- confirm against the file layout.
X <- as.matrix(longley[, 3:7]) # The covariates matrix
n <- nrow(longley)             # number of observations
k <- ncol(X) + 1               # number of coefficients (slopes + intercept)

## Check the correlations
cor(X)   # Correlation
pairs(X) # Pairwise plot

## Check the eigenvalues of X'X
longleyEigen <- eigen(crossprod(X))
longleyEigenVec <- longleyEigen$values # the eigenvalues (despite the name)

## The condition number: largest eigenvalue over smallest eigenvalue
max(longleyEigenVec) / min(longleyEigenVec) # what is your conclusion?

## Study the VIF
betaVCOV <- vcov(longleyLM) # variance covariance matrix
betaVAR <- diag(betaVCOV)   # variances of (Intercept), X1, ..., X5
sigma2hat <- sum(residuals(longleyLM)^2) / (n - k) # residual variance

## Centered sum of squares of every covariate: sum((x_j - mean(x_j))^2)
centeredSS <- vapply(
  seq_len(ncol(X)),
  function(j) sum((X[, j] - mean(X[, j]))^2),
  numeric(1)
)

## VIF_j = Var(beta_j) * sum((x_j - mean(x_j))^2) / sigma2hat.
## The "+ 1" skips the intercept, which is the first coefficient.
VIF <- betaVAR[seq_len(ncol(X)) + 1] * centeredSS / sigma2hat

## Individual values, kept under their original names
VIF1 <- VIF[1]
VIF2 <- VIF[2]
VIF3 <- VIF[3]
VIF4 <- VIF[4]
VIF5 <- VIF[5]