###---------------------------------------------------------------------------- ### Linear Regression with Matrices ###---------------------------------------------------------------------------- ## Prepare the data DataRaw <- read.table("Table_1_1.csv", header = TRUE) Y <- DataRaw[, 2, drop = TRUE] # obtain Y X0 <- DataRaw[, 4:5] # obtain the raw X XOld <- cbind(1, X0) # add an intercept X = as.matrix(XOld) ## The model ## Y = beta0 + beta1 X1 + beta2 X2 ## The regression coefficient betaHat <- solve(t(X)%*%X)%*%t(X)%*%Y # 3x1 vector ## The fitted values YHat <- X%*%betaHat ## The residuals uHat <- Y-YHat ## The estimated variance of error term n <- length(Y) # No. of observations p <- length(betaHat) # No. of free parameters df <- n-p sigma2Hat <- sum(uHat^2)/df # The variance of residuals sgimaHat <- sqrt(sigma2Hat) # The standard deviation ## Variance-Covariance of coefficient VarbetaHat <- sigma2Hat*solve(t(X)%*%X) ## The t statistic tObs <- (betaHat-0)/sqrt(diag(VarbetaHat)) ## RSS RSS <- sum(uHat^2) ## ESS ESS <- sum((YHat-mean(Y))^2) ## TSS TSS <- RSS + ESS ## R Squared R2 <- ESS/TSS ## Adjusted R Squared ajdR2 <- 1- (1-R2)*(n-1)/(n-p) ## F statistic FObs <- (ESS/(p-1))/(RSS/(n-p)) # with df (p-1) and (n-p) ## The Hat matrix HMatrix <- X%*%solve(t(X)%*%X)%*%t(X) all.equal(t(HMatrix), HMatrix) # Symmetric all.equal(HMatrix, HMatrix%*%HMatrix) # Idempotent sum(diag(HMatrix)) # trace of hat matrix, equals to no. of free independent parameters. ## Plot the residuals boxplot(uHat) ## QQ plot qqline(uHat) ## Save plot into pdf dev.copy2pdf(file = "QQplot.pdf")