###----------------------------------------------------------------------------
### Heteroscedasticity Example 11.10
###----------------------------------------------------------------------------

## Read the data set and fit the baseline simple regression of RD on SALES.
## NOTE(review): read.table() splits on whitespace by default; confirm that
## "Table_11_5.csv" is not comma-delimited (otherwise sep = "," is needed).
rd <- read.table(file = "Table_11_5.csv", header = TRUE)
n <- nrow(rd) # Number of observations

## Keep the original regressors as stand-alone vectors for the tests below
X2 <- rd$SALES
X3 <- rd$PROFITS

## Baseline linear model and its residuals
rdLM <- lm(RD ~ SALES, data = rd)
uHat <- rdLM[["residuals"]]

###----------------------------------------------------------------------------
### Detecting Heteroscedasticity via residual plots
###----------------------------------------------------------------------------

## Draw three diagnostic plots side by side (1 row, 3 columns).
## par() returns the previous settings; they are restored at the end so that
## any later plot is not forced into the 1-by-3 layout.
oldPar <- par(mfrow = c(1, 3))

## Squared residuals against fitted values: a systematic pattern suggests
## that the error variance changes with the level of the response.
plot(rdLM$fitted.values, uHat^2, col = "red", pch = "@",
     xlab = "Fitted values", ylab = "Squared residuals")

## Squared residuals against the regressor SALES
plot(rd$SALES, uHat^2, col = "blue", pch = 19,
     xlab = "SALES", ylab = "Squared residuals")

## Normal QQ plot of the residuals with its reference line
qqnorm(uHat)
qqline(uHat)

## Restore the graphics settings that were active before this section
par(oldPar)

###----------------------------------------------------------------------------
### White's test
###----------------------------------------------------------------------------

## Step 1: obtain the squared residuals from "uHat", which we already have.
## (Do not store them in "T": that would mask R's shorthand for TRUE.)
uHatSq <- uHat^2

## Step 2: Run the auxiliary regression
## The covariates are the original regressors, the cross products of the
## regressors and higher powers of the regressors. With a single regressor
## there is no cross product, so only X2 and its square enter.
Z3 <- X2^2 # The 2nd power of the regressor
rdAux <- lm(uHatSq ~ X2 + Z3)

## Step 3: Under the null hypothesis, sample size times R squared of the
## auxiliary regression asymptotically follows a chi squared distribution
## with degrees of freedom equal to the number of regressors in the
## auxiliary regression (excluding the intercept).
R2 <- summary(rdAux)$r.squared
dfWhite <- rdAux$rank - 1 # Estimable regressors excluding intercept (here 2)
White <- n * R2 # The White statistic

## Step 4 The decision rule:
## H0: There is no heteroscedasticity
## Reject H0 if "White > Chi squared critical value"

## The critical values
qchisq(1 - 0.05, df = dfWhite) # at the 5% significance level
qchisq(1 - 0.1, df = dfWhite) # at the 10% significance level

## Or the p-value (upper tail; lower.tail = FALSE avoids the loss of precision
## of "1 - pchisq()" when the p-value is very small)
pvalue <- pchisq(q = White, df = dfWhite, lower.tail = FALSE)


###----------------------------------------------------------------------------
### Spearman's rank correlation test
###----------------------------------------------------------------------------

## Spearman's rank correlation coefficient between |uHat| and X2:
##   rs = 1 - 6 * sum(d_i^2) / (n * (n^2 - 1)),
## where d_i is the difference between the rank of |uHat_i| and the rank of
## X2_i. rank() must be used here: order() returns the permutation that sorts
## the vector, not the rank of each observation.
## The variable names are kept for compatibility; both now hold ranks.
## Note: with ties rank() assigns average ranks and the formula is only
## approximate; cor(uHatAbs, X2, method = "spearman") handles ties exactly.
uHatAbs <- abs(uHat)
uHatAbsOrder <- rank(uHatAbs)
XOrder <- rank(X2)
rs <- 1 - 6 * sum((uHatAbsOrder - XOrder)^2) / (n * (n^2 - 1))

## The corresponding t statistic: t = rs * sqrt(n - 2) / sqrt(1 - rs^2)
tObs <- rs * sqrt(n - 2) / sqrt(1 - rs^2)
df <- n - 2 # degrees of freedom of the t distribution

## Make the decision: upper-tail p-value; a small value is evidence that
## |uHat| increases with X2, i.e. evidence of heteroscedasticity
pvalue <- pt(q = tObs, df = df, lower.tail = FALSE)

###----------------------------------------------------------------------------
### Generalized least squares
###----------------------------------------------------------------------------

## The usual OLS with two covariates: Y = b1 + b2 * X2 + b3 * X3 + u.
## Now let's assume the standard deviation of u is proportional to X2
## (i.e. the error variance is proportional to X2^2).
## To check this, see slide 18.

## The response was never defined as a stand-alone vector.
## NOTE(review): RD is the only response used in this script, so it is taken
## as Y here -- confirm against the slides.
Y <- rd$RD

## Then OLS will be like this
summary(lm(Y ~ X2 + X3))

## Rearrange the data: divide the whole equation by X2, which gives
##   Y / X2 = b1 * (1 / X2) + b2 + b3 * (X3 / X2) + u / X2
## so that the transformed error u / X2 has constant variance.
Y2 <- Y / X2
S2 <- 1 / X2
S3 <- X3 / X2

## OLS on the transformed data. The intercept must be kept: it estimates b2,
## the coefficient of X2 in the original model. The coefficient of S2
## estimates the original intercept b1 and the coefficient of S3 estimates b3.
## Fitting without an intercept would drop X2 from the model.
## Equivalent to lm(Y ~ X2 + X3, weights = 1 / X2^2).
GLS <- lm(Y2 ~ S2 + S3)

summary(GLS)