Matrix Approach to Simple Linear Regression in \(R\)

Read data from a URL

# Read the file CH01TA01.txt directly from the URL below into a data frame.
# sep = "" splits fields on any run of whitespace; header = FALSE means the
# first line is treated as data, so the columns get the default names V1, V2.
toluca <- read.table("http://www.cnachtsheim-text.csom.umn.edu/Kutner/Chapter%20%201%20Data%20Sets/CH01TA01.txt", sep ="" , header = FALSE)


# Look at the first 6 rows to confirm the data were read as two numeric columns
head(toluca)
  V1  V2
1 80 399
2 30 121
3 50 221
4 90 376
5 70 361
6 60 224

Rename columns

# Give the two columns descriptive names: the first column becomes lotSize
# and the second becomes hours.
colnames(toluca) <- c("lotSize", "hours")

# Look at the first 6 rows again to confirm the new column names
head(toluca)
  lotSize hours
1      80   399
2      30   121
3      50   221
4      90   376
5      70   361
6      60   224

Making \(X\) and \(Y\) matrices

library(Matrix)
# n is the number of rows of the data frame, i.e. one per observation.
n <- nrow(toluca) # Number of observations
n
[1] 25
# Pull the lot-size and hours columns out of the data frame as plain vectors.
X <- toluca$lotSize
Y <- toluca$hours

# as.matrix() turns the length-n vector into an n x 1 column matrix so that it
# can be used in matrix products.
Y <- as.matrix(Y) 
Y                       # This is your vector Y (stored as an n x 1 matrix)
      [,1]
 [1,]  399
 [2,]  121
 [3,]  221
 [4,]  376
 [5,]  361
 [6,]  224
 [7,]  546
 [8,]  352
 [9,]  353
[10,]  157
[11,]  160
[12,]  252
[13,]  389
[14,]  113
[15,]  435
[16,]  420
[17,]  212
[18,]  268
[19,]  377
[20,]  421
[21,]  273
[22,]  468
[23,]  244
[24,]  342
[25,]  323
# Convert the lot-size vector to an n x 1 column matrix as well.
X <- as.matrix(X)
X                       # Not quite the X matrix we need: it has no column of ones yet
      [,1]
 [1,]   80
 [2,]   30
 [3,]   50
 [4,]   90
 [5,]   70
 [6,]   60
 [7,]  120
 [8,]   80
 [9,]  100
[10,]   50
[11,]   40
[12,]   70
[13,]   90
[14,]   20
[15,]  110
[16,]  100
[17,]   30
[18,]   50
[19,]   90
[20,]  110
[21,]   30
[22,]   90
[23,]   40
[24,]   80
[25,]   70
# Build the design matrix by putting a column of ones in front of the
# lot-size column. cbind() recycles the scalar 1 down every row of X.
X <- cbind(1, X)
X  # This is your X matrix
      [,1] [,2]
 [1,]    1   80
 [2,]    1   30
 [3,]    1   50
 [4,]    1   90
 [5,]    1   70
 [6,]    1   60
 [7,]    1  120
 [8,]    1   80
 [9,]    1  100
[10,]    1   50
[11,]    1   40
[12,]    1   70
[13,]    1   90
[14,]    1   20
[15,]    1  110
[16,]    1  100
[17,]    1   30
[18,]    1   50
[19,]    1   90
[20,]    1  110
[21,]    1   30
[22,]    1   90
[23,]    1   40
[24,]    1   80
[25,]    1   70

Find \(X'X\)

Find \(X'Y\)

Find \((X'X)^{-1}\)

Find \(\mathbf{b} = (X'X)^{-1}X'Y\)

Verify the answer by finding the LS estimates using the \(lm\) function

Section 5.11: Fitted Values and Residuals

Uses of inverse matrix

Consider the simultaneous equations: \[2y_1+4y_2 =20\] \[3y_1+y_2=10\] Using matrix methods, find the solutions for \(y_1\) and \(y_2\).

library(Matrix)
# Coefficient matrix of the system, written one equation per row:
#   2*y1 + 4*y2 = 20
#   3*y1 + 1*y2 = 10
X <- rbind(
  c(2, 4),
  c(3, 1)
)
X
     [,1] [,2]
[1,]    2    4
[2,]    3    1
# Right-hand side of the system, stored as a 2 x 1 column matrix.
Z <- matrix(c(20, 10), ncol = 1)
Z
     [,1]
[1,]   20
[2,]   10
# Find the inverse of the X matrix: solve() called with a single matrix
# argument returns that matrix's inverse.
invX <- solve(X)
invX 
     [,1] [,2]
[1,] -0.1  0.4
[2,]  0.3 -0.2
# Multiply invX by the Z matrix to find the solutions (%*% is the matrix product).
# Note: this assignment reuses the name Y and replaces any earlier object called Y.
Y<- invX %*% Z
Y
     [,1]
[1,]    2
[2,]    4

\(y_1=2\) and \(y_2 =4\)