#################################################################################
# To load this library: use source("https://hasthika.github.io/olsvspack.txt")  #
# Needs library(glmnet), library(ggplot2), library(leaps)                       #
#################################################################################

olsafterlrsim <- function(n = 10, p = 4, k = 2, nruns = 100, eps = 0.1, shift = 9,
                          psi = 0.0, type = 1, alpha = 0.05, pitype = 1){
# Needs library(glmnet). Does lasso, RR or relaxed lasso. ## slow 10 fold CV
# Simulates RMSE for lasso if pitype = 1, ridge regression if pitype = 0,
# relaxed lasso if pitype = 2.
# 1 <= k <= p-1, k is the number of nonnoise variables
# Uses five iid error distributions:
# type = 1 for N(0,1) errors, 2 for t5 errors, 3 for exp(1) - 1 errors,
# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors.
# constant = 1 so there are p = q+1 coefficients
# need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k+1 ones, p-k-1 zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. See Maronna and Zamar (2002).
# cor(X_i,X_j) = [2 psi + (q-2) psi^2]/[1 + (q-1) psi^2], i not = j,
# when the correlation exists.
# set.seed(974)   ## need p > 2
  e_lr <- rep(NA, nruns)
  e_ols <- rep(NA, nruns)
  RMSE_lr <- 0
  RMSE_ols <- 0
  ols_coef2 <- rep(0, p)
  ols_coef_sum <- rep(0, p)
  lr_coef_sum <- rep(0, p)
  ols_coef_avg <- rep(0, p)
  lr_coef_avg <- rep(0, p)
  q <- p - 1
  rho <- (2*psi + (q-2)*psi^2)/(1 + (q-1)*psi^2)
  A <- matrix(psi, nrow = q, ncol = q)
  diag(A) <- 1
  b <- 0 * 1:q
  b[1:k] <- 1   # b[1:0] acts like b[1:1] = b[1]
  indx <- 1:n

  for(i in 1:nruns) {
    x <- matrix(rnorm(n * q), nrow = n, ncol = q)
    x <- x %*% A
    xf <- rnorm(q) %*% A
    if(type == 1) {
      y <- 1 + x %*% b + rnorm(n)
      yf <- 1 + xf %*% b + rnorm(1)
    }
    if(type == 2) {
      y <- 1 + x %*% b + rt(n, df = 5)
      yf <- 1 + xf %*% b + rt(1, df = 5)
    }
    if(type == 3) {
      y <- 1 + x %*% b + rexp(n) - 1
      yf <- 1 + xf %*% b + rexp(1) - 1
    }
    if(type == 4) {
      y <- 1 + x %*% b + runif(n, min = -1, max = 1)
      yf <- 1 + xf %*% b + runif(1, min = -1, max = 1)
    }
    if(type == 5) {
      err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift)
      y <- 1 + x %*% b + err
      ef <- rnorm(1, sd = 1 + rbinom(1, 1, eps) * shift)
      yf <- 1 + xf %*% b + ef
    } # make an MLR data set

    # find the 10 fold CV lasso or ridge regression model when pitype == 1 || pitype == 0
    if(pitype == 1 || pitype == 0){
      out <- cv.glmnet(x, y, alpha = pitype)
      lam <- out$lambda.min
      out_coef <- predict(out, s = lam, type = "coefficients")
      yfhat <- predict(out, s = lam, newx = xf)
    }

    # find the 10 fold CV relaxed lasso model when pitype == 2
    if(pitype == 2){
      rel_fit <- cv.glmnet(x, y, relax = TRUE)
      lam <- rel_fit$lambda.min
      out_coef <- predict(rel_fit, s = lam, type = "coefficients")
      yfhat <- predict(rel_fit, s = lam, newx = xf)
    }

    # Filter out the non-zero coefficients
    non_zero_indx <- which(out_coef[-1] != 0)
    x_ols <- x[, non_zero_indx]
    ols_mod <- lm(y ~ x_ols)   # Apply OLS on the selected predictors
    ols_coef <- ols_mod$coefficients

    # For comparisons of coefficients
    non_zero_coef_indx <- which(out_coef != 0)
    ols_coef2[non_zero_coef_indx] <- ols_coef

    xf_ols <- xf[, non_zero_indx]
    xf_ols <- append(xf_ols, 1, after = 0)   # modified xf so it will work with ols
    yfhat_ols <- xf_ols %*% ols_coef

    e_lr[i] <- (yf - yfhat)^2
    e_ols[i] <- (yf - yfhat_ols)^2

    # Compare coefficients
    ols_coef_sum <- ols_coef2 + ols_coef_sum
    lr_coef_sum <- out_coef + lr_coef_sum
  }

  # TRMSEs
  RMSE_lr <- sqrt(sum(e_lr)/nruns)
  RMSE_ols <- sqrt(sum(e_ols)/nruns)

  # Coef avgs
  ols_coef_avg <- ols_coef_sum/nruns
  lr_coef_avg <- lr_coef_sum/nruns

  # Minkowski (order p) distance between the true coefficient vector c(1, b) and
  # the averaged OLS-after-selection coefficient estimates
  diff_ols <- (sum( (abs(append(b, 1, after = 0) - ols_coef_avg))^p ))^(1/p)
  # Minkowski (order p) distance between c(1, b) and the averaged lasso/RR coefficient estimates
  diff_lr <- (sum( (abs(append(b, 1, after = 0) - lr_coef_avg))^p ))^(1/p)

  list(RMSE_ols = RMSE_ols, RMSE_lr = RMSE_lr, diff_ols = diff_ols, diff_lr = diff_lr)
}
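# Illustrative call (commented out so sourcing stays fast; the settings and the
# object name sim_out below are arbitrary choices, not recommendations):
# sim_out <- olsafterlrsim(n = 100, p = 10, k = 3, nruns = 100, psi = 0.0,
#                          type = 1, pitype = 1)    # pitype = 1: OLS after lasso vs. lasso
# sim_out$RMSE_ols; sim_out$RMSE_lr                 # simulated test RMSEs
# sim_out$diff_ols; sim_out$diff_lr                 # Minkowski distances to the true beta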
###################################################################################################

olsafterlrsimplots <- function(n = 50, p = 4, k = 2, nruns = 100, eps = 0.1, shift = 9,
                               psi = 0.0, type = 1, alpha = 0.05, pitype = 1){
# Needs library(glmnet) and library(ggplot2). Does lasso, RR or relaxed lasso simulations
# and produces a side-by-side boxplot. ## slow 10 fold CV
# Simulates RMSE for lasso if pitype = 1, ridge regression if pitype = 0,
# relaxed lasso if pitype = 2.
# 1 <= k <= p-1, k is the number of nonnoise variables
# Uses five iid error distributions:
# type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors,
# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors.
# constant = 1 so there are p = q+1 coefficients
# need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k+1 ones, p-k-1 zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. See Maronna and Zamar (2002).
# cor(X_i,X_j) = [2 psi + (q-2) psi^2]/[1 + (q-1) psi^2], i not = j,
# when the correlation exists.
# set.seed(974)   ## need p > 2
  e_lr <- rep(NA, nruns)
  e_ols <- rep(NA, nruns)
  RMSE_lr <- 0
  RMSE_ols <- 0
  ols_coef2 <- rep(0, p)
  ols_coef_sum <- rep(0, p)
  lr_coef_sum <- rep(0, p)
  ols_coef_avg <- rep(0, p)
  lr_coef_avg <- rep(0, p)
  q <- p - 1
  rho <- (2*psi + (q-2)*psi^2)/(1 + (q-1)*psi^2)
  A <- matrix(psi, nrow = q, ncol = q)
  diag(A) <- 1
  b <- 0 * 1:q
  b[1:k] <- 1   # b[1:0] acts like b[1:1] = b[1]
  indx <- 1:n

  for(i in 1:nruns) {
    x <- matrix(rnorm(n * q), nrow = n, ncol = q)
    x <- x %*% A
    xf <- rnorm(q) %*% A
    if(type == 1) {
      y <- 1 + x %*% b + rnorm(n)
      yf <- 1 + xf %*% b + rnorm(1)
    }
    if(type == 2) {
      y <- 1 + x %*% b + rt(n, df = 3)
      yf <- 1 + xf %*% b + rt(1, df = 3)
    }
    if(type == 3) {
      y <- 1 + x %*% b + rexp(n) - 1
      yf <- 1 + xf %*% b + rexp(1) - 1
    }
    if(type == 4) {
      y <- 1 + x %*% b + runif(n, min = -1, max = 1)
      yf <- 1 + xf %*% b + runif(1, min = -1, max = 1)
    }
    if(type == 5) {
      err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift)
      y <- 1 + x %*% b + err
      ef <- rnorm(1, sd = 1 + rbinom(1, 1, eps) * shift)
      yf <- 1 + xf %*% b + ef
    } # make an MLR data set

    # find the 10 fold CV lasso or ridge regression model when pitype == 1 || pitype == 0
    if(pitype == 1 || pitype == 0){
      out <- cv.glmnet(x, y, alpha = pitype)
      lam <- out$lambda.min
      out_coef <- predict(out, s = lam, type = "coefficients")
      yfhat <- predict(out, s = lam, newx = xf)
    }

    # find the 10 fold CV relaxed lasso model when pitype == 2
    if(pitype == 2){
      rel_fit <- cv.glmnet(x, y, relax = TRUE)
      lam <- rel_fit$lambda.min   # was out$lambda.min, but out need not exist when pitype == 2
      out_coef <- predict(rel_fit, s = lam, type = "coefficients")
      yfhat <- predict(rel_fit, s = lam, newx = xf)
    }

    # Filter out the non-zero coefficients
    non_zero_indx <- which(out_coef[-1] != 0)
    x_ols <- x[, non_zero_indx]
    ols_mod <- lm(y ~ x_ols)   # Apply OLS on the selected predictors
    ols_coef <- ols_mod$coefficients

    # For comparisons of coefficients
    non_zero_coef_indx <- which(out_coef != 0)
    ols_coef2[non_zero_coef_indx] <- ols_coef

    xf_ols <- xf[, non_zero_indx]
    xf_ols <- append(xf_ols, 1, after = 0)   # modified xf so it will work with ols
    yfhat_ols <- xf_ols %*% ols_coef

    e_lr[i] <- (yf - yfhat)^2
    e_ols[i] <- (yf - yfhat_ols)^2

    # Compare coefficients
    ols_coef_sum <- ols_coef2 + ols_coef_sum
    lr_coef_sum <- out_coef + lr_coef_sum
  }

  # Side-by-side boxplot of the squared test errors for the two methods
  rmses <- data.frame(TRMSE = c(e_lr, e_ols),
                      Method = c(rep("Other", nruns), rep("OLSAVS", nruns)))
  rmsePlot <- ggplot(data = rmses, aes(x = Method, y = TRMSE, fill = Method)) +
    geom_boxplot() + theme_bw()

  # TRMSEs
  RMSE_lr <- sqrt(sum(e_lr)/nruns)
  RMSE_ols <- sqrt(sum(e_ols)/nruns)

  # Coef avgs
  ols_coef_avg <- ols_coef_sum/nruns
  lr_coef_avg <- lr_coef_sum/nruns
  #nroot <- length(ols_coef_avg)

  # Minkowski (order p) distance between the true coefficient vector c(1, b) and
  # the averaged OLS-after-selection coefficient estimates
  diff_ols <- (sum( (abs(append(b, 1, after = 0) - ols_coef_avg))^p ))^(1/p)
  # Minkowski (order p) distance between c(1, b) and the averaged lasso/RR coefficient estimates
  diff_lr <- (sum( (abs(append(b, 1, after = 0) - lr_coef_avg))^p ))^(1/p)

  list(RMSE_ols = RMSE_ols, RMSE_lr = RMSE_lr, diff_ols = diff_ols, diff_lr = diff_lr,
       rmsePlot = rmsePlot)
}
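# Illustrative call (commented out; settings and the object name plot_out are
# arbitrary choices, not recommendations): run the relaxed lasso simulation and
# display the boxplot comparing OLSAVS with the relaxed lasso.
# plot_out <- olsafterlrsimplots(n = 100, p = 10, k = 3, nruns = 100, pitype = 2)
# plot_out$rmsePlot                        # side-by-side boxplot of squared test errors
# c(plot_out$RMSE_ols, plot_out$RMSE_lr)   # simulated test RMSEs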
olsavs <- function(x, y, method){
# olsavs does the OLS-after-variable-selection (olsavs) method
# Needs library(glmnet).
# x - matrix of predictors (needs to be a model.matrix)
# y - response
# method - lasso, RR, Enet or Relaxed lasso (method = 1, method = 0, method = 0.5 or method = 2)
# 10 fold CV

  # find the 10 fold CV lasso, ridge regression or enet model when method == 1 || method == 0 || method == 0.5
  if(method == 1 || method == 0 || method == 0.5){
    out <- cv.glmnet(x, y, alpha = method)
    lam <- out$lambda.min
    out_coef <- predict(out, s = lam, type = "coefficients")
    #yfhat <- predict(out, s = lam, newx = xf)
  }

  # find the 10 fold CV relaxed lasso model when method == 2
  if(method == 2){
    rel_fit <- cv.glmnet(x, y, relax = TRUE)
    lam <- rel_fit$lambda.min
    out_coef <- predict(rel_fit, s = lam, type = "coefficients")
    #yfhat <- predict(rel_fit, s = lam, newx = xf)
  }

  # Filter out the non-zero coefficients
  non_zero_indx <- which(out_coef[-1] != 0)
  x_ols <- x[, non_zero_indx]
  ols_mod <- lm(y ~ x_ols)   # Apply OLS on the selected predictors
  ols_coef <- ols_mod$coefficients

  outPut <- list(ols_mod = ols_mod, ols_coef = ols_coef, out_coef = out_coef)
  outPut$ols_mod
}

###################################################################################################

olsavsPredict <- function(model, newx){
# Does predictions for olsavs
  xtest <- newx
  mod <- model
  names(mod$coefficients) <- gsub(pattern = "x_ols", replacement = "",
                                  x = names(mod$coefficients))
  xtest1 <- xtest[, names(mod$coefficients[-1])]    # good
  newxtest <- cbind(rep(1, nrow(xtest1)), xtest1)   # Added a column of ones to make xtest a design matrix
  yhatTest <- newxtest %*% as.matrix(mod$coefficients)
  pred <- list(yhat = yhatTest)
  pred
}
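# Illustrative sketch (commented out, not part of the original examples): apply
# olsavs() and olsavsPredict() to simulated data before the real-data example
# below. The names x_sim, y_sim, fit_sim and pred_sim are made up here, and
# method = 1 chooses the lasso for the variable selection step.
# x_sim <- matrix(rnorm(200 * 5), nrow = 200, ncol = 5)
# colnames(x_sim) <- paste0("V", 1:5)
# y_sim <- 1 + x_sim[, 1] + x_sim[, 2] + rnorm(200)
# fit_sim <- olsavs(x_sim[1:150, ], y_sim[1:150], method = 1)    # OLS after lasso
# pred_sim <- olsavsPredict(fit_sim, x_sim[151:200, ])
# sqrt(mean((y_sim[151:200] - pred_sim$yhat)^2))                 # test RMSE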
##################### Example ###############################################

library(ISLR)
library(glmnet)
library(lars)

data(Auto)
#set.seed(1111)

##################### Splitting and preparing data ############################

train <- sample(c(TRUE, FALSE), size = nrow(Auto), replace = TRUE, prob = c(0.7, 0.3))   # roughly 70/30 split
test <- (!train)

xtrain <- model.matrix(mpg~., Auto)[train,]
xtest <- model.matrix(mpg~., Auto)[test,]

ytrain <- Auto$mpg[train]
ytest <- Auto$mpg[test]

y <- ytrain
x <- xtrain

##################### Applying olsavs ########################################

# Method - lasso, RR, Enet or Relaxed lasso (method = 1, method = 0, method = 0.5 or method = 2)
mod <- olsavs(x, y, method = 0.5)   # Fitting the olsavs model
mod
summary(mod)

# Predictions
preds <- olsavsPredict(mod, xtest)   # olsavsPredict: separate function for predictions

# Test root mean square error for olsavs
yhatTest <- preds$yhat
RMSE_test_olsavs <- sqrt(mean((ytest - yhatTest)^2))
RMSE_test_olsavs

####################### lasso, ridge, relax, enet ###############################

# Test root mean square error lasso, ridge, enet or relax (alpha = 1, 0, 0.5, relax: see below)
# for relax lasso: cv.glmnet(x, y, relax = TRUE); a commented-out sketch follows the End Example marker
out <- cv.glmnet(x, y, alpha = 0.5)
lam <- out$lambda.min
out_coef <- predict(out, s = lam, type = "coefficients")
yfhat <- predict(out, s = lam, newx = xtest)

RMSE_test_l <- sqrt(mean((ytest - yfhat)^2))
RMSE_test_l

########################## End Example ####################################
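####################### Relaxed lasso test RMSE (illustrative sketch) ############################
# Commented-out sketch, not part of the original example: the same test-RMSE
# computation as above, but with the relaxed lasso via cv.glmnet(..., relax = TRUE).
# The object names rel_out, lam_rel, yfhat_rel and RMSE_test_rel are made up here.
# rel_out <- cv.glmnet(x, y, relax = TRUE)
# lam_rel <- rel_out$lambda.min
# yfhat_rel <- predict(rel_out, s = lam_rel, newx = xtest)
# RMSE_test_rel <- sqrt(mean((ytest - yfhat_rel)^2))
# RMSE_test_rel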