#################################################################################
#################################################################################
### R code for ridge regression using glmnet()
### MOV task
### Help for glmnet
### http://web.stanford.edu/~hastie/Papers/Glmnet_Vignette.pdf
### Barbora Hladka, Martin Holub
### ESSLLI 2015
### http://ufal.mff.cuni.cz/esslli2015
#################################################################################
#################################################################################
#################################################################################

#' Mean squared error between true and predicted values.
#'
#' @param true Numeric vector of observed target values.
#' @param predict Numeric vector of predicted values (same length as `true`).
#' @return The mean of the squared differences `predict - true`.
mse.test <- function(true, predict) {
  mean((predict - true)^2)
}

#############
## load the package
library(glmnet)

#############
## get the data
# NOTE(review): `examples` is expected to be created by this script -- confirm.
source("load-mov-data.R")

# replace missing imdb_rating with 0
examples$imdb_rating[is.na(examples$imdb_rating)] <- 0

#############
## read the predefined cross-validation folds

# number of input folds
k <- 5

# One table per fold file "cv.test.<fold>.csv" (tab-separated); the first two
# columns identify the example (movie, user), the third records the fold No.
# Built with lapply + a single rbind instead of growing a data frame in a loop
# from a dummy seed row.
fold_tables <- lapply(seq_len(k), function(fold) {
  cv_test <- read.csv(paste0("cv.test.", fold, ".csv"), sep = "\t")
  data.frame(
    movie = cv_test[[1]],
    user = cv_test[[2]],
    fold = rep(fold, nrow(cv_test))
  )
})
ff <- do.call(rbind, fold_tables)

# Attach the fold No. to every example. merge() sorts its result by the key
# columns and keeps only matching rows, so the design matrix, the response and
# the fold ids must all be taken from the *merged* frame; taking x and y from
# `examples` would pair rows with the wrong fold whenever `examples` is not
# already in merge order.
examples_cv <- merge(examples, ff, by = intersect(names(ff), names(examples)))
foldid <- examples_cv$fold

#############
## run 5-cross-validation ridge regression
x <- model.matrix(
  rating ~ age + occupation + genre_drama + imdb_rating,
  examples_cv
)
y <- data.matrix(examples_cv$rating)

# every row of the design matrix needs exactly one fold id
stopifnot(nrow(x) == length(foldid), nrow(x) == nrow(y))

# alpha = 0 selects the ridge penalty
fit <- cv.glmnet(x, y, foldid = foldid, alpha = 0)

# explore fit
fit$glmnet.fit
fit$name
fit$lambda
print(fit)

# plot fit
# each curve corresponds to a feature
# "norm":   coefficient paths against the L1-norm of the coefficients
# "lambda": coefficient paths against log(lambda)
# "dev":    coefficient paths against the fraction of deviance explained
# the axis above shows the number of non-zero parameters at a given lambda
plot(fit$glmnet.fit, xvar = "norm", label = TRUE)

pdf("lin-reg-ridge-mov-path-lambda.pdf", width = 8.5, height = 5)
plot(fit$glmnet.fit, xvar = "lambda", label = TRUE)
dev.off()

plot(fit$glmnet.fit, xvar = "dev", label = TRUE)

# cross-validation curve
pdf("lin-reg-ridge-mov-cv-curve.pdf", width = 8.5, height = 5)
plot(fit)
dev.off()

# mean cross-validation error
fit$cvm

# minimum cve
min(fit$cvm)

# lambda that gives minimum cve
fit$lambda.min
# position of lambda.min in the lambda sequence (lambda.min is an element of
# fit$lambda, so the exact comparison is safe here)
i <- which(fit$lambda == fit$lambda.min)

# parameter values for lambda.min
coef(fit, s = "lambda.min")

# largest value of lambda whose cross-validation error is within
# 1 standard error of the minimum
fit$lambda.1se
coef(fit, s = fit$lambda.1se)

# parameter values for lambda.min
ridge <- coef(fit, s = fit$lambda.min)

# parameter values for lambda = 0
# i.e. unregularized
# exact = TRUE refits the model at s = 0 instead of interpolating; current
# glmnet releases require the original x and y to be supplied for the refit.
zero <- coef(fit, s = 0, exact = TRUE, x = x, y = y)

cbind2(zero, ridge)