Category:Model Evaluation: Cross-validation

This is an example of k-fold cross-validation for a classification task using the FNN package; the same setup works for any classification method.

library(FNN)

# The accuracy function compares two vectors: truth and prediction.
accuracy <- function(truth, prediction) {
  tbl <- table(truth, prediction)
  sum(diag(tbl)) / sum(tbl)
}

# Loading data.
rankings <- read.csv("~/Google Drive/CS539Proj6/Datasets/rank2010reduced.csv",
                     na.strings = "NA")[-1]
rankings[, -1] <- scale(rankings[, -1], center = TRUE, scale = TRUE)

# Experimental setup.
kn <- 7       # number of neighbors used by the knn function
kfolds <- 10  # number of folds for cross-validation

# Set up k-fold cross-validation (adapted from Michael Hahsler's
# Introduction to Data Mining book).
index <- sample(1:nrow(rankings))  # shuffle the row indices
fold <- rep(1:kfolds, each = nrow(rankings) / kfolds)[1:nrow(rankings)]
folds <- split(index, fold)        # list with the row indices for each fold

accs <- vector(mode = "numeric")

# Test each fold. Note that multiple algorithms can be tested inside this
# loop; this is necessary if we are comparing more than one classification
# method and want to make sure each method uses the same folds.
for (i in 1:length(folds)) {
  cat("Calculating fold:", i, "\n")
  cl <- rankings$school[-folds[[i]]]
  # The first column (school) holds the class labels, so exclude it from
  # the feature matrices passed to knn.
  timeToBuild <- system.time(
    results <- knn(rankings[-folds[[i]], -1], rankings[folds[[i]], -1], cl,
                   k = kn, prob = FALSE, algorithm = "brute")
  )
  actualdata <- rankings$school[folds[[i]]]
  accs[i] <- accuracy(actualdata, results)
  if (i == 1)  # accumulate the confusion matrix across the folds
    confusionMatrix <- table(actualdata, results)
  else
    confusionMatrix <- confusionMatrix + table(actualdata, results)
}
confusionMatrix
mean(accs)

Cross-validation using the 'caret' package:

require(caret)

# Assign the dataset; put your own code here. A data frame is recommended.
dataset = read.csv(...)

# Set necessary variables.
n = 10                 # 10-fold cross-validation
len = length(dataset)  # number of attributes (including the target attribute)
matrices = list()      # list of n confusion matrices

# Create the folds; we assume that the target attribute is the last
# attribute in the dataset.
folds = createFolds(dataset[, len], k = n)

for (k in 1:n) {
  # Fold k is the test set; the remaining n - 1 folds form the training set.
  testindex = folds[[k]]
  trainindex = unlist(folds[-k])

  # In the training set, split the data into the train target (target class)
  # and the train attributes (the rest of the attributes).
  trainValues = dataset[trainindex, -len]  # ignore the target attribute
  trainTarget = dataset[trainindex, len]

  # Split the testing set the same way.
  testValues = dataset[testindex, -len]    # ignore the target attribute
  testTarget = dataset[testindex, len]

  # Now you can apply your desired training model to the training set.
  # ... your code for training the model here ...
  # my_model = ...

  # Predict for the test values. Note: the predict function of your
  # package may differ.
  predictions <- predict(my_model, testValues)

  # Create a confusion matrix from the predictions and the test target;
  # $table extracts the plain table so the matrices can be summed later.
  matrices[[k]] = confusionMatrix(predictions, testTarget)$table
}

# Combine all confusion matrices into one matrix (all matrices must have
# the same size, otherwise errors will happen).
cfMatrix = Reduce('+', matrices)

# Print the final confusion matrix.
cat("Confusion matrix:\n")
cfMatrix

# Print out the accuracy.
cat("Accuracy:", 100 * sum(diag(cfMatrix)) / sum(cfMatrix), "\n")
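For completeness: caret can also run the whole cross-validation loop itself through trainControl() and train(), so the manual fold bookkeeping above is only needed when you want explicit control over the folds. A minimal sketch, assuming the same dataset data frame with the target in the last column; the choice of "knn" as the model is an arbitrary example, not part of the code above:

library(caret)

# Let caret handle the 10-fold cross-validation internally.
ctrl <- trainControl(method = "cv", number = 10)
fit <- train(x = dataset[, -ncol(dataset)],         # predictor attributes
             y = factor(dataset[, ncol(dataset)]),  # target attribute as a factor
             method = "knn",                        # any caret method string works here
             trControl = ctrl)
fit  # prints the accuracy averaged over the 10 folds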
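Also worth noting: the manual shuffle-and-split in the FNN example assigns rows to folds purely at random, while createFolds() samples within the levels of a factor, keeping the class proportions roughly equal in every fold. A small illustration with a hypothetical 80/20 label vector (the labels below are made up):

library(caret)

set.seed(1)  # make the folds reproducible
labels <- factor(rep(c("a", "b"), times = c(80, 20)))  # hypothetical 80/20 class split
folds <- createFolds(labels, k = 10)  # stratified folds
table(labels[folds[[1]]])             # each fold holds roughly 8 "a" and 2 "b"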