# Locally Weighted Regression ----

# This is my code for locally weighted regression using matrix
# multiplication. It is really messy but hopefully you can get some
# inspiration from it.

# Gaussian (RBF) kernel between two points.
# x, x0: numeric vectors of equal length; c: bandwidth; a: amplitude.
# Returns a * exp(-||x - x0||^2 / (2 c^2)).
gaussian_kernel <- function(x, x0, c, a) {
  diff <- x - x0
  dot_prod <- sum(diff * diff)  # scalar form of t(diff) %*% diff
  a * exp(dot_prod / (-2 * c^2))
}

# Euclidean (L2) norm of a vector.
norm_vec <- function(x) sqrt(sum(x^2))

# Tricube-style kernel: (1 - ||d||^3)^3 when ||d|| < thresh, else 0,
# where d = (x - x0) / p is the scaled difference.
cubic_kernel <- function(x, x0, thresh, p) {
  d <- (x - x0) / p
  (1 - norm_vec(d)^3)^3 * (norm_vec(d) < thresh)
}

# Build the diagonal weight matrix for one query point:
# W[i, i] = gaussian_kernel(datapoint, i-th training row).
get_weights <- function(datapoint, training_inputs, c, a) {
  x <- as.matrix(training_inputs)
  n_rows <- NROW(x)
  # Compute all diagonal entries at once instead of filling an
  # n x n zero matrix entry-by-entry in a loop.
  w <- vapply(
    seq_len(n_rows),
    function(i) gaussian_kernel(datapoint, x[i, ], c, a),
    numeric(1)
  )
  # Alternative kernel (note: cubic_kernel takes (thresh, p), not (c, a)):
  # w <- vapply(seq_len(n_rows),
  #             function(i) cubic_kernel(datapoint, x[i, ], c, a),
  #             numeric(1))
  diag(w, nrow = n_rows)
}

library('MASS')  # for ginv()

# Fit a weighted least-squares model local to `datapoint` and return
# its prediction: beta = (X' W X)^+ X' W y, prediction = datapoint %*% beta.
# ginv() (Moore-Penrose pseudo-inverse) tolerates a singular X' W X.
lwr_predict <- function(datapoint, training_inputs, training_outputs, c, a) {
  x <- data.matrix(training_inputs)
  y <- data.matrix(training_outputs)
  weights <- get_weights(datapoint, x, c, a)
  xtwx <- t(x) %*% (weights %*% x)
  betas <- ginv(xtwx) %*% (t(x) %*% (weights %*% y))
  datapoint %*% betas
}

# Predict every row of `datapoints`, each with its own local fit.
lwr <- function(datapoints, training_inputs, training_outputs, c, a) {
  n_datapoints <- NROW(datapoints)
  predicts <- numeric(n_datapoints)  # preallocate instead of growing
  for (i in seq_len(n_datapoints)) {
    predicts[i] <- lwr_predict(datapoints[i, ], training_inputs,
                               training_outputs, c, a)
  }
  predicts
}

library('RWeka')  # for read.arff()

# Load the data sets and make column names syntactically valid R names.
allset <- read.arff('preprocessed-ALLdataset.arff')
subset <- read.arff('preprocessed-subset.arff')
colnames(allset) <- make.names(names(allset))
colnames(subset) <- make.names(names(subset))

cur_data <- allset
cur_data <- cur_data[, -c(2, 3, 13)]  # drop unused columns

seed.val <- 1992  # random seed
kfolds <- 10      # number of folds for cross-validation
set.seed(seed.val)
index <- sample(seq_len(nrow(cur_data)))  # shuffled row indices
# NOTE(review): assumes nrow(cur_data) is divisible by kfolds; otherwise
# trailing positions get NA fold labels — verify with the actual data.
fold <- rep(1:kfolds, each = nrow(cur_data) / kfolds)[1:nrow(cur_data)]
folds <- split(index, fold)  # list of row indices, one element per fold
# Cross-validated evaluation of LWR over a range of Gaussian bandwidths k.
# BUG FIX: the original indexed the fold list as `folds i`, which does not
# parse; the intended subsetting is `folds[[i]]`.
mean_acc <- vector(mode = "numeric")
mean_cor <- vector(mode = "numeric")
count <- 0
for (k in c(1:10, 30, 60, 90)) {
  count <- count + 1
  accs <- vector(mode = "numeric")
  cors <- vector(mode = "numeric")
  for (i in seq_along(folds)) {
    # cat("Calculating Fold: ", i, "\n")
    # Train on every fold except the i-th; column 14 is the target.
    train_data <- cur_data[-folds[[i]], ]
    trainX <- data.matrix(train_data[, -14])
    trainY <- data.matrix(train_data[, 14])
    # Test on the held-out i-th fold.
    test_data <- cur_data[folds[[i]], ]
    testX <- data.matrix(test_data[, -14])
    testY <- data.matrix(test_data[, 14])
    # k is the Gaussian bandwidth, amplitude fixed at 1.
    system.time(predY <- lwr(testX, trainX, trainY, k, 1))
    # actualdata <- cur_data[folds[[i]], 17]
    cors[i] <- cor(predY, testY)
    accs[i] <- norm_vec(predY - testY)  # L2 error magnitude on the fold
  }
  mean_acc[count] <- mean(accs, na.rm = TRUE)
  mean_cor[count] <- mean(cors, na.rm = TRUE)
}
mean_acc
mean_cor

# Save the results under kernel-specific names for later comparison.
cu_k <- c(1:10, 30, 60, 90)
cu_acc <- mean_acc
cu_cor <- mean_cor
g_k <- cu_k
gu_acc <- mean_acc
gu_cor <- mean_cor

# Diagnostic plots of a plain linear fit; note trainX/trainY here are
# whatever the last cross-validation iteration left behind.
layout(matrix(c(1, 2, 3, 4), 2, 2))
s <- lm(trainY ~ trainX)
plot(s)