# Beruflich Dokumente
# Kultur Dokumente
# Session setup: record environment info, load utility packages, fix the RNG
# seed so model results are reproducible.
Sys.info()[1:5]   # was `info()[1:5]` — no such function; Sys.info() matches the sessionInfo() call below
sessionInfo()
library(stringr)
library(knitr)
userdir <- getwd()
set.seed(123)
# Prepare the model inputs: `x` holds the predictor columns, `y` the class
# column (assumed to be the last column, index `nv`), and `fmla` the formula
# `<response> ~ <pred1> + <pred2> + ...` built from the column names.
cm <- list()
x <- tal_eval[, seq_len(nv - 1)]
y <- tal_eval[, nv]
predictor_names <- colnames(tal_eval)[seq_len(nv - 1)]
response_name <- colnames(tal_eval)[nv]
fmla <- as.formula(paste0(response_name, "~", paste(predictor_names, collapse = "+")))
# Multinomial regression (nnet::multinom)
library(nnet)
library(caret)
model <- multinom(fmla, data = tal_eval, maxit = 500, trace = FALSE)
prob <- predict(model, x, type = "probs")
# which.max over each row gives the column index of the most probable class;
# map it back onto the factor levels of y. This replaces the original
# hard-coded relabel loop for exactly 4 classes, so it works for any number
# of levels.
pred <- factor(levels(y)[apply(prob, 1, which.max)], levels = levels(y))
l <- union(pred, y)
mtab <- table(factor(pred, l), factor(y, l))
mtab
cm[[1]] <- c("Multinomial", "MULTINOM", confusionMatrix(mtab))
cm[[1]]$table
cm[[1]]$overall[1]
# Multinomial logistic regression (VGAM::vglm)
library(VGAM)
model <- vglm(fmla, family = "multinomial", data = tal_eval, maxit = 100)
prob <- predict(model, x, type = "response")
# Map the most-probable column index back onto the levels of y (generalized
# from the original 4-class hard-coded relabel loop).
pred <- factor(levels(y)[apply(prob, 1, which.max)], levels = levels(y))
l <- union(pred, y)
mtab <- table(factor(pred, l), factor(y, l))
cm[[2]] <- c("Logistic Regression", "GLM", confusionMatrix(mtab))
cm[[2]]$table
cm[[2]]$overall[1]
# removed: cm[[3]]$overall[1] — cm[[3]] is never assigned in this file, so
# indexing it here raises "subscript out of bounds"
# Non-Linear Classification: Mixture Discriminant Analysis (mda::mda)
library(mda)
model <- mda(fmla, data = tal_eval)
pred <- predict(model, x)
l <- union(pred, y)
mtab <- table(factor(pred, l), factor(y, l))
cm[[4]] <- c("Mixture Discriminant Analysis", "MDA", confusionMatrix(mtab))
cm[[4]]$table
cm[[4]]$overall[1]
# removed: cm[[5]]$overall[1] — cm[[5]] is never assigned in this file
# Neural Network (nnet::nnet)
library(nnet)
library(devtools)
model <- nnet(fmla, data = tal_eval, size = 4, decay = 0.0001, maxit = 700, trace = FALSE)
# Import the plotting function from GitHub.
# (The URL was split across two source lines, which made the string — and the
# script — syntactically invalid; rejoined into a single literal.)
source_url('https://gist.githubusercontent.com/Peque/41a9e20d6687f2f3108d/raw/85e14f3a292e126f1454864427e3a189c2fe33f3/nnet_plot_update.r')
plot.nnet(model, alpha.val = 0.5, cex = 0.7, circle.col = list('lightblue', 'white'), bord.col = 'black')
pred <- predict(model, x, type = "class")
pred <- as.factor(pred)
l <- union(pred, y)
mtab <- table(factor(pred, l), factor(y, l))
cm[[6]] <- c("Neural Network", "NNET", confusionMatrix(mtab))
cm[[6]]$table
cm[[6]]$overall[1]
# removed: cm[[7]]$overall[1] and cm[[8]]$overall[1] — never assigned
# k-Nearest Neighbors (caret::knn3); k is tied to the number of class levels
library(caret)
model <- knn3(fmla, data = tal_eval, k = nlev + 1)
pred <- predict(model, x, type = "class")
l <- union(pred, y)
mtab <- table(factor(pred, l), factor(y, l))
cm[[9]] <- c("k-Nearest Neighbors", "KNN", confusionMatrix(mtab))
cm[[9]]$table
cm[[9]]$overall[1]
# removed: cm[[10]]$overall[1] and cm[[11]]$overall[1] — never assigned
# 3.C2 OneR rule learner (RWeka::OneR)
library(RWeka)
model <- OneR(fmla, data = tal_eval)
pred <- predict(model, x, type = "class")
lvls <- union(pred, y)
mtab <- table(factor(pred, lvls), factor(y, lvls))
cm[[12]] <- c("One R", "ONE-R", confusionMatrix(mtab))
cm[[12]]$table
cm[[12]]$overall[1]
# 3.C3 C4.5 decision tree (RWeka::J48)
library(RWeka)
model <- J48(fmla, data = tal_eval)
pred <- predict(model, x)
l <- union(pred, y)
mtab <- table(factor(pred, l), factor(y, l))
cm[[13]] <- c("C4.5", "C45", confusionMatrix(mtab))
cm[[13]]$table
cm[[13]]$overall[1]   # was cm[[12]] (OneR) — copy-paste error; print this model's accuracy
# 3.C4 PART rule learner (RWeka::PART)
library(RWeka)
model <- PART(fmla, data = tal_eval)
pred <- predict(model, x)
l <- union(pred, y)
mtab <- table(factor(pred, l), factor(y, l))
cm[[14]] <- c("PART", "PART", confusionMatrix(mtab))
cm[[14]]$table
cm[[14]]$overall[1]
# removed: cm[[16]]$overall[1] and cm[[17]]$overall[1] — never assigned
# 3.C9 JRip / RIPPER rule learner (RWeka::JRip)
library(RWeka)
model <- JRip(fmla, data = tal_eval)
pred <- predict(model, x)
lvls <- union(pred, y)
mtab <- table(factor(pred, lvls), factor(y, lvls))
cm[[19]] <- c("JRip", "JRIP", confusionMatrix(mtab))
cm[[19]]$table
cm[[19]]$overall[1]
# Benchmark the training time of every model.
# Load the packages whose fitting functions are used below; none of them are
# attached elsewhere in this chunk. (library() is idempotent, so re-attaching
# is harmless if an earlier chunk already loaded them.)
library(microbenchmark)
library(MASS)          # lda
library(klaR)          # rda — presumably klaR's rda; confirm against earlier chunks
library(kernlab)       # ksvm
library(e1071)         # naiveBayes
library(rpart)
library(ipred)         # bagging — presumably ipred's; confirm
library(randomForest)
library(C50)           # C5.0
library(gbm)
mbm1 <- microbenchmark(
  m1 <- multinom(fmla, data = tal_eval, maxit = 500, trace = FALSE),
  m2 <- vglm(fmla, family = "multinomial", data = tal_eval, maxit = 100),
  m3 <- lda(fmla, data = tal_eval),
  m4 <- mda(fmla, data = tal_eval),
  m5 <- rda(fmla, data = tal_eval, gamma = 0.05, lambda = 0.01))
mbm2 <- microbenchmark(
  m6 <- nnet(fmla, data = tal_eval, size = 4, decay = 0.0001, maxit = 700, trace = FALSE),
  m7 <- fda(fmla, data = tal_eval),
  m8 <- ksvm(fmla, data = tal_eval),
  m9 <- knn3(fmla, data = tal_eval, k = nlev + 1),
  # naiveBayes has no `k` argument — the k = nlev + 1 in the original was a
  # copy-paste from the knn3 call and has been dropped.
  m10 <- naiveBayes(fmla, data = tal_eval))
mbm3 <- microbenchmark(
  m11 <- rpart(fmla, data = tal_eval),
  m12 <- OneR(fmla, data = tal_eval),
  m13 <- J48(fmla, data = tal_eval),
  m14 <- PART(fmla, data = tal_eval),
  m15 <- bagging(fmla, data = tal_eval))
mbm4 <- microbenchmark(
  m15 <- bagging(fmla, data = tal_eval),   # NOTE(review): duplicates mbm3's bagging run — confirm intent
  m16 <- randomForest(fmla, data = tal_eval),
  m18 <- C5.0(fmla, data = tal_eval, trials = 10),
  m19 <- JRip(fmla, data = tal_eval))
# gbm is slow, so it is fitted once outside microbenchmark. The original had
# this statement twice, the first copy ending in a dangling comma and both
# copies split mid-word ("distr / ibution") by a line wrap — a syntax error.
m17 <- gbm(fmla, data = tal_eval, n.trees = 5000, interaction.depth = nlev,
           shrinkage = 0.001, bag.fraction = 0.8,
           distribution = "multinomial", verbose = FALSE, n.cores = 4)
library(dplyr)
# Number of slots in the confusion-matrix list.
# NOTE(review): cm is sparsely indexed above (slots 3, 5, 7, 8, 10, 11 and
# 15-18 are never assigned in this chunk), so length(cm) counts NULL gaps and
# the sapply calls below would fail on them — confirm the missing models are
# fitted in an unseen chunk.
models<-length(cm)
# Relabel each timing row with the short model code stored in cm[[i]][[2]],
# so aggregate() below can group times by model.
# NOTE(review): `mbm` is never created in this chunk — only mbm1..mbm4 exist.
# Presumably mbm <- rbind(mbm1, mbm2, mbm3, mbm4) happens elsewhere; verify.
# The rep(..., 5) also assumes exactly 5 timing rows per model — confirm the
# microbenchmark `times` setting used (the default is 100).
mbm$expr<-rep(sapply(1:models, function(i) {cm[[i]][[2]]}),5)
# Mean training time per model, normalized so the fastest model is 1.
mbm<-aggregate(x=mbm$time,by=list(Model=mbm$expr),FUN=mean)
mbm$x<-mbm$x/min(mbm$x)
# Build one column per model: description, code, relative time, then the
# first six confusionMatrix overall statistics (Accuracy, Kappa, CIs, ...).
# NOTE(review): mbm$x[i] indexes aggregate()'s output, which is sorted by
# group label — confirm this order matches the cm list order.
results<-sapply (1:models, function(i) {c(cm[[i]][[1]],cm[[i]][[2]],mbm$x[i],cm[[i]]$overall[1:6])})
row.names(results)<-c("Description","Model","Model_Time_X",names(cm[[1]]$overall[1:6]))
results<-as.data.frame(t(results))
# Numeric columns arrive as factors of number-strings; convert them back to
# numeric via their levels (pre-R-4.0 stringsAsFactors behavior).
results[,3:9]<-sapply(3:9,function(i){results[,i]<-as.numeric(levels(results[,i])[results[,i]])})
results<-results[,-(8:9)]
results<-arrange(results,desc(Accuracy))
results
library(ggplot2)
library(gridExtra)
# Relative-performance table: accuracy, speed (inverse of normalized training
# time), and a combined accuracy-per-unit-time score.
# (The original split `Overall` mid-word across two lines — a syntax error —
# rejoined here.)
res <- data.frame(Model = results$Model,
                  Accuracy = results$Accuracy,
                  Speed = 1 / results$Model_Time_X,
                  Overall = results$Accuracy / results$Model_Time_X)
library(RColorBrewer)
myPalette <- colorRampPalette(rev(brewer.pal(12, "Set3")))
sc <- scale_colour_gradientn(colours = myPalette(256), limits = c(0.8, 1))
kable(res)
# Polar bar chart of prediction accuracy per model, ordered best-first.
g <- ggplot(res, aes(x = reorder(Model, -Accuracy), y = Accuracy, fill = Model)) +
  geom_bar(stat = "identity") +
  coord_polar(theta = "x", direction = 1) +
  labs(x = "Machine Learning Model", y = "Prediction Accuracy") +
  theme(legend.position = "bottom", legend.box = "horizontal") +
  ggtitle('Car Evaluation Dataset Accuracy Performance')
g
# Persist the evaluation dataset for later runs.
write.csv(tal_eval, file = "mydata10.csv")