# Beruflich Dokumente (professional documents)
# Kultur Dokumente (culture documents)
# Report R-squared for the fitted CART model
rsq.rpart(m.cart)
# Difference between observed and CART-predicted rating values.
# NOTE(review): `diff` shadows base::diff(); rename if diff() is needed later.
diff <- dv$rating - m.val
# Write the scores of both models along with the observed data for comparison
output <- data.frame(list(m.val, p.rf, dv$rating))
colnames(output) <- c("PredictedCART", "PredictedRF", "Observed")
write.csv(output, "score.csv", row.names = FALSE)
# Pruning the tree based on the minimum of cross-validation error 'xerror'.
# This is commented out here, as there would be only the root node if the
# pruning is done in this example.
#pm<-prune(m,cp=m$cptable[which.min(m$cptable[,"xerror"]),"CP"])
# The ROC curve shows that a high TPR is reachable at a low cutoff
# probability, so build the confusion matrix with a 0.2 cutoff.
# NOTE(review): assumes fit.m[2] holds predicted probabilities -- confirm.
above_cut <- fit.m[2] > 0.2
predcton <- ifelse(above_cut, 1, 0)
print("At cutoff=0.2")
# Confusion matrix: observed class vs predicted class
print(table(dv$PersonalLoan, predcton))
# Lift charts: cumulative, then per-bucket
plotLift(predcton, dv$PersonalLoan)
plotLift(predcton, dv$PersonalLoan, cumulative = FALSE)
# With only one predictor we can plot the observed points and the predicted
# values together and inspect the relationship.
plot(PersonalLoan ~ Income, data = dt)
# Overlay the fitted values of the logistic regression model as blue points
lines(dt$Income, m.lr$fitted.values, type = "p", col = "blue")
# The ROC curve again shows a high TPR at a low cutoff probability, so draw
# the confusion matrix using a 0.15 cutoff on the predicted values m.vl.
is_positive <- m.vl > 0.15
predcton <- ifelse(is_positive, 1, 0)
print("At cutoff=0.15")
# Confusion matrix: observed class vs predicted class
print(table(dv$PersonalLoan, predcton))
# Lift charts: cumulative, then per-bucket
plotLift(predcton, dv$PersonalLoan)
plotLift(predcton, dv$PersonalLoan, cumulative = FALSE)
# Read the wine dataset, chosen interactively.
# file.choose() is portable; choose.files() exists only on Windows.
wine <- read.csv(file.choose())
# Inspect the column names of the loaded data
names(wine)
# k-NN classification of the iris data with k = 5.
library(class)
# NOTE(review): di is read here but not used below; trn/val are not defined
# in this snippet -- presumably created by an earlier partitioning step.
di<-read.table("iris.csv",header=T,sep=",")
# Column 5 is assumed to be the Species label; columns 1-4 the features.
pred<-knn(trn[,-5],val[,-5],trn[,5],k=5)
print("k=5")
# Confusion matrix: predicted vs actual labels on the validation set
print(table(pred,val[,5]))
# Duplicate of the preceding k-NN snippet (k = 5 on the iris data) --
# confirm the repetition is intentional.
library(class)
# NOTE(review): di is read but unused; trn/val are not defined in this
# snippet -- presumably created by an earlier partitioning step.
di<-read.table("iris.csv",header=T,sep=",")
pred<-knn(trn[,-5],val[,-5],trn[,5],k=5)
print("k=5")
print(table(pred,val[,5]))
# Open the iris dataset and inspect the first rows
di <- read.csv("iris.csv", header = TRUE)
head(di)
# caret provides createDataPartition() for stratified splitting
library(caret)
# Stratified 70/30 train/validation split on Species.
# NOTE(review): no set.seed() precedes this, so the split is not reproducible.
rec <- createDataPartition(y = di$Species, p = 0.7, list = FALSE)
dit <- di[rec, ]
div <- di[-rec, ]
# NOTE(review): re-reading iris.csv overwrites di with the same data --
# likely a leftover paste; confirm it is intentional.
di <- read.table("iris.csv", header = TRUE, sep = ",")
# Loadings plots for the wine PCA (wine.pc assumed to be a prcomp fit).
# NOTE(review): the original comment said "Dotplot PC1" but no PC1 dotplot
# is present in this snippet -- confirm whether one was lost.
library(lattice)
# Extract the loadings (rotation matrix) from the PCA fit.
# NOTE(review): `load` shadows base::load(); rename if load() is needed later.
load <- wine.pc$rotation
print(paste("The loadings are as follows: "))
print(load)
# Dotplot of PC2: variables ordered by their PC2 loading
ordered.load2 <- load[order(load[, 2]), 2]
dotplot(ordered.load2, main = "Loadings Plot of PC2",
        xlab = "Variable Loadings", col = "red", cex = 1.5)
# Biplot of the first two components (observation labels larger than
# variable labels)
biplot(wine.pc, cex = c(1, 0.7))
# Load the cereal dataframe, chosen interactively.
# file.choose() is portable; choose.files() exists only on Windows.
cer <- read.csv(file.choose(), header = TRUE)
# Remove the first column (Cereal) as it is just the name and may not be
# important for the analysis
cer <- cer[, -1]
# Read the regression test data and split into training/validation sets.
d<-read.csv("regtest.csv")
# NOTE(review): sam is not defined in this snippet -- presumably a vector of
# sampled row indices created earlier; confirm.
dt<-d[sam,]
dv<-d[-sam,]
# Develop the simple linear regression model (endurance ~ age) on the
# training set
m.lm<-lm(endurance~age, data=dt)
# Identify the data points whose Cook's distance exceeds 0.04 and label them
# on the (already drawn) Cook's distance plot.
tp <- seq_along(cd)   # index positions 1..length(cd); safe for empty cd
ip <- tp[cd > 0.04]   # indices of influential points
iv <- cd[ip]          # their Cook's distance values
# Label each influential point slightly below its plotted position
text(ip, iv - (max(cd) * 0.05), names(iv), col = "blue", cex = 0.7)
# Draw a scatterplot matrix of the first four columns of di
# (ggscatmat from the GGally package)
ggscatmat(di,columns=1:4)
# NOTE(review): duplicated call -- the same scatterplot matrix is drawn
# twice; confirm whether the repetition is intentional.
ggscatmat(di,columns=1:4)
# Load caret for dummyVars()-based dummy/one-hot encoding
library(caret)
# fullRank TRUE will create n-1 dummies, while FALSE will create n dummies
# NOTE(review): dmy (the dummyVars object) is not defined in this snippet --
# presumably created by an earlier dummyVars() call; confirm.
dnnew=data.frame(predict(dmy,newdata=di))
# Third copy of the k-NN snippet (k = 5 on the iris data) -- confirm the
# repetition is intentional.
library(class)
# NOTE(review): di is read but unused; trn/val are not defined in this
# snippet -- presumably created by an earlier partitioning step.
di<-read.table("iris.csv",header=T,sep=",")
pred<-knn(trn[,-5],val[,-5],trn[,5],k=5)
print("k=5")
print(table(pred,val[,5]))
# Read the wine dataset, chosen interactively (duplicate of the earlier read).
# file.choose() is portable; choose.files() exists only on Windows.
wine <- read.csv(file.choose())
# Inspect the column names of the loaded data
names(wine)