# Page 1 of 4

# COVID-19 forecasting, including the effects and benefits of lockdown on
# new cases and fatalities, using machine learning.

# ---- Load data and derive the Day index ----

# Work from the competition folder so the relative file names used by this
# script (inputs here, the submission file later) resolve against it.
setwd("C:/Users/Shivendra/Desktop/COVID-19/Week-2")
getwd()

library(dplyr)

# Both CSV files are read with their first row as the header.
covid.train <- read.csv("train.csv", header = TRUE)
covid.test <- read.csv("test.csv", header = TRUE)

# Inspect the raw training table before any column is converted.
str(covid.train)
summary(covid.train)

# Date is parsed from "%Y-%m-%d" text into a Date; Day is the whole number of
# days since the earliest training date.
covid.train <- covid.train %>%
  mutate(
    Date = as.Date(as.character(Date), format = "%Y-%m-%d"),
    Day = as.integer(Date - min(Date))
  )

# The test table is converted the same way and shares the training origin, so
# Day values are comparable between the two tables.
covid.test <- covid.test %>%
  mutate(
    Date = as.Date(as.character(Date), format = "%Y-%m-%d"),
    Day = as.integer(Date - min(covid.train$Date))
  )

# covid.train2: the training rows dated strictly before the first test date.
covid.train2 <- filter(covid.train, Date < min(covid.test$Date))

str(covid.test)
summary(covid.train$Date)

library(DataExplorer)

library(corrplot)

library(rpart)

library(caret)
library(rpart.plot)

# ---- Exploratory data analysis ----

# Per-column missing-value overview (DataExplorer).
plot_missing(covid.train)

# Columns kept for the numeric summaries: everything except the first four.
# cor() only accepts numeric input, so this subset is also safe to hand to
# boxplot(). Passing the whole data frame to boxplot() instead fails as soon
# as one column holds character data.
# NOTE(review): the first four columns are presumably Id, Province_State,
# Country_Region and Date - confirm against train.csv.
numeric_part <- covid.train[, -c(1, 2, 3, 4)]

boxplot(numeric_part)

# Pairwise correlations, drawn as numbers in the lower triangle.
# corrplot is already loaded with the other packages, so it is not loaded
# a second time here.
cormatrix <- cor(numeric_part)
corrplot(cormatrix, type = "lower", method = "number")

# ---- CART models ----

# Control settings shared by both trees. With cp = 0 no split is rejected for
# improving the fit too little, so tree size is bounded only by minsplit and
# minbucket; xval = 10 requests 10 cross-validations.
rpart.ctrl <- rpart.control(minsplit = 50, minbucket = 5, cp = 0, xval = 10)

# One regression tree per target, both using Day, Country_Region and
# Province_State. The data frame is supplied through `data =`, so its columns
# do not need to be attach()ed to the search path.
cart_model <- rpart(
  ConfirmedCases ~ Day + Country_Region + Province_State,
  data = covid.train,
  control = rpart.ctrl
)

cart_model2 <- rpart(
  Fatalities ~ Day + Country_Region + Province_State,
  data = covid.train,
  control = rpart.ctrl
)

# Unscaled variable importance (caret) for the ConfirmedCases tree.
importance <- varImp(cart_model, scale = FALSE)

# Summarize importance. The candidate predictors are the three formula terms:
# Day, Country_Region and Province_State.
print(importance)

# Plot importance.
plot(importance)

library(forcats)
# In-sample CART predictions of ConfirmedCases, rounded to whole numbers and
# stored in the Preds column.
covid.train$Preds <- round(predict(cart_model, newdata = covid.train), 0)

# Frequency table of the predicted values. The column is named `Preds`;
# `$` partial matching is case-sensitive, so `pred` does not resolve to it.
table(covid.train$Preds)

# Evaluation: root-mean-square logarithmic error on the training data.
# This is an in-sample figure: the same rows were used to fit the tree.
# log1p(x) is log(x + 1).
RMSLE_1 <- sqrt(mean(
  (log1p(covid.train$Preds) - log1p(covid.train$ConfirmedCases))^2
))

RMSLE_1

# Score the test table with both trees: cart_model fills ConfirmedCases and
# cart_model2 fills Fatalities.
covid.test[["ConfirmedCases"]] <- predict(cart_model, newdata = covid.test)
covid.test[["Fatalities"]] <- predict(cart_model2, newdata = covid.test)

# Keep only the three submission columns and write them without row names.
submission <- select(covid.test, ForecastId, ConfirmedCases, Fatalities)
write.csv(submission, file = "submission.csv", row.names = FALSE)

# ConfirmedCases summed per Country_Region over covid.train2 (the training
# rows dated before the first test date). sum() over the whole column returns
# a single number, which cannot be plotted against the per-row country
# vector, so the sum is taken within each country instead. Columns are
# referenced explicitly, so no attach() is needed.
# NOTE(review): if ConfirmedCases is a running total per day, a per-country
# maximum may be the more meaningful summary - confirm the intent.
cases_by_country <- tapply(
  covid.train2$ConfirmedCases,
  covid.train2$Country_Region,
  sum,
  na.rm = TRUE
)

# One bar per country, largest first; labels are rotated and shrunk so that
# many country names can share the axis.
barplot(
  sort(cases_by_country, decreasing = TRUE),
  las = 2,
  cex.names = 0.4,
  ylab = "Sum of ConfirmedCases"
)

# ---- SVM models ----

library(e1071)

# Linear-kernel support vector models, one per target, on the same three
# predictors as the CART models. scale = FALSE leaves the variables unscaled.
# The data frame is supplied through `data =`, so its columns do not need to
# be attach()ed to the search path.
svmfit <- svm(
  ConfirmedCases ~ Country_Region + Province_State + Day,
  data = covid.train,
  kernel = "linear",
  scale = FALSE
)

svmfit2 <- svm(
  Fatalities ~ Country_Region + Province_State + Day,
  data = covid.train,
  kernel = "linear",
  scale = FALSE
)

# In-sample SVM predictions of ConfirmedCases, stored in Preds. A linear
# model can return values below zero, which are not valid counts and break
# the logarithm used by RMSLE, so predictions are floored at 0 before
# rounding.
covid.train$Preds <- round(
  pmax(predict(svmfit, newdata = covid.train), 0),
  0
)

# Root-mean-square logarithmic error of the SVM on the training data,
# computed with the same formula as RMSLE_1 (log1p(x) is log(x + 1)).
RMSLE_2 <- sqrt(mean(
  (log1p(covid.train$Preds) - log1p(covid.train$ConfirmedCases))^2
))

RMSLE_2

# Test-set predictions for both targets, floored at 0 for the same reason.
covid.test$ConfirmedCases <- pmax(predict(svmfit, newdata = covid.test), 0)

covid.test$Fatalities <- pmax(predict(svmfit2, newdata = covid.test), 0)

# Keep only the three submission columns and write them without row names.
# NOTE(review): this writes to the same "submission.csv" path as the CART
# section and therefore replaces that file.
submission <- covid.test %>% select(ForecastId, ConfirmedCases, Fatalities)

write.csv(submission, file = "submission.csv", row.names = FALSE)

# ---- Random forest: dummy-encoded design table ----

library(fastDummies)

# Expand covid.train with dummy_cols() using its default settings.
results <- fastDummies::dummy_cols(covid.train)

# Drop the first four columns of the expanded table.
result2 <- results[, -(1:4)]

# Replace every whitespace character in the column names with an underscore.
names(result2) <- gsub("\\s", "_", names(result2))

# Seven column names are overwritten by position with hand-written values.
# NOTE(review): these positions depend on the exact column layout of
# covid.train at this point in the script - re-check them if the data or
# any earlier step changes.
colnames(result2)[c(165, 166, 168, 198, 217, 284, 287)] <- c(
  "Country_Region_Congo_Brazzaville",
  "Country_Region_Congo_Kinshasa",
  "Country_Region_Cote_dIvoire",
  "Country_Region_Guinea_Bissau",
  "Country_Region_Korea_South",
  "Country_Region_Taiwan",
  "Country_Region_Timor_Leste"
)

library(randomForest)

# Random forest for ConfirmedCases on the dummy-encoded table.
# The response is written as a bare column name. With
# `result2$ConfirmedCases ~ .` the `.` expands to every column of `data`,
# ConfirmedCases included, so the target would be used to predict itself.
# NOTE(review): result2 is built from covid.train after the Preds column was
# added, and it also contains Fatalities; both therefore still act as
# predictors here - confirm whether that is intended.
rnd <- randomForest(
  ConfirmedCases ~ .,
  data = result2,
  ntree = 301,
  mtry = 6,
  nodesize = 10,
  importance = TRUE
)

# You might also like