# BAR Codes - Module 1 - Common Functions - Intro

### Introduction to R
# Practising common functions.
# NOTE: `Concrete_Data` is not created in this script; it must already be
# loaded in the workspace before these lines are run.
conc <- as.data.frame(Concrete_Data)

## Observation/variable selection
conc[1:20, ]   # rows 1-20, all columns
conc[, 9]      # column 9, all rows
conc[1:20, 9]  # column 9 for rows 1-20
conc_1 <- conc[10:21, c(2, 6:7)]

# Negative indices drop rows/columns (original note: 1030 - 100 - 331 rows left)
conc_2 <- conc[-c(1:100, 200:530), -c(1, 3:5)]

# Subsetting rows on a criterion (cement content) and choosing the columns to
# be returned. Column names with spaces/brackets must be wrapped in backticks;
# inside subset() the columns can be referenced directly, without `conc$`.
conc_3 <- subset(
  conc,
  `Cement (component 1)(kg in a m^3 mixture)` >= 500,
  select = c("Age (day)")
)

# Template for renaming the nine columns. It is left commented out because
# assigning an empty vector, `names(conc)[1:9] <- c()`, stops the script with
# "replacement has length zero". Supply nine names to use it:
# names(conc)[1:9] <- c("name_1", "name_2", ...)
# Renaming columns/variables: give the first nine columns short, syntactic
# names so they can be used without backticks.
concrete <- Concrete_Data
names(concrete)[1:9] <- c(
  "cement_comp", "slag_comp", "flyash_comp",
  "water_ratio", "plastic_comp", "cagg_comp",
  "fagg_comp", "duration", "comp_strength"
)

## Columns can be referenced by bare name inside subset()
## (the original note suggests attach() as an alternative).
conc_3_2 <- subset(
  concrete,
  cement_comp >= 500,
  select = c("duration", "comp_strength")
)
# Subsetting on several criteria at once (conditions combined with `&`)
conc_4 <- subset(
  concrete,
  cement_comp <= 500 & comp_strength > 50,
  select = c("duration", "fagg_comp")
)
conc_5 <- subset(
  concrete,
  cement_comp <= 500 & comp_strength <= 50,
  select = c("duration", "fagg_comp")
)
# Row counts noted in the original: 811 + 189 = 1000

# Row positions with cement_comp above 500, and how many there are
which(concrete$cement_comp > 500)
sum(concrete$cement_comp > 500, na.rm = TRUE)  # 30

## Selecting a contiguous run of columns with `first:last`
conc_6 <- subset(
  concrete,
  duration > 25 & cement_comp >= 500,
  select = water_ratio:fagg_comp
)
# Take a random sample of 50 observations, without replacement.
# set.seed(123) fixes the random-number state so the same rows are drawn on
# every run; leave it out to get a different sample each time.
set.seed(123)
# seq_len() is used instead of 1:nrow() so an empty table cannot yield c(1, 0).
mysample_index <- sample(seq_len(nrow(concrete)), 50, replace = FALSE)
# Reuse the drawn index rather than reseeding and sampling a second time.
mysample <- concrete[mysample_index, ]
#### Computing column/row sums, means, etc.
colSums(conc_6)
colMeans(conc_6)  # verify against colSums / number of rows
rowSums(conc_1)
rowMeans(conc_1)  # verify against rowSums / number of columns

## Adding rows/columns
# rbind() needs the same variables in both inputs;
# cbind() needs the same number of observations.
conc_6_M <- colMeans(conc_6)
conc_6 <- rbind(conc_6, conc_6_M)    # append the column means as a last row
conc_comb <- rbind(conc_4, conc_5)   # stack the two subsets
## Descriptive statistics in R
## (the file can also be loaded through the import dropdown)
library(readr)
SENSEX <- read_csv("D:/BSc/SENSEX.csv")

## Preprocessing: keep the first five columns and drop rows 133 and 134
SENSEX <- SENSEX[, 1:5]
SENSEX <- SENSEX[-(133:134), ]
library(xlsx)
summary(SENSEX$Close)
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 8892 16864 19343 20665 26369 33602 2

# Install each package only when it is missing, so that re-running the script
# does not download and reinstall it every time.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
library(Hmisc)
describe(SENSEX$Close)

if (!requireNamespace("pastecs", quietly = TRUE)) {
  install.packages("pastecs")
}
library(pastecs)
stat.desc(SENSEX[, 5])

if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
# psych is attached after Hmisc, so this describe() call resolves to
# psych's version (it masks Hmisc's describe()).
describe(SENSEX[, 5])
## Download price data from a website (Yahoo, via tseries)
# Install tseries only when it is missing instead of on every run.
if (!requireNamespace("tseries", quietly = TRUE)) {
  install.packages("tseries")
}
library(tseries)

# Open/High/Low/Close quotes for 2017-04-01 to 2018-03-31.
# compression: "d" = daily, "m" = monthly.
gvkpil <- get.hist.quote(
  instrument = "GVKPIL.NS",
  start = "2017-04-01",
  end = "2018-03-31",
  quote = c("Open", "High", "Low", "Close"),
  provider = "yahoo",
  compression = "d"
)
tatasteel <- get.hist.quote(
  instrument = "TATASTEEL.NS",
  start = "2017-04-01",
  end = "2018-03-31",
  quote = c("Open", "High", "Low", "Close"),
  provider = "yahoo",
  compression = "d"
)

# Convert the downloaded series to data frames for the plotting code below
TSL <- as.data.frame(tatasteel)
GVK <- as.data.frame(gvkpil)
write.csv(GVK, "D:/Analytics Consulting using ML/gvksp.csv")
# Download daily closing prices for several tickers and combine them into a
# matrix with one column per ticker.
ticker <- c("TATASTEEL.NS", "GVKPIL.NS", "BANKINDIA.NS", "TCS.NS")
start <- "2017-04-01"
end <- "2018-03-31"

# One series per ticker, in the same order as `ticker`. lapply() builds the
# list in one pass instead of growing it inside a 1:length() loop.
stock_comb <- lapply(ticker, function(symbol) {
  get.hist.quote(
    instrument = symbol,
    start = start,
    end = end,
    quote = "Close",
    provider = "yahoo",
    compression = "d"
  )
})

# The original hard-coded a 247 x 4 matrix. Take the dimensions from the data
# instead, and stop if the tickers returned different numbers of observations:
# otherwise matrix() would misalign or recycle the prices across columns.
obs_per_ticker <- vapply(stock_comb, NROW, integer(1))
if (length(unique(obs_per_ticker)) != 1L) {
  stop(
    "Tickers returned different numbers of observations: ",
    paste(ticker, obs_per_ticker, sep = "=", collapse = ", "),
    call. = FALSE
  )
}
stock_prices <- matrix(
  unlist(stock_comb),
  nrow = obs_per_ticker[1],
  ncol = length(ticker)
)
stock_comb[1]  ## replace 1 with 2:4 to inspect the other tickers

## Export data from R
write.csv(stock_prices, "D:/Term I-XIMB_Biz Stat/stock_data.csv")
## Plotting in R
# Simple histograms of the first column of each data frame
hist(TSL[, 1])
hist(GVK[, 1])

# Coloured histograms with different numbers of bins
hist(TSL$Close, breaks = 4, col = "red")
hist(GVK$Close, breaks = 6, col = "yellow")

# Histogram with a normal curve overlaid
x <- GVK$Open
h <- hist(
  x,
  breaks = 5,
  col = "red",
  xlab = "Stock Price",
  main = "Histogram with Normal Curve"
)
xfit <- seq(min(x), max(x), length.out = 40)
# Scale the normal density by bin width * sample size so it is on the same
# scale as the histogram counts.
yfit <- dnorm(xfit, mean = mean(x), sd = sd(x)) * diff(h$mids[1:2]) * length(x)
lines(xfit, yfit, col = "blue", lwd = 2)

## Kernel density plots
plot(density(x))
plot(density(GVK$Close))

## Boxplots
boxplot(TSL)
boxplot(GVK$Close)
## Boxplots for multiple variables (one box per matrix column)
boxplot.matrix(stock_prices, use.cols = TRUE)

library(e1071)
skewness(stock_prices[, 1])  ## = -0.4219908
skewness(stock_prices[, 2])  # = 0.2083

# Column means and sample size are computed from the data rather than typed in
# by hand, so the check below stays valid if the downloaded prices change.
price_means <- colMeans(stock_prices)
price_means  ## 595.25129 12.85344 153.78340 2610.22835
n_prices <- nrow(stock_prices)

## Compare skewness: rebuild the statistic by hand for column 1.
## Third central moment (sum of cubed deviations / n) ...
skew_1 <- (stock_prices[, 1] - price_means[[1]])^3
skew_11 <- sum(skew_1) / n_prices  ## = -351452.152699
## ... divided by the sample variance to the power 1.5 (i.e. sd cubed)
skew_1f <- skew_11 / (var(stock_prices[, 1])^1.5)

## Compare skewness: third central moments for the remaining columns
skew_2 <- sum((stock_prices[, 2] - price_means[[2]])^3) / n_prices  ## = 28.383
skew_3 <- sum((stock_prices[, 3] - price_means[[3]])^3) / n_prices  ## = 99.01688
skew_4 <- mean((stock_prices[, 4] - price_means[[4]])^3)  ## = 9366989.915
# Matches skewness(stock_prices[, 2]) above
skew_2 / var(stock_prices[, 2])^1.5  ## = 0.2083754

BAR Codes-Module 1-Common Functions-Intro

Hochgeladen von

Dokumentinformationen

Originaltitel

Copyright

Verfügbare Formate

Dieses Dokument teilen

Dokument teilen oder einbetten

Freigabeoptionen

Stufen Sie dieses Dokument als nützlich ein?

Sind diese Inhalte unangemessen?

Copyright:

Verfügbare Formate

BAR Codes-Module 1-Common Functions-Intro

Hochgeladen von

Copyright:

Verfügbare Formate

###Introduction to R

#practicising common functions

conc_1 <- conc[c(10:21),c(2,6:7)]

#subsetting rows for a given criteria-comp strength and columns to be returned

conc_3 <-subset(conc, conc$`Cement (component 1)(kg in a m^3 mixture)`>=500,

#Based on criteria (multiple values):

conc_5 <- subset(concrete,comp_strength <=50 & cement_comp<= 500,

##selecting all columns through

# take a random sample of size 50

####Computing cloumn/rwo sums; means etc.

# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's

##download data from website

tatasteel <- get.hist.quote(instrument = "TATASTEEL.NS", start="2017-04-01",

ticker<- c("TATASTEEL.NS", "GVKPIL.NS", "BANKINDIA.NS", "TCS.NS")

start <- "2017-04-01"

##Export data from R

write.csv(stock_prices,"D:/Term I-XIMB_Biz Stat/stock_data.csv")

# Colored Histogram with Different Number of Bins

# Add a Normal Curve

h<-hist(x, breaks=5, col="red", xlab="Stock Price",

##BOXPLOTS FOR multiple vaariables

colMeans(stock_prices)##595.25129 12.85344 153.78340 2610.22835

Das könnte Ihnen auch gefallen