# Sie sind auf Seite 1 von 4

# Introduction to R ----
# Practising common data-frame operations.

# `Concrete_Data` is not created in this script; it must already exist in the
# session when this line runs.
conc <- as.data.frame(Concrete_Data)

# Observation / variable selection by position ----
conc[1:20, ]   # rows 1 to 20, every column
conc[, 9]      # column 9, every row
conc[1:20, 9]  # column 9 for rows 1 to 20

# Rows 10 to 21 of columns 2, 6 and 7.
conc_1 <- conc[10:21, c(2, 6, 7)]

# Negative indices drop rows 1-100 and 200-530 and columns 1, 3, 4 and 5.
# Row arithmetic noted in the original script: 1030 - 100 - 331.
conc_2 <- conc[-c(1:100, 200:530), -c(1, 3:5)]

# Rows meeting a criterion (cement content of at least 500), returning only
# the age column.
conc_3 <- subset(
  conc,
  `Cement (component 1)(kg in a m^3 mixture)` >= 500,
  select = "Age (day)"
)

# Renaming columns / variables ----
# Pattern: names(df)[idx] <- c("name_1", "name_2", ...), with one new name per
# selected column. The right-hand side must not be empty: assigning `c()`
# (i.e. NULL) into the names vector stops with "replacement has length zero",
# so no such placeholder call is made here and `conc` keeps its original names.
concrete <- Concrete_Data
names(concrete)[1:9] <- c(
  "cement_comp", "slag_comp", "flyash_comp",
  "water_ratio", "plastic_comp", "cagg_comp",
  "fagg_comp", "duration", "comp_strength"
)
# The original note suggested attach() as an alternative; referring to columns
# through subset() or `$` avoids attach()'s name-masking problems.

# Rows with cement_comp of at least 500, keeping duration and comp_strength.
conc_3_2 <- subset(
  concrete,
  cement_comp >= 500,
  select = c("duration", "comp_strength")
)

# Subsetting on several criteria at once ----

# comp_strength above 50 and cement_comp at most 500.
conc_4 <- subset(
  concrete,
  comp_strength > 50 & cement_comp <= 500,
  select = c("duration", "fagg_comp")
)

# comp_strength at most 50 and cement_comp at most 500.
conc_5 <- subset(
  concrete,
  comp_strength <= 50 & cement_comp <= 500,
  select = c("duration", "fagg_comp")
)

# Note recorded in the original script: 811 + 189 = 1000
# (presumably the row counts of the two subsets above -- confirm).

# Positions, then the count, of rows with cement_comp above 500.
which(concrete$cement_comp > 500)
sum(concrete$cement_comp > 500, na.rm = TRUE)  # recorded result: 30

# Selecting a run of adjacent columns with `first:last` ----
conc_6 <- subset(
  concrete,
  cement_comp >= 500 & duration > 25,
  select = water_ratio:fagg_comp
)

# Random sample of 50 rows, drawn without replacement ----
# The seed fixes which rows are drawn; run the draw without set.seed(123) to
# see the selection change from run to run.
set.seed(123)
mysample_index <- sample(seq_len(nrow(concrete)), size = 50, replace = FALSE)

# The sampled rows themselves: the same seeded draw, used as a row index.
mysample <- concrete[mysample_index, ]

# Computing column / row sums and means ----
colSums(conc_6)
colMeans(conc_6)  # verify by hand
rowSums(conc_1)
rowMeans(conc_1)  # verify by hand

# Adding rows / columns ----
# rbind() needs the same variables in every piece; cbind() needs the same
# number of observations.

# Append the column means of conc_6 to it as one extra row.
conc_6_M <- colMeans(conc_6)
conc_6 <- rbind(conc_6, conc_6_M)

# Stack the two criteria-based subsets on top of each other.
conc_comb <- rbind(conc_4, conc_5)
# Descriptive statistics in R ----
# The file can also be loaded through the import drop-down instead of code.
library(readr)
SENSEX <- read_csv("D:/BSc/SENSEX.csv")

# Pre-processing: keep the first five columns and drop rows 133 and 134.
SENSEX <- SENSEX[-(133:134), 1:5]

library(xlsx)
summary(SENSEX$Close)
# Output recorded in the original script:
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#    8892   16864   19343   20665   26369   33602       2

# Descriptive summaries from three add-on packages ----
# Each package is installed only when it is missing, so re-running the script
# does not download and reinstall it every time.

if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
library(Hmisc)
# Hmisc and psych both provide describe(); the namespace prefix keeps this
# call on Hmisc regardless of which package was attached last.
Hmisc::describe(SENSEX$Close)

if (!requireNamespace("pastecs", quietly = TRUE)) {
  install.packages("pastecs")
}
library(pastecs)
stat.desc(SENSEX[, 5])

if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
# Explicitly the psych version (see the note on Hmisc::describe above).
psych::describe(SENSEX[, 5])

# Download price data from the web ----
# Install tseries only when it is missing rather than on every run.
if (!requireNamespace("tseries", quietly = TRUE)) {
  install.packages("tseries")
}
library(tseries)

# Open/High/Low/Close quotes from Yahoo for 2017-04-01 to 2018-03-31.
# compression: "d" = daily, "m" = monthly.
gvkpil <- get.hist.quote(
  instrument = "GVKPIL.NS",
  start = "2017-04-01",
  end = "2018-03-31",
  quote = c("Open", "High", "Low", "Close"),
  provider = "yahoo",
  compression = "d"
)

tatasteel <- get.hist.quote(
  instrument = "TATASTEEL.NS",
  start = "2017-04-01",
  end = "2018-03-31",
  quote = c("Open", "High", "Low", "Close"),
  provider = "yahoo",
  compression = "d"
)

# Data-frame copies of the downloaded series, used by the plots further down.
TSL <- as.data.frame(tatasteel)
GVK <- as.data.frame(gvkpil)

# Save the GVK quotes to disk.
write.csv(GVK, "D:/Analytics Consulting using ML/gvksp.csv")

# Closing prices for several tickers ----
ticker <- c("TATASTEEL.NS", "GVKPIL.NS", "BANKINDIA.NS", "TCS.NS")
start <- "2017-04-01"
end <- "2018-03-31"

# One download per ticker, collected in a list in the order of `ticker`.
# lapply() builds the list in one step instead of growing it inside a
# `1:length()` loop.
stock_comb <- lapply(ticker, function(symbol) {
  get.hist.quote(
    instrument = symbol, start = start, end = end,
    quote = "Close", provider = "yahoo", compression = "d"
  )
})

# Number of observations returned for each ticker (the original run had 247).
# The matrix below simply concatenates the series column by column, so every
# series must have the same length; otherwise values would land in the wrong
# column without any error.
obs_per_ticker <- vapply(stock_comb, NROW, integer(1))
if (length(unique(obs_per_ticker)) != 1L) {
  stop(
    "tickers returned different numbers of observations: ",
    paste(obs_per_ticker, collapse = ", "),
    call. = FALSE
  )
}

# One column per ticker, one row per observation.
stock_prices <- matrix(
  unlist(stock_comb),
  nrow = obs_per_ticker[1],
  ncol = length(ticker)
)
stock_comb[1]  # replace 1 with 2:4 to inspect the other series

# Export data from R ----
write.csv(stock_prices, "D:/Term I-XIMB_Biz Stat/stock_data.csv")

# Plotting in R ----

# Simple histograms of the first column of each data frame.
hist(TSL[, 1])
hist(GVK[, 1])

# Coloured histograms with different numbers of bins.
hist(TSL$Close, col = "red", breaks = 4)
hist(GVK$Close, col = "yellow", breaks = 6)

# Histogram with a normal curve overlaid ----
x <- GVK$Open
# Keep finite values only. hist() ignores non-finite values, so the range,
# mean, sd and the count used for scaling below must be based on the same
# observations; a single NA would otherwise turn all of them into NA.
x <- x[is.finite(x)]

h <- hist(
  x,
  breaks = 5, col = "red", xlab = "Stock Price",
  main = "Histogram with Normal Curve"
)

# 40 evenly spaced points across the observed range (argument name spelled
# out in full instead of relying on partial matching of `length`).
xfit <- seq(min(x), max(x), length.out = 40)
yfit <- dnorm(xfit, mean = mean(x), sd = sd(x))
# Rescale the density to the histogram's count axis:
# distance between the first two bin midpoints times number of observations.
yfit <- yfit * diff(h$mids[1:2]) * length(x)
lines(xfit, yfit, col = "blue", lwd = 2)

# Kernel density plots ----
plot(density(x))
plot(density(GVK$Close))

# Box plots ----
boxplot(TSL)
boxplot(GVK$Close)

# Box plots for multiple variables: one box per column of the matrix.
# boxplot() dispatches to the matrix method for a matrix argument.
boxplot(stock_prices, use.cols = TRUE)

library(e1071)

# Skewness of each price column via e1071::skewness().
# Trailing values are the results recorded in the original script.
skewness(stock_prices[, 1])  # -0.4219908
skewness(stock_prices[, 2])  # 0.2083
skewness(stock_prices[, 3])  # 0.00599
skewness(stock_prices[, 4])  # 0.8538869

colMeans(stock_prices)  # recorded: 595.25129 12.85344 153.78340 2610.22835

# Manual check of the skewness values ----
# Skewness here = mean cubed deviation from the column mean, divided by
# var^1.5. The column means and the number of observations are taken from
# `stock_prices` itself rather than typed in, so the check stays valid when
# the downloaded data changes and is not affected by rounding of the means.
price_means <- colMeans(stock_prices)
n_prices <- nrow(stock_prices)

# Column 1, step by step: cubed deviations, their average, then the ratio.
skew_1 <- (stock_prices[, 1] - price_means[[1]])^3
skew_11 <- sum(skew_1) / n_prices  # recorded: -351452.152699
skew_1f <- skew_11 / (var(stock_prices[, 1])^1.5)

# Columns 2 to 4: mean cubed deviation in one step each. sum(...) / n and
# mean(...) are the same quantity written two ways.
skew_2 <- sum((stock_prices[, 2] - price_means[[2]])^3) / n_prices  # recorded: 28.383
skew_3 <- sum((stock_prices[, 3] - price_means[[3]])^3) / n_prices  # recorded: 99.01688
skew_4 <- mean((stock_prices[, 4] - price_means[[4]])^3)  # recorded: 9366989.915

# Ratios to compare with the skewness() output above.
skew_2 / var(stock_prices[, 2])^1.5  # recorded: 0.2083754
skew_3 / var(stock_prices[, 3])^1.5  # recorded: 0.005992238
skew_4 / var(stock_prices[, 4])^1.5  # recorded: 0.8538869

# Das könnte Ihnen auch gefallen