# [page-viewer residue] Page 1 of 10

library(stringr)

library(dplyr)
library(tidyr)
library(ggplot2)

# Load the CWS event dataset from its CSV export and preview the first rows.
cws.event.df <- read.csv(
  file = "/Users/shidey/dev/RTraining/cws_event_dataset.csv",
  header = TRUE,
  sep = ","
)
head(cws.event.df)

################### ignore CWS Data not to be shared


###############################################
#1. Implement a for loop on the following by creating a simple example for each of them.
#
#a. Vector
# Pull the Stage column of the data frame out as a plain vector
# (despite the variable name, this is a column, not the first row).
first.row.vect <- as.vector(cws.event.df$Stage)
#
# Collects the distinct stage values with an explicit for loop.
# @in_stages_char_vect : vector derived from the dataframe
# returns : character vector holding each value once, in order of first
#           appearance
uniqueStages <- function(in_stages_char_vect) {
  seen <- character(0)
  for (stage in in_stages_char_vect) {
    # append only values that have not been collected yet
    if (!(stage %in% seen)) {
      seen <- c(seen, stage)
    }
  }
  seen
}
#
# Distinct stage values, kept in the order in which uniqueStages first meets them.
unique.sates.vect <- uniqueStages(first.row.vect)
#
#
#b. Matrix
#
#
#c. Data frame
# Parse the ISO-8601 date field (X_time) into POSIXct values in UTC.
#
# `format` is passed by name: the second positional parameter of as.POSIXct()
# is `tz`, so an unnamed format string is only picked up through argument
# forwarding. as.character() makes the parse independent of whether
# read.csv() delivered the column as strings or as factors.
cws.event.df$TimeStamp <- as.POSIXct(
  as.character(cws.event.df$X_time),
  format = "%Y-%m-%dT%H:%M:%S",
  tz = "UTC"
)
#d. List

#################### NON CWS data. Consider this one instead ####################

#1. Implement a for loop on the following by creating a simple example for each of them.

#a. Vector
#let me try radix sort in R
# this should have answers to the subsequent questions

##
# Sorts non-negative whole numbers in ascending order with a
# least-significant-digit radix sort.
#
# @input – a numeric vector of non-negative whole numbers (no NA / Inf)
# returns – the sorted values as a numeric vector; numeric(0) for empty
#           input; NA_real_ (with a warning) when the input is not a
#           supported numeric vector
##
radixSort <- function(input) {
  if (!is.vector(input)) {
    warning('Please enter a vector')
    return(NA_real_)
  }
  if (!is.numeric(input)) {
    warning('Please enter a numeric vector')
    return(NA_real_)
  }
  if (length(input) == 0L) {
    return(numeric(0))
  }
  # Digit-wise sorting is only defined here for finite, non-negative whole
  # numbers; is.finite() also rejects NA, so the later comparisons are safe.
  if (any(!is.finite(input)) || any(input < 0) || any(input != floor(input))) {
    warning("Please enter non-negative whole numbers")
    return(NA_real_)
  }

  # Fixed-notation digit strings (never "1e+05"), zero-padded to equal width.
  values <- as.numeric(input)
  digits <- max(nchar(formatC(values, format = "f", digits = 0)))
  padded <- formatC(values, format = "f", digits = 0, width = digits, flag = "0")

  # One character column per digit position (column 1 = most significant).
  rad <- data.frame(matrix("", nrow = length(values), ncol = digits),
                    stringsAsFactors = FALSE)
  for (i in seq_len(digits)) {
    rad[[i]] <- substr(padded, i, i)
  }

  # Process the least significant digit first. Each pass gathers the row
  # positions per digit 0..9 in a list and concatenates them, which keeps the
  # pass stable. drop = FALSE keeps `rad` a data frame even with one column.
  for (z in rev(seq_len(digits))) {
    buckets <- lapply(as.character(0:9), function(d) which(rad[[z]] == d))
    rad <- rad[unlist(buckets), , drop = FALSE]
  }

  # Glue the digit columns back together row by row.
  as.numeric(do.call(paste0, unname(as.list(rad))))
}

# quick check: sort a small numeric vector
radixSortTest <- c(1123, 100, 244, 76)
radixSort(radixSortTest)

#b. Matrix

# transpose a matrix (for-loop version)
##
# @m – a matrix as input
# returns – a matrix with ncol(m) rows and nrow(m) columns in which element
#           [j, i] equals m[i, j]; NA_real_ (with a warning) when m is not
#           a matrix
##
transposeMatrix <- function(m) {
  if (!is.matrix(m)) {
    warning("Please enter a matrix. Return NA")
    return(NA_real_)
  }
  # Placeholder filled with an NA of m's own type, so integer / logical /
  # character input is not coerced by the placeholder value.
  tran <- matrix(m[NA_integer_], nrow = ncol(m), ncol = nrow(m))
  # seq_len() yields an empty sequence for a zero-extent dimension, whereas
  # 1:0 would iterate over c(1, 0) and index out of bounds.
  for (i in seq_len(nrow(m))) {
    for (j in seq_len(ncol(m))) {
      tran[j, i] <- m[i, j]
    }
  }
  tran
}

# quick check: transpose a 5 x 2 integer matrix
z <- matrix(seq_len(10), nrow = 5, ncol = 2)
tz <- transposeMatrix(z)

#c. Data frame


# answer to 'a' uses the dataframe for the radix

#d. List
# answer to 'a' uses a list for the index positions

#2. Implement a while loop on the following by creating a simple example for each of
# them.

#a. Vector

# Quicksort in R. The partition step is the example of a while(<condition>) loop.

##
# Entry point: hands the whole index range of the vector to quickSortUtil.
#
# @input – a vector to sort
# returns – the result of quickSortUtil over positions 1..length(input), or
#           NA_real_ (with a warning) when input is not a vector
##
quickSort <- function(input) {
  if (is.vector(input)) {
    upper <- as.integer(length(input))
    return(quickSortUtil(input, 1, upper))
  }
  warning('Please enter a vector')
  NA_real_
}

##
# Recursively sorts the positions lo..hi of a vector.
#
# @arr – the vector being sorted
# @lo – lower bound of the range to sort
# @hi – upper bound of the range to sort
# returns – a copy of arr in which positions lo..hi are in ascending order
##
quickSortUtil <- function(arr, lo, hi) {
  # Ranges with zero or one element are already sorted.
  if (lo >= hi) {
    return(arr)
  }
  # Two elements: a single compare-and-swap is enough.
  if (hi - lo == 1) {
    if (arr[lo] > arr[hi]) {
      arr <- swap(arr, lo, hi)
    }
    return(arr)
  }
  # partition() returns a list: [[1]] is the final pivot position, [[2]] the
  # vector rearranged around that pivot. R passes vectors by value, so
  # returning the modified copy is the normal way to hand it back.
  pivot.and.vector <- partition(arr, lo, hi)
  pivot <- as.integer(pivot.and.vector[[1]])
  arr <- pivot.and.vector[[2]]
  # For the same reason each recursive call must be captured: discarding the
  # result of the left-hand call would lose that half of the sort.
  arr <- quickSortUtil(arr, lo, pivot - 1)
  quickSortUtil(arr, pivot + 1, hi)
}

##
# Rearranges arr[lo..hi] around a randomly chosen pivot.
#
# @arr – a vector with comparable, non-NA values
# @lo – lower bound
# @hi – upper bound (must be >= lo)
# returns – a list of two elements: the final pivot position (integer) and
#           the rearranged vector, in which every value in lo..(pivot - 1)
#           is <= the pivot value and every value in (pivot + 1)..hi is
#           greater than it. Positions outside lo..hi are left untouched.
##
partition <- function(arr, lo, hi) {
  stopifnot(lo <= hi)

  # sample.int() returns a value in 1..(hi - lo + 1); subtracting 1 keeps the
  # chosen index inside lo..hi (without it the index could be hi + 1).
  pivotIndex <- lo + sample.int(hi - lo + 1, 1) - 1
  pivot <- arr[pivotIndex]

  # Park the pivot at position lo; it also stops the right-to-left scan.
  arr[pivotIndex] <- arr[lo]
  arr[lo] <- pivot

  start <- lo
  end <- hi + 1

  while (TRUE) {
    # Scan right for a value greater than the pivot (or stop at hi).
    start <- start + 1
    while (start < hi && arr[start] <= pivot) {
      start <- start + 1
    }

    # Scan left for a value not greater than the pivot; arr[lo] holds the
    # pivot itself, so this scan cannot run past lo.
    end <- end - 1
    while (arr[end] > pivot) {
      end <- end - 1
    }

    # Scans met or crossed: position `end` is the pivot's final place.
    if (end <= start) {
      arr[lo] <- arr[end]
      arr[end] <- pivot
      return(list(as.integer(end), arr))
    }

    # Both values sit on the wrong side of the pivot: exchange them.
    tmp <- arr[start]
    arr[start] <- arr[end]
    arr[end] <- tmp
  }
}

##
# Exchanges two elements of a vector.
#
# @arr – a vector
# @first – some index
# @second – other index
# returns – a copy of arr with the values at the two indices exchanged
##
swap <- function(arr, first, second) {
  # Both positions are read before either is written, so no temporary is
  # needed. (The former sprintf() trace was dropped: its result was never
  # printed, and "%d" raises an error for non-whole numeric indices.)
  arr[c(first, second)] <- arr[c(second, first)]
  arr
}

## Increments a variable of the caller in place (like ++x)
# @x – the variable to increment; the expression is captured unevaluated
# returns – the value of x after the increment
##
inc <- function(x) {
  target <- substitute(x)
  caller <- parent.frame()
  # run `<target> <- <target> + 1` in the frame that called inc()
  eval(bquote(.(target) <- .(target) + 1), caller)
  # x is evaluated only now, so it already sees the incremented value
  x
}

## Decrements a variable of the caller in place (like --x)
# @x – the variable to decrement; the expression is captured unevaluated
# returns – the value of x after the decrement
##
dec <- function(x) {
  target <- substitute(x)
  caller <- parent.frame()
  # run `<target> <- <target> - 1` in the frame that called dec()
  eval(bquote(.(target) <- .(target) - 1), caller)
  # x is evaluated only now, so it already sees the decremented value
  x
}

#b. Matrix

# transpose a matrix (while-loop version)
# @m – a matrix as input
# returns – a matrix with ncol(m) rows and nrow(m) columns in which element
#           [j, i] equals m[i, j]; NA_real_ (with a warning) when m is not
#           a matrix
transposeMatrix <- function(m) {
  if (!is.matrix(m)) {
    warning("Please enter a matrix. Return NA")
    return(NA_real_)
  }
  # Placeholder filled with an NA of m's own type, so the input type survives.
  tran <- matrix(m[NA_integer_], nrow = ncol(m), ncol = nrow(m))
  i <- 1
  while (i <= nrow(m)) {
    # The column counter must restart for every row; a counter that keeps
    # running would copy the first row only.
    j <- 1
    while (j <= ncol(m)) {
      tran[j, i] <- m[i, j]
      j <- j + 1
    }
    i <- i + 1
  }
  tran
}

# quick check: transpose a 5 x 2 integer matrix with the while-loop version
z <- matrix(seq_len(10), nrow = 5, ncol = 2)
tz <- transposeMatrix(z)

#c. Data frame

# populate the df in a loop

# One random draw plus two values derived from it; every row receives this
# same triple because the draw happens once, before the loop.
x <- runif(1)
y <- x + 1
z <- x + y
# zero-filled 10 x 3 placeholder
test.df <- data.frame(x = numeric(10), y = numeric(10), z = numeric(10))
count <- 0
# inc(count) bumps count in place and yields the new value, so the loop
# visits rows 1..nrow(test.df)
while (inc(count) <= nrow(test.df)) {
  test.df[count, ] <- c(x, y, z)
}
#d. List
# Append 200 uniform random draws to a list, one per pass of the while loop,
# then plot how they are distributed.
rnd.list <- list()
count <- 0
while (inc(count) <= 200) {
  rnd.list[[count]] <- runif(1)
}
hist(unlist(rnd.list))

#3. Convert Icons into a data frame with 2 columns: name and yob using the following
# ideas:

# a. For loop

#b. Lapply

#c. Sapply

# Raw input: one "name:year" string per icon.
Icons <- c(
  "Sachin Tendulkar:1973",
  "Brian Lara:1969",
  "Virat Kohli:1988",
  "AB De Villiers:1984"
)

# a. loop

# Start from an empty frame with the intended column types, then append one
# row per icon.
icons.df <- data.frame(ROWID = numeric(0), ICONS = character(), DOB = character(),
                       stringsAsFactors = FALSE)

for (icon in Icons) {
  # The new row is assembled as a list so that ROWID stays numeric; combining
  # the number and the two strings with c() would turn all three into text.
  icons.df[nrow(icons.df) + 1, ] <- c(
    list(nrow(icons.df) + 1),
    as.list(unlist(strsplit(icon, ":")))
  )
}

#Alternatively
# Build one single-row data frame per icon inside the loop and bind them all
# in one step afterwards; calling rbind() on every pass would copy the
# growing frame each time. Re-assigning icons.df replaces any earlier value,
# so no rm() is needed first.
icons.df <- data.frame(ROWID = numeric(0), ICONS = character(), DOB = character(),
                       stringsAsFactors = FALSE)
icon.rows <- vector("list", length(Icons))
cnt <- 0
for (icon in Icons) {
  token <- unlist(strsplit(icon, ":"))
  cnt <- cnt + 1
  # stringsAsFactors = FALSE keeps the text columns as character on every
  # R version, matching the empty template above.
  icon.rows[[cnt]] <- data.frame(ROWID = cnt, ICONS = token[1], DOB = token[2],
                                 stringsAsFactors = FALSE)
}
# The empty template goes first so the result keeps its columns even when
# Icons has no entries.
icons.df <- do.call(rbind, c(list(icons.df), icon.rows))

# b sapply

# Split every entry on ":" with sapply; the result holds the pieces per icon.
sapply.list <- sapply(Icons, strsplit, ":", simplify = "array")
# Lay the pieces out two per row (name, year). Fixing the column count,
# rather than hard-coding four rows, keeps this correct when Icons changes.
icons.df.sapply <- data.frame(
  matrix(as.character(unlist(sapply.list)), ncol = 2, byrow = TRUE),
  stringsAsFactors = FALSE
)
colnames(icons.df.sapply) <- c("ICON", "DOB")
icons.df.sapply

# c lapply
# Split every entry on ":" with lapply, which always returns a list.
lapply.list <- lapply(Icons, strsplit, ":")
# Lay the pieces out two per row (name, year). Fixing the column count,
# rather than hard-coding four rows, keeps this correct when Icons changes.
icons.df.lapply <- data.frame(
  matrix(as.character(unlist(lapply.list)), ncol = 2, byrow = TRUE),
  stringsAsFactors = FALSE
)
colnames(icons.df.lapply) <- c("ICON", "DOB")
icons.df.lapply

#4. Use the mpg dataset within ggplot2 package and visually analyse the following questions :

data(mpg)

#a. A single quantitative variable

# let's take displ (engine displacement)
bar.displ <- ggplot(mpg, aes(displ))
# displ is continuous, so its distribution is shown as a histogram: values are
# grouped into bins instead of drawing one bar per distinct value.
# An explicit binwidth avoids relying on the default number of bins.
bar.displ + geom_histogram(binwidth = 0.5)
#b. Relationship between any two quantitative variables
# scatter plot with displ on the x axis and hwy on the y axis
plot.scatter.displacement.highwayperf <- ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
)
plot.scatter.displacement.highwayperf + geom_point()

# Observations:
# - a correlation exists, with a downward trend
# - the bigger the engine, the lower the fuel economy
# - some outliers show up towards the right

#c. A single qualitative variable

# let's take manufacturer
bar.manufacturer <- ggplot(data = mpg, mapping = aes(x = manufacturer))
# bar plot: one bar per manufacturer, bar height = number of rows
bar.manufacturer + geom_bar()
# toyota and dodge occur more often than the rest
#d. Relationship between any two qualitative variables
# let's take manufacturer and model
ggplot(data = mpg, mapping = aes(x = model, y = manufacturer)) +
  geom_point()
# Each point represents a model-manufacturer combination.
# No interpretable relationship: comparing the positions of the points
# gives no insight, so this plot was not found useful.

#e. A quantitative variable and a qualitative variable .

# box plot of cty per drv value, with the drv codes shown in a fixed order
# under readable labels
ggplot(data = mpg, mapping = aes(x = drv, y = cty)) +
  geom_boxplot() +
  scale_x_discrete(
    limits = c("f", "r", "4"),
    labels = c("Front wheel", "Rear wheel", "Four wheel")
  )
# Front Wheel cars deliver better fuel economy

# [page-viewer residue] You might also like