# Assignment 2

library(stringr)
library(dplyr)
library(tidyr)
library(ggplot2)
# Load the CWS event dataset (comma-separated file with a header row).

cws.event.df <- read.csv(
  file = "/Users/shidey/dev/RTraining/cws_event_dataset.csv",
  header = TRUE,
  sep = ","
)
head(cws.event.df)
################### ignore CWS Data not to be shared

###############################################
#1. Implement a for loop on the following by creating a simple example for each of them.
#
#a. Vector
# Extract the Stage column of the data frame as a plain vector.
first.row.vect <- as.vector(cws.event.df$Stage)
#
# Collect the distinct stages of a vector, in order of first appearance.
# @in_stages_char_vect : vector derived from the data frame
# returns : character vector holding every distinct value exactly once
uniqueStages <- function(in_stages_char_vect) {
  seen <- character(0)
  for (stage in in_stages_char_vect) {
    # Only append values that have not been recorded yet.
    if (!(stage %in% seen)) {
      seen <- c(seen, stage)
    }
  }
  seen
}
#
unique.sates.vect <- uniqueStages(in_stages_char_vect = first.row.vect)
#
#
#b. Matrix
#
#
#c. Data frame
# Parse the ISO-8601 text in X_time ("%Y-%m-%dT%H:%M:%S") into POSIXct, in UTC.
#
cws.event.df$TimeStamp <- as.POSIXct(
  cws.event.df$X_time,
  format = "%Y-%m-%dT%H:%M:%S",
  tz = "UTC"
)
#d. List
#################### NON CWS data. Consider this one instead ####################
#1. Implement a for loop on the following by creating a simple example for each of them.
#a. Vector
#let me try radix sort in R
# this should have answers to the subsequent questions
##
# Least-significant-digit radix sort for non-negative whole numbers.
# @input - a numeric vector of finite, non-negative whole numbers
# returns - the values of input in ascending order (as doubles);
#           numeric(0) for empty input; NA_real_ (with a warning) when the
#           input is not a vector, not numeric, or holds values this
#           digit-based sort cannot handle (NA, Inf, negative, fractional)
##
radixSort <- function(input) {
  if (!is.vector(input)) {
    warning('Please enter a vector')
    return(NA_real_)
  }
  if (!is.numeric(input)) {
    warning('Please enter a numeric vector')
    return(NA_real_)
  }
  if (length(input) == 0) {
    return(numeric(0))
  }
  if (any(!is.finite(input)) || any(input < 0) || any(input != floor(input))) {
    warning("Please enter finite, non-negative whole numbers")
    return(NA_real_)
  }
  # "%.0f" never switches to scientific notation, so 1e5 becomes "100000".
  keys <- sprintf("%.0f", as.numeric(input))
  digits <- max(nchar(keys))
  # Left-pad with zeros so every key has the same number of digits.
  keys <- paste0(strrep("0", digits - nchar(keys)), keys)
  # One column per digit position, one row per input value.
  rad <- data.frame(
    matrix("0", nrow = length(keys), ncol = digits),
    stringsAsFactors = FALSE
  )
  for (i in seq_len(digits)) {
    rad[[i]] <- substr(keys, i, i)
  }
  # Process digits from least to most significant; each pass is stable.
  for (z in rev(seq_len(digits))) {
    buckets <- vector("list", 10)
    for (d in 0:9) {
      buckets[[d + 1]] <- which(rad[[z]] == as.character(d))
    }
    # drop = FALSE keeps a one-column data frame from collapsing to a vector.
    rad <- rad[unlist(buckets), , drop = FALSE]
  }
  # Glue the digit columns back together into numbers.
  as.numeric(do.call(paste0, unname(as.list(rad))))
}
# test
radixSortTest <- c(1123, 100, 244, 76)
radixSort(input = radixSortTest)
#b. Matrix
##
# Transpose a matrix with nested for loops.
# @m - a matrix as input
# returns - a matrix with ncol(m) rows and nrow(m) columns holding
#           m[r, c] at position [c, r]; NA_real_ (with a warning) when m is
#           not a matrix
##
transposeMatrix <- function(m) {
  if (!is.matrix(m)) {
    warning("Please enter a matrix. Return NA")
    return(NA_real_)
  }
  # Allocate the result with the same storage type as m, so integer or
  # logical input is not silently turned into double.
  tran <- matrix(vector(typeof(m), length(m)), nrow = ncol(m), ncol = nrow(m))
  # seq_len() yields an empty sequence for zero-extent matrices (1:0 would not).
  for (r in seq_len(nrow(m))) {
    for (k in seq_len(ncol(m))) {
      tran[k, r] <- m[r, k]
    }
  }
  # Row names become column names and vice versa.
  if (!is.null(dimnames(m))) {
    dimnames(tran) <- rev(dimnames(m))
  }
  tran
}
# test
z <- matrix(seq_len(10), nrow = 5, ncol = 2)
tz <- transposeMatrix(m = z)
#c. Data frame

# answer to 'a' uses the dataframe for the radix
#d. List
# answer to 'a' uses a list for the index positions
#2. Implement a while loop on the following by creating a simple example for each of them.
#a. Vector
# Quicksort in R; the partition step provides the while(<condition>) example.
##
# @input - the vector to sort
# returns - whatever quickSortUtil returns for the full index range
#           1..length(input); NA_real_ (with a warning) when input is not a
#           vector
##
quickSort <- function(input) {
  if (is.vector(input)) {
    last.index <- as.integer(length(input))
    return(quickSortUtil(input, 1, last.index))
  }
  warning('Please enter a vector')
  NA_real_
}
##
# Recursive quicksort worker: sorts arr[lo..hi] and returns the whole vector.
# @arr - the vector being sorted
# @lo - lower bound (inclusive) of the range to sort
# @hi - upper bound (inclusive) of the range to sort
# returns - a copy of arr with positions lo..hi in ascending order
##
quickSortUtil <- function(arr, lo, hi) {
  # Ranges with zero or one element are already sorted.
  if (lo >= hi) {
    return(arr)
  }
  # Two elements: a single compare-and-swap is enough.
  if (hi - lo == 1) {
    if (arr[lo] > arr[hi]) {
      arr <- swap(arr, lo, hi)
    }
    return(arr)
  }
  # partition() returns list(<final pivot index>, <rearranged vector>).
  vector.pivot.df <- partition(arr, lo, hi)
  pivot <- as.integer(vector.pivot.df[[1]])
  arr <- vector.pivot.df[[2]]
  # R passes arguments by value, so the sorted left part must be captured and
  # handed on to the call that sorts the right part.
  arr <- quickSortUtil(arr, lo, pivot - 1)
  quickSortUtil(arr, pivot + 1, hi)
}
##
# Partition arr[lo..hi] around a randomly chosen pivot (two-pointer scan).
# @arr - a vector with numeric
# @lo - lower bound (inclusive)
# @hi - upper bound (inclusive)
# returns - a list of two elements: the final index of the pivot (integer)
#           and the rearranged vector. Afterwards every element of lo..hi left
#           of the pivot is <= pivot and every element right of it is >= pivot;
#           positions outside lo..hi are untouched.
##
partition <- function(arr, lo, hi) {
  # sample.int(n, 1) is in 1..n, so subtract 1 to land inside lo..hi.
  pivotIndex <- lo + sample.int(hi - lo + 1, 1) - 1
  pivot <- arr[pivotIndex]
  # Park the pivot at the front of the range while scanning.
  arr[c(lo, pivotIndex)] <- arr[c(pivotIndex, lo)]
  start <- lo
  end <- hi + 1
  while (TRUE) {
    # Advance from the left to the next element that is not below the pivot.
    start <- start + 1
    while (start <= hi && arr[start] < pivot) {
      start <- start + 1
    }
    # Retreat from the right to the next element that is not above the pivot;
    # arr[lo] holds the pivot itself, so this scan always stops by lo.
    end <- end - 1
    while (arr[end] > pivot) {
      end <- end - 1
    }
    if (start >= end) {
      break
    }
    arr[c(start, end)] <- arr[c(end, start)]
  } # end of while
  # Move the pivot from the front of the range to its final position.
  arr[c(lo, end)] <- arr[c(end, lo)]
  list(as.integer(end), arr)
} # end of partition
##
# Exchange two elements of a vector.
# @arr - a vector
# @first - index (or name) of one element
# @second - index (or name) of the other element
# returns - a copy of arr with the two elements exchanged
##
swap <- function(arr, first, second) {
  # Assigning both positions in one step needs no temporary variable.
  arr[c(first, second)] <- arr[c(second, first)]
  arr
}
## Increments a variable in the caller's frame (pre-increment, like ++x)
# @x - the variable to increment; it is used as an unevaluated expression,
#      so pass a variable name rather than a literal value
# returns - the value of x after the increment
##
inc <- function(x)

{
# substitute() swaps x for the caller's expression and eval.parent() runs the
# resulting assignment in the calling frame, so the caller's variable changes.
eval.parent(substitute(x <- x + 1))
# x is first evaluated here, after the assignment above, so the already
# incremented value is returned.
return(x)
}
## Decrements a variable in the caller's frame (pre-decrement, like --x)
# @x - the variable to decrement; it is used as an unevaluated expression,
#      so pass a variable name rather than a literal value
# returns - the value of x after the decrement
##
dec <- function(x)

{
# substitute() swaps x for the caller's expression and eval.parent() runs the
# resulting assignment in the calling frame, so the caller's variable changes.
eval.parent(substitute(x <- x - 1))
# x is first evaluated here, after the assignment above, so the already
# decremented value is returned.
return(x)
}
#b. Matrix
# Transpose a matrix with nested while loops.
# @m - a matrix as input
# returns - a matrix with ncol(m) rows and nrow(m) columns holding
#           m[r, c] at position [c, r]; NA_real_ (with a warning) when m is
#           not a matrix
transposeMatrix <- function(m) {
  if (!is.matrix(m)) {
    warning("Please enter a matrix. Return NA")
    return(NA_real_)
  }
  # Allocate the result with the same storage type as m, so integer or
  # logical input is not silently turned into double.
  tran <- matrix(vector(typeof(m), length(m)), nrow = ncol(m), ncol = nrow(m))
  row <- 1
  while (row <= nrow(m)) {
    # The column counter must restart for every row; otherwise only the first
    # row would ever be copied.
    col <- 1
    while (col <= ncol(m)) {
      tran[col, row] <- m[row, col]
      col <- col + 1
    }
    row <- row + 1
  }
  # Row names become column names and vice versa.
  if (!is.null(dimnames(m))) {
    dimnames(tran) <- rev(dimnames(m))
  }
  tran
}
# test
z <- matrix(seq_len(10), nrow = 5, ncol = 2)
tz <- transposeMatrix(m = z)
#c. Data frame
# Fill a data frame row by row inside a while loop.

x <- runif(1)
y <- x + 1
z <- x + y
# Placeholder with ten all-zero rows.
test.df <- data.frame(x = rep(0, 10), y = rep(0, 10), z = rep(0, 10))
# Write the same (x, y, z) triple into every row.
count <- 1
while (count <= nrow(test.df)) {
  test.df[count, ] <- c(x, y, z)
  count <- count + 1
}
#d. List
# Collect 200 uniform random draws in a list, one per iteration.
rnd.list <- vector("list", 200)
count <- 1
while (count <= 200) {
  rnd.list[[count]] <- runif(1)
  count <- count + 1
}
hist(unlist(rnd.list))
#3. Convert Icons into a data frame with 2 columns: name and yob using the following ideas:
# a. For loop
#b. Lapply
#c. Sapply
# Each entry is "<name>:<year of birth>".
Icons <- c("Sachin Tendulkar:1973",
           "Brian Lara:1969",
           "Virat Kohli:1988",
           "AB De Villiers:1984")
# a. loop
# Append one row per icon to an initially empty data frame.
icons.df <- data.frame(ROWID = numeric(0), ICONS = character(), DOB = character(),
                       stringsAsFactors = FALSE)
for (icon in Icons) {
  token <- unlist(strsplit(icon, ":", fixed = TRUE))
  # A list (not c()) keeps ROWID numeric; c() would coerce it to character.
  icons.df[nrow(icons.df) + 1, ] <- list(nrow(icons.df) + 1, token[1], token[2])
}
# Alternatively: build one-row data frames in the loop and bind them once at
# the end, instead of calling rbind() on every iteration.
icons.df <- data.frame(ROWID = numeric(0), ICONS = character(), DOB = character(),
                       stringsAsFactors = FALSE)
icon.rows <- vector("list", length(Icons))
cnt <- 0
for (icon in Icons) {
  token <- unlist(strsplit(icon, ":", fixed = TRUE))
  cnt <- cnt + 1
  icon.rows[[cnt]] <- data.frame(ROWID = cnt, ICONS = token[1], DOB = token[2],
                                 stringsAsFactors = FALSE)
}
# Starting from the empty template keeps the column layout even if Icons is empty.
icons.df <- do.call(rbind, c(list(icons.df), icon.rows))
# b sapply
# Split every entry at ":" with sapply; the result is one (name, year) pair per icon.

sapply.list <- sapply(Icons, strsplit, split = ":", simplify = "array")
# Lay the flattened pairs out row-wise: one row per icon, however many there are.
icons.df.sapply <- data.frame(
  matrix(unlist(sapply.list), nrow = length(Icons), byrow = TRUE),
  stringsAsFactors = FALSE
)
colnames(icons.df.sapply) <- c("ICON", "DOB")
icons.df.sapply
# c lapply
# Same conversion, with lapply producing the list of split entries.
lapply.list <- lapply(Icons, strsplit, split = ":")
icons.df.lapply <- data.frame(
  matrix(unlist(lapply.list), nrow = length(Icons), byrow = TRUE),
  stringsAsFactors = FALSE
)
colnames(icons.df.lapply) <- c("ICON", "DOB")
icons.df.lapply
#4. Use the mpg dataset within ggplot2 package and visually analyse the following questions :
data(mpg)
#a. A single quantitative variable

# Engine displacement (displ).
bar.displ <- ggplot(data = mpg, mapping = aes(x = displ))
# Bar plot of the counts per displacement value.
bar.displ + geom_bar()
#b. Relationship between any two quantitative variables
plot.scatter.displacement.highwayperf <- ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
)
plot.scatter.displacement.highwayperf + geom_point()
# A correlation exists.
# The trend is downward:
# the bigger the engine, the lower the fuel economy.
# There are some outliers towards the right.
#c. A single qualitative variable

# Manufacturer.
bar.manufacturer <- ggplot(data = mpg, mapping = aes(x = manufacturer))
# Bar plot of the counts per manufacturer.
bar.manufacturer + geom_bar()
# toyota and dodge occur more frequently than the rest.
#d. Relationship between any two qualitative variables
# Manufacturer against model.
ggplot(data = mpg, mapping = aes(x = model, y = manufacturer)) +
  geom_point()
# Each point represents a model-manufacturer combination.
# No interpretable relationship.
# No insight can be obtained by comparing the positions of the points.
# Not a useful plot.
#e. A quantitative variable and a qualitative variable .

ggplot(data = mpg, mapping = aes(x = drv, y = cty)) +
  geom_boxplot() +
  scale_x_discrete(
    limits = c("f", "r", "4"),
    labels = c("Front wheel", "Rear wheel", "Four wheel")
  )
# Front-wheel-drive cars deliver better fuel economy.
# Front Wheel cars deliver better fuel economy

Assignment 2

Hochgeladen von

Dokumentinformationen

Originaltitel

Copyright

Verfügbare Formate

Dieses Dokument teilen

Dokument teilen oder einbetten

Freigabeoptionen

Stufen Sie dieses Dokument als nützlich ein?

Sind diese Inhalte unangemessen?

Copyright:

Verfügbare Formate

Assignment 2

Hochgeladen von

Copyright:

Verfügbare Formate

library(stringr)

# read the cws event dataset

################### ignore CWS Data not to be shared

#################### NON CWS data. Consider this one instead ####################

rad<-rad %>% unite_(col = "num", from = colnames(rad), sep = "")

transposeMatrix <- function(m) {

#c. Data frame

# let me try quicksort in R. This includes example of a while(<condition>)

quickSort <- function(input){

quickSortUtil<- function(arr , lo , hi){

pivotIndex <- lo+sample.int(hi - lo +1, 1)

while (arr[dec(end)] > pivot) {

if(end <= start){

arr <- swap(arr , start , end)

swap <- function(arr , first , second) {

inc <- function(x)

dec <- function(x)

#c. Data frame

# populate the df in a loop

Icons<- c("Sachin Tendulkar:1973",

icons.df <- data.frame(ROWID= numeric(0) , ICONS= character() , DOB = character() ,

#Using sapply method to create list of Icons with splited value

#a. A single quantitative variable

#c. A single qualitative variable

#e. A quantitative variable and a qualitative variable .

Das könnte Ihnen auch gefallen