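## NOTE: "Beruflich Dokumente" / "Kultur Dokumente" is navigation text left
## over from the web page this script was extracted from; it is not R code.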
## Share of flagged rows by cumulative percentile of margin.volume.clean.
## For each percentile y in x (0.01, 0.02, ..., 1) this counts the rows among
## the first y * nrow(margin.volume.clean) rows whose column 6 equals 1 and
## divides that count by nrow(um). The 100 results are stored in `du`.
## NOTE(review): column 6 is presumably the managed-product flag and the table
## is presumably pre-sorted by performance -- confirm upstream.
## Fix: the original `du <append(...)` was a comparison, not an assignment,
## so `du` never received any values.
x <- seq(from = 0.01, to = 1, by = 0.01)
managed.flag <- margin.volume.clean[, 6]
total.rows <- nrow(margin.volume.clean)
du <- vapply(
  x,
  function(y) {
    ## 1:(y * total.rows) stops at the last whole number not exceeding the
    ## bound, which is the same row selection the original index produced.
    head.rows <- 1:(y * total.rows)
    sum(managed.flag[head.rows] == 1, na.rm = TRUE) / nrow(um)
  },
  numeric(1)
)
library(Hmisc)
library(reshape2)
## Merge the SPSS ZIP-code statistics with the demographic table and derive,
## per ZIP code, each sector column divided by the row total.
## Fixes: restored four assignments that had degraded to `<` comparisons,
## quoted the merge key on the second merge (an unquoted JURISDICTION.NAME is
## looked up as a variable), spelled out melt()'s argument names, and replaced
## the grow-in-a-loop row sum with rowSums().
## NOTE(review): setwd() ties the script to one machine; it is kept because
## the relative file names below depend on it.
setwd("C:\\Users\\ma\\Documents\\Columbia Data")
original.statistics <- spss.get("zip-bp01.sav")
demographics <- read.csv("Demographic_Statistics_By_Zip_Code.csv", header = TRUE)
merged.stats <- merge(
  demographics,
  original.statistics,
  by.x = "JURISDICTION.NAME",
  by.y = "ZIP"
)

## Long format: one row per (SECTOR, JURISDICTION.NAME) with PERCENT.CONTRACTS.
melted.merge <- melt(
  merged.stats,
  id.vars = c("SECTOR", "JURISDICTION.NAME"),
  measure.vars = c("PERCENT.CONTRACTS")
)
melted.merge <- transform(
  melted.merge,
  Sector.ID = as.numeric(melted.merge$SECTOR)
)

## Wide format: one row per JURISDICTION.NAME, one column per SECTOR.
casted.merge <- dcast(melted.merge, JURISDICTION.NAME ~ SECTOR)

## Row totals over columns 2:22.
## NOTE(review): 2:22 assumes dcast() produced exactly 21 sector columns --
## confirm against the data.
sum.industries <- unname(rowSums(casted.merge[, 2:22]))
casted.merge <- transform(casted.merge, Total.Industry = sum.industries)
casted.merge <- merge(demographics, casted.merge, by = "JURISDICTION.NAME")

## NOTE(review): after the merge above, columns 2:22 and the row order/count of
## casted.merge.mod may no longer line up with sum.industries -- confirm that
## this division still targets the sector columns.
casted.merge.mod <- casted.merge
for (n in 2:22) {
  casted.merge.mod[, n] <- casted.merge.mod[, n] / sum.industries
}
## NOTE(review): the source line here read "casted.merge.mod <dnu <- c()". A
## statement beginning with "casted.merge.mod <-" appears to have lost its
## right-hand side during extraction and cannot be reconstructed from this
## view; only the `dnu` computation is restored below.

## Share of unflagged rows by cumulative percentile of margin.volume.clean.
## For each percentile y in x this counts the rows among the first
## y * nrow(margin.volume.clean) rows whose column 6 equals 0 and divides that
## count by nrow(num). The results are stored in `dnu`, one per element of x.
## Fix: the original `dnu <append(...)` was a comparison, not an assignment.
non.managed.flag <- margin.volume.clean[, 6]
dnu <- vapply(
  x,
  function(y) {
    ## Same row selection as the original 1:(y * nrow(...)) index.
    head.rows <- 1:(y * nrow(margin.volume.clean))
    sum(non.managed.flag[head.rows] == 0, na.rm = TRUE) / nrow(num)
  },
  numeric(1)
)
## Table pairing each cumulative percentile (0.01 ... 1) with the values in
## `du` and `dnu`.
## Fixes: restored the lost `<-` on the data frame itself, and switched the
## column definitions from `<-` to `=`. Inside data.frame(), `<-` creates
## stray global variables and leaves the columns with auto-generated names
## instead of the intended ones.
distribution.percentile.total.margin <- data.frame(
  Cumulative.Performance.Percentile = seq(0.01, to = 1, by = 0.01),
  Percentage.Managed.Products = du,
  Percentage.Non.Managed.Products = dnu
)
## Empty accumulators; they are not filled anywhere in the visible part of
## the script.
duv <- c()
duum <- c()
## [the start of this commented-out expression is missing from the source] ...oduct.contracts[a, 2]
## for nonmanaged
##distribution.percentile.product.contracts[a, 3] - distribution.percentile.product.contracts[a, 3]
##Convert column one of the managed product and nonmanaged product data frames from factor to character
## Store the BDM column of both product tables as character strings.
um$BDM <- as.character(um$BDM)
num$BDM <- as.character(num$BDM)
##Count the number of times each person initiates a managed product
## Builds one row per distinct non-missing value in column 7 of `um`:
##   User  - column 1 of `um` at the first row where that value occurs
##   Count - number of `um` rows carrying that value
## Rows are sorted by descending Count.
## NOTE(review): column 7 is presumably the user ID and column 1 the BDM
## name -- confirm against the definition of `um`.
## Fixes relative to the original loop:
##  * three assignments had degraded to `<` comparisons;
##  * a wrapped comment ("## ID= as.numeric(...$ID))") spilled onto a code line;
##  * `df[-c(which(Count == 0)), ]` drops EVERY row when no Count is 0;
##  * the hard-coded row count 3132 is replaced by the actual rows of `um`
##    (NOTE(review): 3132 was presumably nrow(um) -- confirm);
##  * sorting uses base order() so the block does not rely on a package
##    providing arrange() being attached (none is loaded in the visible code).
user.key <- um[, 7]
first.seen <- !duplicated(user.key) & !is.na(user.key)
managed.product.count.per.user <- data.frame(
  User = as.character(um[first.seen, 1]),
  Count = vapply(
    which(first.seen),
    function(row) as.numeric(sum(user.key == user.key[row], na.rm = TRUE)),
    numeric(1)
  ),
  stringsAsFactors = FALSE
)
## Largest counts first; ties keep their order of first appearance in `um`.
managed.product.count.per.user <- managed.product.count.per.user[
  order(-managed.product.count.per.user$Count), ,
  drop = FALSE
]
rownames(managed.product.count.per.user) <- NULL
## Create a User ID vector that matches the User ID to the correct BDM in
## managed.product.count.per.user.
## For each row of managed.product.count.per.user, in order, `um` is scanned
## cyclically (continuing from the row after the previous hit) for the next
## row whose column 1, used as a regular expression, matches the user's name.
## Column 7 of that `um` row is appended to UID.
## Fixes relative to the original repeat loop:
##  * after a hit on the last row of `um` the index ran past the table, so the
##    next test was `if (NA)` and the script aborted; the index now wraps in
##    both branches;
##  * a user with no matching row made the loop spin forever; a full pass
##    without a hit now stops with an error;
##  * the loop counter no longer overwrites the global `x`;
##  * the final assignment had degraded to a `<` comparison.
UID <- c()
i <- 1
user.idx <- 1
um.rows <- nrow(um)
user.rows <- nrow(managed.product.count.per.user)
misses <- 0
while (length(UID) < user.rows) {
  if (isTRUE(grepl(um[i, 1], managed.product.count.per.user[user.idx, 1]))) {
    UID <- append(UID, um[i, 7], after = length(UID))
    user.idx <- user.idx + 1
    misses <- 0
  } else {
    misses <- misses + 1
    if (misses >= um.rows) {
      stop(
        "no row of `um` matches user ",
        managed.product.count.per.user[user.idx, 1],
        call. = FALSE
      )
    }
  }
  ## Advance cyclically: um.rows wraps back to 1.
  i <- i %% um.rows + 1
}
managed.product.count.per.user <- transform(
  managed.product.count.per.user,
  UID = UID
)
##add proportion of reports out of total.
## Resampled means for the managed products (`um`).
## NOTE(review): the head of this loop (the accumulator initialisation, the
## `repeat {` line and the start of the first append) is missing from the
## source. It is reconstructed here by analogy with the non-managed loop that
## follows it, which has the same shape -- confirm against the original script.
## Each of the 10000 iterations draws three independent samples of 300 rows
## without replacement, in this order, and records the mean of column 4
## (margin), column 3 (volume) and column 5 (unit margin) respectively.
## Fixes: the `<append(...)` comparisons never stored anything, so the stop
## condition could never be met. Wrapped tokens ("rep"/"lace", "ma"/"naged")
## are rejoined, and the vectors are preallocated instead of grown.
n.draws <- 10000
sample.size <- 300
margin.mean.of.managed.samples <- numeric(n.draws)
volume.mean.of.managed.samples <- numeric(n.draws)
unit.margin.mean.of.managed.samples <- numeric(n.draws)
for (draw in seq_len(n.draws)) {
  margin.mean.of.managed.samples[draw] <-
    mean(um[sample(1:nrow(um), sample.size, replace = FALSE), 4])
  volume.mean.of.managed.samples[draw] <-
    mean(um[sample(1:nrow(um), sample.size, replace = FALSE), 3])
  unit.margin.mean.of.managed.samples[draw] <-
    mean(um[sample(1:nrow(um), sample.size, replace = FALSE), 5])
}
## Resampled means for the non-managed products (`num`).
## Each of the 10000 iterations draws three independent samples of 300 rows
## without replacement, in this order, and records the mean of column 4
## (margin), column 3 (volume) and column 5 (unit margin) respectively.
## Fixes:
##  * `name <append(...)` was a comparison, so the vectors never grew and the
##    `length(...) == 10000` stop condition was never reached (endless loop);
##  * `after = ...` sat inside the mean() call, where it was silently ignored;
##  * the vectors are preallocated rather than grown one element at a time.
## The order of the sample() calls is unchanged, so a seeded run yields the
## values the repaired original would have produced.
n.draws <- 10000
sample.size <- 300
margin.mean.of.non.managed.samples <- numeric(n.draws)
volume.mean.of.non.managed.samples <- numeric(n.draws)
unit.margin.mean.of.non.managed.samples <- numeric(n.draws)
for (draw in seq_len(n.draws)) {
  margin.mean.of.non.managed.samples[draw] <-
    mean(num[sample(1:nrow(num), sample.size, replace = FALSE), 4])
  volume.mean.of.non.managed.samples[draw] <-
    mean(num[sample(1:nrow(num), sample.size, replace = FALSE), 3])
  unit.margin.mean.of.non.managed.samples[draw] <-
    mean(num[sample(1:nrow(num), sample.size, replace = FALSE), 5])
}
## Summary table of the resampled means: one row per product type
## ("Non.Managed" first, "Managed" second) with the mean, median and standard
## deviation of the margin, volume and unit-margin sample means.
## Fix: `sample.statistics <data.frame(...)` compared instead of assigning.
sample.statistics <- data.frame(
  Type = c("Non.Managed", "Managed"),
  Total.Margin.Mean = c(
    mean(margin.mean.of.non.managed.samples),
    mean(margin.mean.of.managed.samples)
  ),
  Total.Margin.Median = c(
    median(margin.mean.of.non.managed.samples),
    median(margin.mean.of.managed.samples)
  ),
  SD.Total.Margin.Mean = c(
    sd(margin.mean.of.non.managed.samples),
    sd(margin.mean.of.managed.samples)
  ),
  Volume.Mean = c(
    mean(volume.mean.of.non.managed.samples),
    mean(volume.mean.of.managed.samples)
  ),
  Volume.Median = c(
    median(volume.mean.of.non.managed.samples),
    median(volume.mean.of.managed.samples)
  ),
  SD.Volume.Mean = c(
    sd(volume.mean.of.non.managed.samples),
    sd(volume.mean.of.managed.samples)
  ),
  Unit.Margin.Mean = c(
    mean(unit.margin.mean.of.non.managed.samples),
    mean(unit.margin.mean.of.managed.samples)
  ),
  Unit.Margin.Median = c(
    median(unit.margin.mean.of.non.managed.samples),
    median(unit.margin.mean.of.managed.samples)
  ),
  SD.Unit.Margin.Mean = c(
    sd(unit.margin.mean.of.non.managed.samples),
    sd(unit.margin.mean.of.managed.samples)
  )
)
##Find control for users
## Empty accumulators for per-BDM statistics; none of these ten is filled in
## the visible part of the script.
sd.total.margin.BDM.managed <- c()
sd.total.margin.BDM.non.managed <- c()
mean.unit.margin.BDM.managed <- c()
mean.unit.margin.BDM.non.managed <- c()
sd.unit.margin.BDM.managed <- c()
sd.unit.margin.BDM.non.managed <- c()
mean.volume.BDM.managed <- c()
mean.volume.BDM.non.managed <- c()
sd.volume.BDM.managed <- c()
sd.volume.BDM.non.managed <- c()

##managed product list
## Mean of column 4 of margin.volume.clean over the rows whose column 1 matches
## `user` (used as a regular expression, as in the original grepl call) and
## whose i2i flag equals `i2i.value`. Returns NaN when no row qualifies.
bdm.mean.total.margin <- function(user, i2i.value) {
  rows <- which(
    grepl(user, margin.volume.clean[, 1]) &
      margin.volume.clean$i2i == i2i.value
  )
  mean(as.numeric(margin.volume.clean[rows, 4]), na.rm = FALSE)
}

## One value per row of managed.product.count.per.user, in row order.
## Fixes: the `<append(...)` comparisons stored nothing, the literal TRUE was
## split across two lines ("T" / "RUE"), `after =` was passed to mean() in the
## managed branch, and 1:nrow() misbehaved on an empty table.
bdm.rows <- seq_len(nrow(managed.product.count.per.user))
mean.total.margin.BDM.managed <- vapply(
  bdm.rows,
  function(k) bdm.mean.total.margin(managed.product.count.per.user[k, 1], 1),
  numeric(1)
)
mean.total.margin.BDM.non.managed <- vapply(
  bdm.rows,
  function(k) bdm.mean.total.margin(managed.product.count.per.user[k, 1], 0),
  numeric(1)
)
## Per-BDM comparison of mean total margin, managed vs. non-managed products.
## Fixes: the data-frame assignment had degraded to a `<` comparison, several
## identifiers were split across lines by hard wrapping, and the subtraction
## between the two as.numeric() terms in `zw` and `non.managed.bdm.mean` was
## missing (restored to match the comments: managed minus non-managed, and
## non-managed minus managed, respectively).
BDM.managed.non.managed.mean.comparison <- data.frame(
  BDM = managed.product.count.per.user$User,
  total.margin.mean.managed = as.numeric(mean.total.margin.BDM.managed),
  total.margin.mean.non.managed = as.numeric(mean.total.margin.BDM.non.managed)
)

## Managed minus non-managed mean total margin, one value per BDM.
margin.gap <-
  BDM.managed.non.managed.mean.comparison$total.margin.mean.managed -
  BDM.managed.non.managed.mean.comparison$total.margin.mean.non.managed

## Average gap over all BDMs, and the number of BDMs with a positive gap.
qw <- mean(margin.gap, na.rm = TRUE)
er <- length(which(margin.gap > 0))

##on average how much higher is the total margin for managed products than for
##nonmanaged products
zw <- mean(margin.gap[which(margin.gap > 0)], na.rm = TRUE)

##on average when the nonmanaged products win for a BDM, how much do the
##nonmanaged products beat the managed products by
non.managed.bdm.mean <- mean(-margin.gap[which(margin.gap < 0)], na.rm = TRUE)
```
```{r, echo = FALSE}
## Attach user IDs to the win-rate table and flag, per row, whether column 10
## of the merged table appears in managed.product.count.per.user$UID
## (1 = present, 0 = absent).
## NOTE(review): column 10 is presumably the UID column contributed by
## uid.table -- confirm.
## Fixes: the final `<transform(...)` was a comparison, so
## win.rate.clean.uid.i2i was never created. The element-by-element loop is
## replaced by a vectorised %in%, which also copes with an empty table.
win.rate.clean.uid <- merge(win.rate.clean, uid.table, by.x = "BDM")
is.in.i2i <- as.numeric(
  win.rate.clean.uid[, 10] %in% managed.product.count.per.user$UID
)
win.rate.clean.uid.i2i <- transform(win.rate.clean.uid, in.i2i = is.in.i2i)
## Mean and standard deviation of column 4 (win rate) and column 5 (renewal
## rate) of win.rate.clean.uid.i2i, split by the flag in column 11
## (1 = "Managed", 0 = "Non Managed").
## Fixes: hard wrapping had pushed characters off four lines, leaving
## `[, 11] = 1` / `= 0` (a syntax error, meant `==`) in SD.Win.Rate and
## `[, 1]` (meant `[, 11]`) in SD.Renewal.Rate. The stray "=", "=", "1", "1"
## lines that followed the statement were those overflowed characters and are
## folded back in here.
in.i2i.flag <- win.rate.clean.uid.i2i[, 11]
managed.rows <- which(in.i2i.flag == 1)
non.managed.rows <- which(in.i2i.flag == 0)
win.rate.stats <- data.frame(
  Type = c("Managed", "Non Managed"),
  Mean.Win.Rate = c(
    mean(win.rate.clean.uid.i2i[managed.rows, 4]),
    mean(win.rate.clean.uid.i2i[non.managed.rows, 4])
  ),
  SD.Win.Rate = c(
    sd(win.rate.clean.uid.i2i[managed.rows, 4]),
    sd(win.rate.clean.uid.i2i[non.managed.rows, 4])
  ),
  Mean.Renewal.Rate = c(
    mean(win.rate.clean.uid.i2i[managed.rows, 5]),
    mean(win.rate.clean.uid.i2i[non.managed.rows, 5])
  ),
  SD.Renewal.Rate = c(
    sd(win.rate.clean.uid.i2i[managed.rows, 5]),
    sd(win.rate.clean.uid.i2i[non.managed.rows, 5])
  )
)