Beruflich Dokumente
Kultur Dokumente
> getwd()
[1] "G:/TERMTHREE/BUSINESSANALYTICS/CLASSWORK/new"
> setwd("G:/TERMTHREE/BUSINESSANALYTICS/CLASSWORK/new")
> bData = read.csv(file="Cereals.csv", header = TRUE, sep = ",")
> head(bData)
                       name mfr type calories protein fat sodium fiber carbo sugars potass vitamins
1                 100%_Bran   N    C       70       4   1    130  10.0   5.0      6    280       25
2         100%_Natural_Bran   Q    C      120       3   5     15   2.0   8.0      8    135        0
3                  All-Bran   K    C       70       4   1    260   9.0   7.0      5    320       25
4 All-Bran_with_Extra_Fiber   K    C       50       4   0    140  14.0   8.0      0    330       25
5            Almond_Delight   R    C      110       2   2    200   1.0  14.0      8     NA       25
6   Apple_Cinnamon_Cheerios   G    C      110       2   2    180   1.5  10.5     10     70       25
  shelf weight cups   rating
1     3      1 0.33 68.40297
2     3      1 1.00 33.98368
3     3      1 0.33 59.42551
4     3      1 0.50 93.70491
5     3      1 0.75 34.38484
6     1      1 0.75 29.50954
> attach(bData)
The following objects are masked from bData (pos = 6):
calories, carbo, cups, fat, fiber, mfr, name, potass, protein, rating, shelf
, sodium,
sugars, type, vitamins, weight
>
> #Checking Multicollinearity
>
> require("usdm")
> head(bData)
name mfr type calories protein fat sodium fiber carbo sug
ars potass vitamins
1
100%_Bran N
C
70
4 1
130 10.0 5.0
6
280
25
2
100%_Natural_Bran Q
C
120
3 5
15 2.0 8.0
8
135
0
3
All-Bran K
C
70
4 1
260 9.0 7.0
5
320
25
4 All-Bran_with_Extra_Fiber K
C
50
4 0
140 14.0 8.0
0
330
25
5
Almond_Delight R
C
110
2 2
200 1.0 14.0
8
NA
25
6 Apple_Cinnamon_Cheerios G
C
110
2 2
180 1.5 10.5
10
70
25
shelf weight cups rating
1
3
1 0.33 68.40297
2
3
1 1.00 33.98368
3
3
1 0.33 59.42551
4
3
1 0.50 93.70491
5
3
1 0.75 34.38484
6
1
1 0.75 29.50954
> vif(bData[,c(5:12)])
  Variables      VIF
1   protein 1.870357
2       fat 1.461836
3    sodium 1.279833
4     fiber 9.884092
5     carbo 2.426581
6    sugars 2.180586
7    potass 9.670269
8  vitamins 1.218836
> head(bData)
name mfr type calories protein fat sodium fiber carbo sug
ars potass vitamins
1
100%_Bran N
C
70
4 1
130 10.0 5.0
6
280
25
2
100%_Natural_Bran Q
C
120
3 5
15 2.0 8.0
8
135
0
3
All-Bran K
C
70
4 1
260 9.0 7.0
5
320
25
4 All-Bran_with_Extra_Fiber K
C
50
4 0
140 14.0 8.0
0
330
25
5
Almond_Delight R
C
110
2 2
200 1.0 14.0
8
NA
25
6 Apple_Cinnamon_Cheerios G
C
110
2 2
180 1.5 10.5
10
70
25
shelf weight cups rating
1
3
1 0.33 68.40297
2
3
1 1.00 33.98368
3
3
1 0.33 59.42551
4
3
1 0.50 93.70491
5
3
1 0.75 34.38484
6
1
1 0.75 29.50954
>
> #Solution Q1 part1
> #Since the VIF values of all the independent variables are less than 10, there is no multicollinearity.
>
> #Solution Q1 part2
> mreg1 <- lm(calories~protein+fat+sodium+fiber+carbo+sugars+potass+vitamins, da
ta=bData)
> summary(mreg1)
Call:
lm(formula = calories ~ protein + fat + sodium + fiber + carbo +
sugars + potass + vitamins, data = bData)
Residuals:
Min
1Q Median
-12.759 -3.412 -0.084
3Q
Max
2.676 15.914
Coefficients:
(Intercept)
protein
fat
sodium
fiber
carbo
t value
-0.649
5.573
12.818
-0.152
1.647
17.842
Pr(>|t|)
0.519
5.17e-07 ***
< 2e-16 ***
0.879
0.104
< 2e-16 ***
sugars
4.105063 0.201220 20.401 <
potass
-0.037836 0.026061 -1.452
vitamins
-0.007032 0.029414 -0.239
--Signif. codes: 0 *** 0.001 ** 0.01 *
2e-16 ***
0.151
0.812
0.05 .
0.1
3Q
Max
2.7673 15.9076
Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.919296   4.549512  -0.642    0.523
protein      4.206780   0.749442   5.613 4.27e-07 ***
fat          9.133479   0.705356  12.949  < 2e-16 ***
fiber        1.257055   0.760619   1.653    0.103
carbo        4.229816   0.221730  19.076  < 2e-16 ***
sugars       4.099017   0.195799  20.935  < 2e-16 ***
potass      -0.037603   0.025822  -1.456    0.150
vitamins    -0.007978   0.028537  -0.280    0.781
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3Q
Max
2.7873 15.9560
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.67847    4.43638  -0.604    0.548
protein      4.18666    0.74083   5.651 3.54e-07 ***
fat          9.13427    0.70048  13.040  < 2e-16 ***
fiber        1.23970    0.75285   1.647    0.104
carbo        4.20865    0.20696  20.335  < 2e-16 ***
sugars       4.08456    0.18754  21.779  < 2e-16 ***
potass      -0.03730    0.02562  -1.456    0.150
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3Q
Max
2.5433 16.4884
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  -1.2589     4.2917  -0.293    0.770
protein       3.8860     0.6963   5.581 4.23e-07 ***
fat           8.6983     0.6512  13.356  < 2e-16 ***
fiber         0.2501     0.3277   0.763    0.448
carbo         4.1446     0.2005  20.675  < 2e-16 ***
sugars        3.9681     0.1692  23.449  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
1Q
Median
3Q
Max
2.5555 16.5380
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  0.04175    3.92730   0.011    0.992
protein      4.15389    0.59962   6.928 1.59e-09 ***
fat          8.59671    0.63562  13.525  < 2e-16 ***
carbo        4.06656    0.17194  23.651  < 2e-16 ***
sugars       3.94232    0.16534  23.844  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
4
fiber 9.884092
5
carbo 2.426581
6
sugars 2.180586
7
potass 9.670269
8 vitamins 1.218836
> head(bData)
name mfr type calories protein fat sodium fiber carbo sug
ars potass vitamins
1
100%_Bran N
C
70
4 1
130 10.0 5.0
6
280
25
2
100%_Natural_Bran Q
C
120
3 5
15 2.0 8.0
8
135
0
3
All-Bran K
C
70
4 1
260 9.0 7.0
5
320
25
4 All-Bran_with_Extra_Fiber K
C
50
4 0
140 14.0 8.0
0
330
25
5
Almond_Delight R
C
110
2 2
200 1.0 14.0
8
NA
25
6 Apple_Cinnamon_Cheerios G
C
110
2 2
180 1.5 10.5
10
70
25
shelf weight cups rating
1
3
1 0.33 68.40297
2
3
1 1.00 33.98368
3
3
1 0.33 59.42551
4
3
1 0.50 93.70491
5
3
1 0.75 34.38484
6
1
1 0.75 29.50954
>
> #Correlation
>
> cor(bData[,c(4:12)],use="complete.obs")
calories
protein
fat
sodium
fiber
c
arbo
sugars
calories 1.00000000 0.03399166 0.5073732397 0.2962474981 -0.29521183 0.2706
0605 0.569120535
protein 0.03399166 1.00000000 0.2023533963 0.0115588913 0.51400610 -0.0367
4326 -0.286583967
fat
0.50737324 0.20235340 1.0000000000 0.0008219036 0.01403587 -0.2849
3369 0.287152487
sodium
0.29624750 0.01155889 0.0008219036 1.0000000000 -0.07073492 0.3284
0919 0.037058961
fiber
-0.29521183 0.51400610 0.0140358654 -0.0707349230 1.00000000 -0.3790
8370 -0.150948502
carbo
0.27060605 -0.03674326 -0.2849336855 0.3284091857 -0.37908370 1.0000
0000 -0.452069189
sugars
0.56912054 -0.28658397 0.2871524866 0.0370589612 -0.15094850 -0.4520
6919 1.000000000
potass -0.07136125 0.57874284 0.1996367171 -0.0394380876 0.91150392 -0.3650
0293 0.001413982
vitamins 0.25984556 0.05479952 -0.0305139099 0.3315759640 -0.03871734 0.2535
7897 0.072954382
potass
vitamins
calories -0.071361247 0.25984556
protein 0.578742837 0.05479952
fat
0.199636717 -0.03051391
sodium -0.039438088 0.33157596
fiber
0.911503921 -0.03871734
carbo
-0.365002934 0.25357897
sugars
0.001413982 0.07295438
potass
1.000000000 -0.00263583
[1] 0.4296726
> summary(est24)$adj.r.squared
[1] 0.6657591
> summary(est24)
Call:
lm(formula = calories ~ carbo + sugars, data = bData)
Residuals:
    Min      1Q  Median      3Q     Max
-25.670  -7.790  -1.998   5.136  32.178
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  29.1064     7.1786   4.055 0.000124 ***
carbo         3.3819     0.3796   8.909 2.76e-13 ***
sugars        3.9575     0.3387  11.683  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3Q
Max
3.3473 22.2276
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  15.0069     4.2163   3.559 0.000663 ***
fat           9.8095     0.7855  12.488  < 2e-16 ***
sugars        3.4904     0.1953  17.871  < 2e-16 ***
carbo         3.8934     0.2187  17.803  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> #Round Four
> est41 <- lm(calories~fiber+sugars+carbo+fat, data=bData)
> est42 <- lm(calories~sodium+sugars+carbo+fat, data=bData)
>
>
> #Round Four Summary
> summary(est41)$adj.r.squared
[1] 0.9071853
> summary(est42)$adj.r.squared
[1] 0.8916343
> summary(est41)
Call:
lm(formula = calories ~ fiber + sugars + carbo + fat, data = bData)
Residuals:
     Min       1Q   Median       3Q      Max
-16.1526  -3.8290  -0.9644   2.9200  24.2138
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   4.3915     4.9778   0.882 0.380639
fiber         1.1719     0.3378   3.469 0.000892 ***
sugars        3.7476     0.1964  19.082  < 2e-16 ***
carbo         4.3113     0.2366  18.222  < 2e-16 ***
fat           9.9192     0.7321  13.548  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3Q
Max
2.8776 24.3893
Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept)  4.262571   5.050786   0.844  0.40158
fiber        1.183730   0.344923   3.432  0.00101 **
sugars       3.758738   0.205047  18.331  < 2e-16 ***
carbo        4.332839   0.260324  16.644  < 2e-16 ***
fat          9.932379   0.739920  13.424  < 2e-16 ***
sodium      -0.001901   0.009246  -0.206  0.83770
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3Q
Max
2.9200 24.2138
Coefficients:
Estimate Std. Error t value
(Intercept) 4.3915
4.9778 0.882
fiber
1.1719
0.3378 3.469
sugars
3.7476
0.1964 19.082
carbo
4.3113
0.2366 18.222
fat
9.9192
0.7321 13.548
--Signif. codes: 0 *** 0.001 ** 0.01
Pr(>|t|)
0.380639
0.000892
< 2e-16
< 2e-16
< 2e-16
*
***
***
***
***
0.05 .
0.1
#Question 3
#Regression model-1
mreg1 <- lm(rating~sugars+fat+sodium+fiber, data=bData)
summary(mreg1)
Call:
lm(formula = rating ~ sugars + fat + sodium + fiber, data = bData)
Residuals:
    Min      1Q  Median      3Q     Max
-4.3394 -1.3669 -0.2298  1.1915  7.3055
Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) 62.535230   0.818157   76.43   <2e-16 ***
sugars      -1.953304   0.066835  -29.23   <2e-16 ***
fat         -3.325458   0.287630  -11.56   <2e-16 ***
sodium      -0.055642   0.003358  -16.57   <2e-16 ***
fiber        2.832353   0.116396   24.33   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
1
3
1 0.33 68.40297
0
0
2
3
1 1.00 33.98368
0
0
3
3
1 0.33 59.42551
0
1
4
3
1 0.50 93.70491
0
1
5
3
1 0.75 34.38484
0
0
6
1
1 0.75 29.50954
1
0
>
> mreg2 <- lm(rating~sugars+fat+sodium+fiber+mfr_dummyK+mfr_dummyG, data=bData)
> summary(mreg2)
Call:
lm(formula = rating ~ sugars + fat + sodium + fiber + mfr_dummyK +
mfr_dummyG, data = bData)
Residuals:
    Min      1Q  Median      3Q     Max
-4.7066 -1.5615 -0.3118  1.4276  7.6004
Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) 62.519046   0.826690  75.626  < 2e-16 ***
sugars      -1.955599   0.069076 -28.311  < 2e-16 ***
fat         -3.231652   0.306129 -10.557 4.77e-16 ***
sodium      -0.054722   0.003664 -14.936  < 2e-16 ***
fiber        2.798131   0.121134  23.099  < 2e-16 ***
mfr_dummyK   0.181009   0.717970   0.252    0.802
mfr_dummyG  -0.662525   0.747075  -0.887    0.378
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Almond_Delight R
C
110
NA
25
6 Apple_Cinnamon_Cheerios G
C
110
10
70
25
shelf weight cups rating mfr_dummyG mfr_dummyK
ction1
1
3
1 0.33 68.40297
0
0
038961
2
3
1 1.00 33.98368
0
0
883117
3
3
1 0.33 59.42551
0
1
000000
4
3
1 0.50 93.70491
0
1
805195
5
3
1 0.75 34.38484
0
0
883117
6
1
1 0.75 29.50954
1
0
805195
>
> detach(bData)
> attach(bData)
The following objects are masked from bData (pos =
200
1.0 14.0
180
1.5 10.5
weight_c
sugars_c intera
-0.02961039 -1.0263158
0.03
0.06
-0.02961039 -7.0263158
0.20
3):
calories, carbo, cups, fat, fiber, mfr, name, potass, protein, rating, shelf
, sodium,
sugars, type, vitamins, weight
The following objects are masked from bData (pos = 7):
calories, carbo, cups, fat, fiber, mfr, name, potass, protein, rating, shelf
, sodium,
sugars, type, vitamins, weight
>
> mreg3 <- lm(calories~sugars+weight+interaction1, data=bData)
> summary(mreg3)
Call:
lm(formula = calories ~ sugars + weight + interaction1, data = bData)
Residuals:
    Min      1Q  Median      3Q     Max
-45.695  -5.617   0.038   8.994  38.660
Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept)   22.6174    10.6016   2.133 0.036301 *
sugars         1.3303     0.3867   3.440 0.000971 ***
weight        73.7232    11.1794   6.595 6.13e-09 ***
interaction1  -3.1044     1.8042  -1.721 0.089617 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ficant,
Hit <Return> to see next plot: #but individually weight and sugars are significant. And adj R2 = 56.25%
Hit <Return> to see next plot: #Data is independent and there is no heteroskedasticity. And no influential outliers.
Hit <Return> to see next plot:
> #Question 4B
> mean(fat)
[1] 1.012987
> bData$fat_c = fat - mean(fat)
> bData$interaction2 = bData$weight_c*bData$fat_c
> head(bData)
                       name mfr type calories protein fat sodium fiber carbo sugars potass vitamins
1                 100%_Bran   N    C       70       4   1    130  10.0   5.0      6    280       25
2         100%_Natural_Bran   Q    C      120       3   5     15   2.0   8.0      8    135        0
3                  All-Bran   K    C       70       4   1    260   9.0   7.0      5    320       25
4 All-Bran_with_Extra_Fiber   K    C       50       4   0    140  14.0   8.0      0    330       25
5            Almond_Delight   R    C      110       2   2    200   1.0  14.0      8     NA       25
6   Apple_Cinnamon_Cheerios   G    C      110       2   2    180   1.5  10.5     10     70       25
  shelf weight cups   rating mfr_dummyG mfr_dummyK    weight_c   sugars_c interaction1       fat_c
1     3      1 0.33 68.40297          0          0 -0.02961039 -1.0263158   0.03038961 -0.01298701
2     3      1 1.00 33.98368          0          0 -0.02961039  0.9736842  -0.02883117  3.98701299
3     3      1 0.33 59.42551          0          1 -0.02961039 -2.0263158   0.06000000 -0.01298701
4     3      1 0.50 93.70491          0          1 -0.02961039 -7.0263158   0.20805195 -1.01298701
5     3      1 0.75 34.38484          0          0 -0.02961039  0.9736842  -0.02883117  0.98701299
6     1      1 0.75 29.50954          1          0 -0.02961039  2.9736842  -0.08805195  0.98701299
   interaction2
1  0.0003845505
2 -0.1180570079
3  0.0003845505
4  0.0299949401
5 -0.0292258391
6 -0.0292258391
>
> detach(bData)
> attach(bData)
The following objects are masked from bData (pos = 3):
calories, carbo, cups, fat, fiber, mfr, name, potass, protein, rating, shelf
, sodium,
sugars, type, vitamins, weight
The following objects are masked from bData (pos = 7):
calories, carbo, cups, fat, fiber, mfr, name, potass, protein, rating, shelf
, sodium,
sugars, type, vitamins, weight
>
> mreg4 <- lm(calories~fat+weight+interaction2, data=bData)
> summary(mreg4)
Call:
lm(formula = calories ~ fat + weight + interaction2, data = bData)
Residuals:
    Min      1Q  Median      3Q     Max
-47.485  -5.782   0.988   5.339  30.988
Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept)    18.416      9.976   1.846   0.0689 .
fat             6.974      1.454   4.797 8.28e-06 ***
weight         79.273      9.709   8.165 6.90e-12 ***
interaction2   -6.797     11.623  -0.585   0.5605
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> #Question 5
>
>
>
>
>
Call:
lm(formula = calories ~ weight, data = bData)
Residuals:
    Min      1Q  Median      3Q     Max
-54.214  -4.214   5.786   5.786  45.786
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)    14.08      11.17   1.261    0.211
weight         90.13      10.73   8.397  2.1e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3Q
Max
9.555 50.187
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  87.8459     8.6211  10.190 9.78e-16 ***
carbo         1.2922     0.5634   2.294   0.0246 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3Q
Max
5.162 45.162
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)    13.33      11.17   1.193    0.237
weight         91.54      10.78   8.488 1.55e-12 ***
weight2       -31.00      27.27  -1.137    0.259
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1