Masaki Rhodes 11/16/2020: Library Function
Masaki Rhodes 11/16/2020: Library Function
Masaki Rhodes 11/16/2020: Library Function
Masaki Rhodes
11/16/2020
df = read.csv('oj.csv')
added = df
added$Q <- 10^(df$logmove)
library(plyr)
Df1 <- ddply(added, c('store','week'),function(x) c(weighted_mean = weighted.mean(x$price,x$Q)))
withmean = merge(added,Df1,by=c("store","week"))
names(withmean)
dataToPass<-withmean[,c("weighted_mean","AGE60","EDUC","ETHNIC","INCOME","HHLARGE","WORKWOM","HVAL150","SSTRDIST"
,"SSTRVOL","CPDIST5","CPWVOL5")]
library(maptree)
library(rpart)
fit<-rpart(as.formula(weighted_mean ~ AGE60 + EDUC + ETHNIC + INCOME + WORKWOM + HVAL150 + SSTRDIST + SSTRVOL + C
PDIST5 + CPWVOL5),data=dataToPass,method="anova",cp=0.007)
draw.tree(fit)
withmean$leaf = fit$where
print(unique(withmean$leaf))
## [1] 2 4 5
oj_leaf_L = withmean[which(withmean$leaf==2),]
reg_L <- glm(logmove~log(price)*brand*feat, data=oj_leaf_L)
summary(reg_L)
##
## Call:
## glm(formula = logmove ~ log(price) * brand * feat, data = oj_leaf_L)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.1952 -0.4484 0.0165 0.4301 3.1496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.71185 0.04364 245.472 < 2e-16 ***
## log(price) -2.84086 0.07489 -37.932 < 2e-16 ***
## brandminute.maid 0.04528 0.08565 0.529 0.597048
## brandtropicana 0.77579 0.09631 8.055 8.92e-16 ***
## feat 1.14217 0.07026 16.257 < 2e-16 ***
## log(price):brandminute.maid 0.38488 0.11672 3.298 0.000979 ***
## log(price):brandtropicana 0.17810 0.11002 1.619 0.105530
## log(price):feat -0.54600 0.13908 -3.926 8.71e-05 ***
## brandminute.maid:feat 1.05743 0.14876 7.108 1.26e-12 ***
## brandtropicana:feat 0.61889 0.18491 3.347 0.000820 ***
## log(price):brandminute.maid:feat -0.86456 0.22589 -3.827 0.000130 ***
## log(price):brandtropicana:feat -0.83767 0.23489 -3.566 0.000364 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.5138392)
##
## Null deviance: 10912.6 on 9098 degrees of freedom
## Residual deviance: 4669.3 on 9087 degrees of freedom
## AIC: 19777
##
## Number of Fisher Scoring iterations: 2
oj_leaf_M = withmean[which(withmean$leaf==4),]
reg_M <- glm(logmove~log(price)*brand*feat, data=oj_leaf_M)
summary(reg_M)
##
## Call:
## glm(formula = logmove ~ log(price) * brand * feat, data = oj_leaf_M)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.5120 -0.3999 -0.0220 0.3812 3.1700
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.22843 0.02731 374.538 < 2e-16 ***
## log(price) -2.62508 0.04515 -58.141 < 2e-16 ***
## brandminute.maid 0.13937 0.05499 2.534 0.0113 *
## brandtropicana 0.91296 0.05903 15.465 < 2e-16 ***
## feat 1.11325 0.04456 24.986 < 2e-16 ***
## log(price):brandminute.maid 0.74065 0.07177 10.320 < 2e-16 ***
## log(price):brandtropicana 0.63664 0.06583 9.671 < 2e-16 ***
## log(price):feat -0.51631 0.08621 -5.989 2.15e-09 ***
## brandminute.maid:feat 1.13436 0.09655 11.748 < 2e-16 ***
## brandtropicana:feat 0.68402 0.11468 5.965 2.49e-09 ***
## log(price):brandminute.maid:feat -1.06093 0.14317 -7.410 1.32e-13 ***
## log(price):brandtropicana:feat -0.86552 0.14370 -6.023 1.74e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.4171677)
##
## Null deviance: 17402.1 on 18443 degrees of freedom
## Residual deviance: 7689.2 on 18432 degrees of freedom
## AIC: 36231
##
## Number of Fisher Scoring iterations: 2
oj_leaf_N = withmean[which(withmean$leaf==5),]
reg_N <- glm(logmove~log(price)*brand*feat, data=oj_leaf_N)
summary(reg_N)
##
## Call:
## glm(formula = logmove ~ log(price) * brand * feat, data = oj_leaf_N)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.7616 -0.3175 -0.0107 0.3099 2.5853
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.9650 0.0887 112.342 < 2e-16 ***
## log(price) -2.8906 0.1322 -21.861 < 2e-16 ***
## brandminute.maid 0.6814 0.1842 3.698 0.000225 ***
## brandtropicana 1.5792 0.1960 8.058 1.65e-15 ***
## feat 1.1429 0.1505 7.595 5.60e-14 ***
## log(price):brandminute.maid 1.1458 0.2209 5.187 2.45e-07 ***
## log(price):brandtropicana 1.1611 0.2014 5.765 1.01e-08 ***
## log(price):feat -0.3304 0.2787 -1.186 0.236012
## brandminute.maid:feat 0.7365 0.3284 2.242 0.025086 *
## brandtropicana:feat 0.2019 0.3884 0.520 0.603343
## log(price):brandminute.maid:feat -0.9696 0.4706 -2.061 0.039531 *
## log(price):brandtropicana:feat -0.5441 0.4728 -1.151 0.250034
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.389)
##
## Null deviance: 1736.10 on 1403 degrees of freedom
## Residual deviance: 541.49 on 1392 degrees of freedom
## AIC: 2672.7
##
## Number of Fisher Scoring iterations: 2
library(reshape2)
oj_prices <-df[,1:6]
oj_wide <- dcast(oj_prices, store + week ~ brand)
oj_cross$leaf = fit$where
L = oj_cross[which(oj_cross$leaf==2),]
L_D = L[which(L$brand=="dominicks"),]
L_T = L[which(L$brand=="tropicana"),]
L_M = L[which(L$brand=="minute.maid"),]
regL_D <- glm(logmove~log(PriceD)*feat + log(PriceT)*feat + log(PriceM)*feat, data=L_D)
summary(regL_D)
##
## Call:
## glm(formula = logmove ~ log(PriceD) * feat + log(PriceT) * feat +
## log(PriceM) * feat, data = L_D)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.7552 -0.4832 -0.0157 0.4613 2.9457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.12132 0.12440 73.323 < 2e-16 ***
## log(PriceD) -2.69070 0.06966 -38.628 < 2e-16 ***
## feat 0.57680 0.23812 2.422 0.01548 *
## log(PriceT) 0.26517 0.08645 3.067 0.00218 **
## log(PriceM) 1.22717 0.12759 9.618 < 2e-16 ***
## log(PriceD):feat -1.47140 0.15375 -9.570 < 2e-16 ***
## feat:log(PriceT) 1.53574 0.16430 9.347 < 2e-16 ***
## feat:log(PriceM) -0.77172 0.24085 -3.204 0.00137 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.6328214)
##
## Null deviance: 4880.4 on 3032 degrees of freedom
## Residual deviance: 1914.3 on 3025 degrees of freedom
## AIC: 7229.5
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## glm(formula = logmove ~ log(PriceD) * feat + log(PriceT) * feat +
## log(PriceM) * feat, data = L_T)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.87455 -0.39807 0.00418 0.38860 2.65359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.18751 0.08910 125.568 < 2e-16 ***
## log(PriceD) 0.23394 0.04787 4.887 1.08e-06 ***
## feat 0.51024 0.22654 2.252 0.0244 *
## log(PriceT) -2.35652 0.06420 -36.704 < 2e-16 ***
## log(PriceM) 0.18989 0.08830 2.151 0.0316 *
## log(PriceD):feat -0.08824 0.14847 -0.594 0.5523
## feat:log(PriceT) -0.80197 0.17421 -4.603 4.33e-06 ***
## feat:log(PriceM) 1.28389 0.23871 5.378 8.09e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.3714472)
##
## Null deviance: 2391.8 on 3032 degrees of freedom
## Residual deviance: 1123.6 on 3025 degrees of freedom
## AIC: 5613.5
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## glm(formula = logmove ~ log(PriceD) * feat + log(PriceT) * feat +
## log(PriceM) * feat, data = L_M)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.17181 -0.35544 -0.00831 0.34054 2.35574
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.93148 0.07833 126.792 < 2e-16 ***
## log(PriceD) 0.64296 0.04567 14.078 < 2e-16 ***
## feat 0.50198 0.21074 2.382 0.0173 *
## log(PriceT) 0.44194 0.05659 7.810 7.82e-15 ***
## log(PriceM) -2.40521 0.08391 -28.665 < 2e-16 ***
## log(PriceD):feat 0.10254 0.09976 1.028 0.3041
## feat:log(PriceT) 1.55478 0.14810 10.498 < 2e-16 ***
## feat:log(PriceM) -1.52177 0.23002 -6.616 4.35e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.3023461)
##
## Null deviance: 2317.9 on 3032 degrees of freedom
## Residual deviance: 914.6 on 3025 degrees of freedom
## AIC: 4989.2
##
## Number of Fisher Scoring iterations: 2
M = oj_cross[which(oj_cross$leaf==4),]
M_D = M[which(M$brand=="dominicks"),]
M_T = M[which(M$brand=="tropicana"),]
M_M = M[which(M$brand=="minute.maid"),]
regM_D <- glm(logmove~log(PriceD)*feat + log(PriceT)*feat + log(PriceM)*feat, data=M_D)
summary(regM_D)
##
## Call:
## glm(formula = logmove ~ log(PriceD) * feat + log(PriceT) * feat +
## log(PriceM) * feat, data = M_D)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.3986 -0.5075 0.0073 0.5237 2.7654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.45176 0.07524 138.921 < 2e-16 ***
## log(PriceD) -2.79127 0.06838 -40.823 < 2e-16 ***
## feat -0.25082 0.13678 -1.834 0.0667 .
## log(PriceT) -0.42589 0.06814 -6.250 4.37e-10 ***
## log(PriceM) 0.58969 0.06565 8.982 < 2e-16 ***
## log(PriceD):feat -0.29729 0.11629 -2.556 0.0106 *
## feat:log(PriceT) 0.04981 0.12176 0.409 0.6825
## feat:log(PriceM) 1.41501 0.14835 9.538 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.6824063)
##
## Null deviance: 8092.4 on 6147 degrees of freedom
## Residual deviance: 4190.0 on 6140 degrees of freedom
## AIC: 15108
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## glm(formula = logmove ~ log(PriceD) * feat + log(PriceT) * feat +
## log(PriceM) * feat, data = M_T)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3630 -0.3746 0.0019 0.3591 2.1894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.78391 0.05293 203.750 < 2e-16 ***
## log(PriceD) 0.06219 0.03830 1.624 0.104
## feat 1.64595 0.12587 13.077 < 2e-16 ***
## log(PriceT) -1.98235 0.04763 -41.622 < 2e-16 ***
## log(PriceM) 0.29538 0.04331 6.821 9.92e-12 ***
## log(PriceD):feat -0.13648 0.17467 -0.781 0.435
## feat:log(PriceT) -2.15739 0.12687 -17.005 < 2e-16 ***
## feat:log(PriceM) 1.08245 0.15626 6.927 4.73e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.3403246)
##
## Null deviance: 4201.7 on 6147 degrees of freedom
## Residual deviance: 2089.6 on 6140 degrees of freedom
## AIC: 10831
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## glm(formula = logmove ~ log(PriceD) * feat + log(PriceT) * feat +
## log(PriceM) * feat, data = M_M)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.71989 -0.36564 -0.02786 0.33540 2.76750
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.30340 0.05065 203.408 < 2e-16 ***
## log(PriceD) 0.53484 0.04585 11.666 < 2e-16 ***
## feat 1.20296 0.10820 11.117 < 2e-16 ***
## log(PriceT) 0.13419 0.04960 2.705 0.00684 **
## log(PriceM) -2.25571 0.06550 -34.439 < 2e-16 ***
## log(PriceD):feat 0.36403 0.09109 3.997 6.50e-05 ***
## feat:log(PriceT) 0.61791 0.09771 6.324 2.73e-10 ***
## feat:log(PriceM) -1.47912 0.10014 -14.770 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.3370803)
##
## Null deviance: 6427.7 on 6147 degrees of freedom
## Residual deviance: 2069.7 on 6140 degrees of freedom
## AIC: 10772
##
## Number of Fisher Scoring iterations: 2
N = oj_cross[which(oj_cross$leaf==5),]
N_D = N[which(N$brand=="dominicks"),]
N_T = N[which(N$brand=="tropicana"),]
N_M = N[which(N$brand=="minute.maid"),]
regN_D <- glm(logmove~log(PriceD)*feat + log(PriceT)*feat + log(PriceM)*feat, data=N_D)
summary(regN_D)
##
## Call:
## glm(formula = logmove ~ log(PriceD) * feat + log(PriceT) * feat +
## log(PriceM) * feat, data = N_D)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.94050 -0.46757 -0.01434 0.43755 1.56740
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.33586 0.26915 38.402 < 2e-16 ***
## log(PriceD) -2.71004 0.23183 -11.690 < 2e-16 ***
## feat -4.44065 8.33837 -0.533 0.5946
## log(PriceT) -1.13147 0.23302 -4.856 1.65e-06 ***
## log(PriceM) 1.17742 0.12886 9.137 < 2e-16 ***
## log(PriceD):feat 10.84807 5.32279 2.038 0.0421 *
## feat:log(PriceT) -0.01412 8.12852 -0.002 0.9986
## feat:log(PriceM) -0.35301 3.45307 -0.102 0.9186
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.4049321)
##
## Null deviance: 312.16 on 467 degrees of freedom
## Residual deviance: 186.27 on 460 degrees of freedom
## AIC: 914.97
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## glm(formula = logmove ~ log(PriceD) * feat + log(PriceT) * feat +
## log(PriceM) * feat, data = N_T)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.87576 -0.29230 0.00475 0.33893 1.70695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.4926 0.3435 30.548 < 2e-16 ***
## log(PriceD) 0.5603 0.2192 2.556 0.0109 *
## feat 0.9984 0.6419 1.556 0.1205
## log(PriceT) -2.0504 0.3516 -5.831 1.04e-08 ***
## log(PriceM) 0.2092 0.1735 1.205 0.2287
## log(PriceD):feat -1.5747 0.6386 -2.466 0.0140 *
## feat:log(PriceT) -2.2407 0.4462 -5.022 7.34e-07 ***
## feat:log(PriceM) 2.6884 0.4065 6.614 1.04e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.2762951)
##
## Null deviance: 238.13 on 467 degrees of freedom
## Residual deviance: 127.10 on 460 degrees of freedom
## AIC: 736.08
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## glm(formula = logmove ~ log(PriceD) * feat + log(PriceT) * feat +
## log(PriceM) * feat, data = N_M)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0950 -0.3711 -0.0012 0.3790 1.9495
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.4426 0.2996 34.855 < 2e-16 ***
## log(PriceD) -1.6806 0.3971 -4.233 2.79e-05 ***
## feat 25.6137 9.9158 2.583 0.010099 *
## log(PriceT) 1.3722 0.2390 5.741 1.71e-08 ***
## log(PriceM) -2.3309 0.1477 -15.781 < 2e-16 ***
## log(PriceD):feat 1.9853 0.5749 3.454 0.000604 ***
## feat:log(PriceT) -0.2662 0.5190 -0.513 0.608317
## feat:log(PriceM) -37.0293 13.9041 -2.663 0.008012 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.3066576)
##
## Null deviance: 540.03 on 467 degrees of freedom
## Residual deviance: 141.06 on 460 degrees of freedom
## AIC: 784.87
##
## Number of Fisher Scoring iterations: 2
Lmat = c(summary(regL_D)$coefficients[2],summary(regL_D)$coefficients[5],summary(regL_D)$coefficients[4],summary
(regL_M)$coefficients[2],summary(regL_M)$coefficients[5],summary(regL_M)$coefficients[4],summary(regL_T)$coeffici
ents[2],summary(regL_T)$coefficients[5],summary(regL_T)$coefficients[4])
dim(Lmat) <- c(3,3)
colnames(Lmat) <- c("Dom","MM","Trop")
rownames(Lmat) <- c("Dom","MM","Trop")
Lmat
## Dom MM Trop
## Dom -2.6907009 0.6429618 0.2339440
## MM 1.2271669 -2.4052122 0.1898946
## Trop 0.2651688 0.4419437 -2.3565169
Nmat = c(summary(regN_D)$coefficients[2],summary(regN_D)$coefficients[5],summary(regN_D)$coefficients[4],summary
(regN_M)$coefficients[2],summary(regN_M)$coefficients[5],summary(regN_M)$coefficients[4],summary(regN_T)$coeffici
ents[2],summary(regN_T)$coefficients[5],summary(regN_T)$coefficients[4])
dim(Nmat) <- c(3,3)
colnames(Nmat) <- c("Dom","MM","Trop")
rownames(Nmat) <- c("Dom","MM","Trop")
Nmat
## Dom MM Trop
## Dom -2.710040 -1.680632 0.5602944
## MM 1.177425 -2.330906 0.2091613
## Trop -1.131469 1.372235 -2.0504286
Mmat = c(summary(regM_D)$coefficients[2],summary(regM_D)$coefficients[5],summary(regM_D)$coefficients[4],summary
(regM_M)$coefficients[2],summary(regM_M)$coefficients[5],summary(regM_M)$coefficients[4],summary(regM_T)$coeffici
ents[2],summary(regM_T)$coefficients[5],summary(regM_T)$coefficients[4])
dim(Mmat) <- c(3,3)
colnames(Mmat) <- c("Dom","MM","Trop")
rownames(Mmat) <- c("Dom","MM","Trop")
Mmat
## Dom MM Trop
## Dom -2.7912746 0.5348440 0.06219394
## MM 0.5896852 -2.2557095 0.29537661
## Trop -0.4258873 0.1341903 -1.98234575
# 3.c Own price elasticities are fairly similar, although they are higher for Minute Maid and Tropicana in Leaf
L.
# Cross price elasticites differ by a much larger amount. It is negative for MM-Dom in leaf N, but positive i
n the others. It is positive for Dom-Trop in Leaf L, but negative in the others.
# 4.a They should be lower in stores with the highest own-price elasticity, as the change in demand will be rela
tively high given a certain markup.
# 4.b The cross price elasticities are generally lower in the lowest own-price elasticity leaf(M).
# i. This implies we should see higher markups in these low cross-price elasticity areas as demand will not inc
rease as much due to markups from other brands, and it is in your best interest to have higher markups as well.
# ii. The timing should be different, as the cross-price elasticities are different, meaning the optimal respon
ses will also be different.