rm(list=ls(all=TRUE))
set.seed(060977) # set a seed for the random-number generator so that analyses involving randomness remain replicable



# Section 2.1: A small artificial data set ########################################################
## a first (made-up) small data set
P1s <- factor(rep(c("b","a","b","a"), c(1,7,9,3)) ) # predictor 1 small
P2s <- factor(rep(c("e","f","e"), c(6,10,4)))       # predictor 2 small
P3s <- factor(rep(c("m","n","m","n"), c(6,4,6,4)))  # predictor 3 small
DVs <- factor(rep(c("x","y"), c(10, 10)))           # response small
summary(D1 <- data.frame(P1s, P2s, P3s, DVs))
##  P1s    P2s    P3s    DVs
##  a:10   e:10   m:12   x:10
##  b:10   f:10   n: 8   y:10
ftable(P1s, P2s, P3s, DVs)
##             DVs x y
## P1s P2s P3s
## a   e   m       5 0
##         n       0 3
##     f   m       0 0
##         n       2 0
## b   e   m       1 0
##         n       0 1
##     f   m       0 6
##         n       2 0
## if you try to predict DVs from P1s (a -> x, b -> y), you get 70% right
table(P1s, DVs)
##    DVs
## P1s x y
##   a 7 3
##   b 3 7
## if you try to predict DVs from P2s (e -> x, f -> y), you get 60% right
table(P2s, DVs)
##    DVs
## P2s x y
##   e 6 4
##   f 4 6
## if you try to predict DVs from P3s (m -> x, n -> y), you get 50% right, i.e. chance baseline
table(P3s, DVs)
##    DVs
## P3s x y
##   m 6 6
##   n 4 4
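## a quick sanity check of those three figures: compute the accuracies directly
## from the prediction rules stated in the comments above (a/e/m -> x, b/f/n -> y):
mean(ifelse(P1s=="a", "x", "y")==DVs) # P1s alone: 0.7
mean(ifelse(P2s=="e", "x", "y")==DVs) # P2s alone: 0.6
mean(ifelse(P3s=="m", "x", "y")==DVs) # P3s alone: 0.5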
## but note: combining the two weaker predictors, P2s and P3s, gives you perfect accuracy:
ftable(P2s, P3s, DVs)
##         DVs x y
## P2s P3s
## e   m       6 0
##     n       0 4
## f   m       0 6
##     n       4 0
table(P2s:P3s, DVs)
##      DVs
##       x y
##   e:m 6 0
##   e:n 0 4
##   f:m 0 6
##   f:n 4 0
## e+m -> x   and   e+n -> y
## f+m -> y   and   f+n -> x
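## a minimal sketch of that combined rule in code: predict x whenever P2s and P3s
## 'agree' (e with m, f with n), predict y otherwise:
mean(ifelse((P2s=="e")==(P3s=="m"), "x", "y")==DVs) # perfect accuracy: 1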



## how do CART approaches handle this data set?
library(tree)
plot(tree.tree <- tree(DVs ~ P1s+P2s+P3s))
   text(tree.tree, pretty=0)

mean(predict(tree.tree, type="class")==DVs)
## [1] 0.75
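## as an additional check, one can cross-tabulate the tree's predictions with the
## response to see where the remaining 25% of errors are:
table(predicted=predict(tree.tree, type="class"), observed=DVs)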
detach(package:tree)


library(rpart)
plot(tree.rpart <- rpart(DVs ~ P1s+P2s+P3s), margin=0.1)
   text(tree.rpart, all=TRUE, use.n=TRUE)

mean(predict(tree.rpart, type="class")==DVs)
## [1] 0.7
detach(package:rpart)


library(party)
plot(tree.party <- ctree(DVs ~ P1s+P2s+P3s))

   mean(predict(tree.party)==DVs)
## [1] 0.5
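## printing the tree (an additional check) should show why accuracy is at the chance
## baseline: presumably no split passes ctree's significance test, so the tree is
## just a root node and predicts the same class for every case:
print(tree.party)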
detach(package:party)


library(partykit)
plot(tree.partykit <- ctree(DVs ~ P1s+P2s+P3s))

   mean(predict(tree.partykit)==DVs)
## [1] 0.5
detach(package:partykit)



# Section 2.2: The large version of the artificial data set #######################################
## a second (made-up) larger data set
P1l <- rep(P1s, 10) # predictor 1 large
P2l <- rep(P2s, 10) # predictor 2 large
P3l <- rep(P3s, 10) # predictor 3 large
DVl <- rep(DVs, 10) # response large
summary(D2 <- data.frame(P1l, P2l, P3l, DVl))
##  P1l     P2l     P3l     DVl
##  a:100   e:100   m:120   x:100
##  b:100   f:100   n: 80   y:100
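## quick check: the tenfold replication preserves the P2/P3 interaction pattern
## exactly, with every cell of the small data set's table multiplied by 10:
table(P2l:P3l, DVl)
##       DVl
##         x  y
##   e:m  60  0
##   e:n   0 40
##   f:m   0 60
##   f:n  40  0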
## how do CART approaches handle this data set?
library(tree)
plot(tree.tree <- tree(DVl ~ P1l+P2l+P3l))
   text(tree.tree, pretty=0)

mean(predict(tree.tree, type="class")==DVl)
## [1] 1
## but does this perfect accuracy go away when we prune the tree using misclassification rates?
pruning <- cv.tree(tree.tree, FUN=prune.misclass) # cross-validate tree sizes using misclassification counts
plot(pruning$size, pruning$dev, type="b"); grid()

cart.1.pruned <- tree::prune.tree(tree.tree, best=4)
plot(cart.1.pruned)
   text(cart.1.pruned, pretty=0, all=TRUE)

mean(predict(cart.1.pruned, type="class")==DVl)
## [1] 0.85
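## as an extra check, cross-tabulating the pruned tree's predictions with the
## response shows which cells account for the remaining 15% of misclassifications:
table(predicted=predict(cart.1.pruned, type="class"), observed=DVl)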
## and does this perfect accuracy also go away when we prune the tree using deviance instead?
pruning <- cv.tree(tree.tree) # cross-validate tree sizes using deviance (cv.tree's default)
plot(pruning$size, pruning$dev, type="b"); grid()

cart.1.pruned <- tree::prune.tree(tree.tree, best=4)
plot(cart.1.pruned)
   text(cart.1.pruned, pretty=0, all=TRUE)

mean(predict(cart.1.pruned, type="class")==DVl)
## [1] 0.85
detach(package:tree)


library(rpart)
plot(tree.rpart <- rpart(DVl ~ P1l+P2l+P3l), margin=0.1)
   text(tree.rpart, all=TRUE, use.n=TRUE)

mean(predict(tree.rpart, type="class")==DVl)
## [1] 1
## but does this perfect accuracy go away when we prune the tree using cross-validated misclassification rates?
plotcp(tree.rpart) # there is no cp value below the dotted line, so we pick 0.12
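## the same information can also be inspected numerically via rpart's cp table,
## which lists the cross-validated error for each candidate cp value:
printcp(tree.rpart)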

tree.rpart.pruned <- prune(tree.rpart, cp=0.12) # what we decided on in the previous code
plot(tree.rpart.pruned, margin=0.1)
   text(tree.rpart.pruned, all=TRUE, use.n=TRUE)

mean(predict(tree.rpart.pruned, type="class")==DVl)
## [1] 0.85
detach(package:rpart)


library(party)
plot(tree.ctree <- party::ctree(DVl ~ P1l+P2l+P3l))