Multitreatment-uplift-modeling/performance.R at main · VerbekeLab/Multitreatment-uplift-modeling · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Based on Leo Guelman and Floris Devriendt
# Evaluating the performance of MTUM

# Compute the uplift score based on model predictions
uplift_score <- function(predictions,model_name,df_test,indicator_treatment,control,treatment_1,treatment_2){

  if(model_name == "sma" | model_name == "dia"){

    pr.y1_ct <- predictions[,3]
    pr.y1_t1 <- predictions[,1]
    pr.y1_t2 <- predictions[,2]
    dif.pred.t1 <- as.numeric(pr.y1_t1 - pr.y1_ct)
    dif.pred.t2 <- as.numeric(pr.y1_t2 - pr.y1_ct)
    max.dif <- pmax(dif.pred.t1,dif.pred.t2)
    pred.treat <- ifelse(max.dif < 0,0,ifelse(max.dif == dif.pred.t1,1,2))
    uplift_predictions <- cbind(max.dif,pred.treat)
    return(uplift_predictions)

  } else if(model == "cknn"){

    pr.y1_ct <- predictions[,1]
    pr.y1_t1 <- predictions[,2]
    pr.y1_t2 <- predictions[,3]
    dif.pred.t1 <- as.numeric(pr.y1_t1 - pr.y1_ct)
    dif.pred.t2 <- as.numeric(pr.y1_t2 - pr.y1_ct)
    max.dif <- pmax(dif.pred.t1,dif.pred.t2)
    pred.treat <- ifelse(max.dif < 0,0,ifelse(max.dif == dif.pred.t1,1,2))
    uplift_predictions <- cbind(max.dif,pred.treat)
    return(uplift_predictions)

  } else if(model == "nua"){

    pr.y1_ct_m1 <- predictions[,3]
    pr.y1_ct_m2 <- predictions[,4]
    pr.y1_t1 <- predictions[,1]
    pr.y1_t2 <- predictions[,2]
    dif.pred.t1 <- as.numeric(pr.y1_t1 - pr.y1_ct_m1)
    dif.pred.t2 <- as.numeric(pr.y1_t2 - pr.y1_ct_m2)
    max.dif <- pmax(dif.pred.t1,dif.pred.t2)
    pred.treat <- ifelse(max.dif < 0,0,ifelse(max.dif == dif.pred.t1,1,2))
    uplift_predictions <- cbind(max.dif,pred.treat)
    return(uplift_predictions)

  } else if(model == "mmoa"){
    # Predicted probabilities for the classes of interest
    pr.y0.ct <- predictions[,1]
    pr.y0.t1 <- predictions[,2]
    pr.y0.t2 <- predictions[,3]
    pr.y1.ct <- predictions[,4]
    pr.y1.t1 <- predictions[,5]
    pr.y1.t2 <- predictions[,6]
    # Probabilities of each treatment group
    obs.treat.probs <- as.data.frame(prop.table(table(df_test[,indicator_treatment])))
    obs.prob.ct <- obs.treat.probs[which(obs.treat.probs$Var1 == control),2]
    obs.prob.t1 <- obs.treat.probs[which(obs.treat.probs$Var1 == treatment_1),2]
    obs.prob.t2 <- obs.treat.probs[which(obs.treat.probs$Var1 == treatment_2),2]
    # Calculating the model score for both treatments
    dif.pred.t1 <- (pr.y1.t1/obs.prob.t1 + pr.y0.ct/obs.prob.ct) - (pr.y0.t1/obs.prob.t1 + pr.y1.ct/obs.prob.ct)
    dif.pred.t2 <- (pr.y1.t2/obs.prob.t2 + pr.y0.ct/obs.prob.ct) - (pr.y0.t2/obs.prob.t2 + pr.y1.ct/obs.prob.ct)
    max.dif <- pmax(dif.pred.t1,dif.pred.t2)
    pred.treat <- ifelse(max.dif < 0,0,ifelse(max.dif == dif.pred.t1,1,2))
    uplift_predictions <- cbind(max.dif,pred.treat)
    return(uplift_predictions)

  } else if(model == "python"){

    dif.pred.t1 <- predictions[,2]
    dif.pred.t2 <- predictions[,3]
    max.dif <- pmax(dif.pred.t1,dif.pred.t2)
    pred.treat <- ifelse(max.dif < 0,0,ifelse(max.dif == dif.pred.t1,1,2))
    uplift_predictions <- cbind(max.dif,pred.treat)
    return(uplift_predictions)
  }
}

# Compute the performance table
performance_table <- function(predictions,df_test,uplift_predictions,indicator_treatment,outcome,control,treatment_1,treatment_2){

  df_test <- df_test[,c(indicator_treatment,outcome)]
  df_performance <- cbind(df_test,predictions,uplift_predictions)
  df_performance$correct_pred <- ifelse(df_performance[,indicator_treatment] == treatment_1 & df_performance$pred.treat == 1,1,
                                 ifelse(df_performance[,indicator_treatment] == treatment_1 & df_performance$pred.treat == 2,1,
                                 ifelse(df_performance[,indicator_treatment] == control,1,0)))
  performance.order <- order(df_performance$max.dif, decreasing = TRUE)
  performance.sorted <- df_performance[performance.order,]
  performance.sorted[,"n.treat"] <- ifelse(performance.sorted[,indicator_treatment] == treatment_1 | performance.sorted[,indicator_treatment] == treatment_2,1,0)
  performance.sorted[,"n.control"] <- ifelse(performance.sorted[,indicator_treatment] == control,1,0)
  performance.sorted[,"n.y1_treat"] <- ifelse(performance.sorted[,"n.treat"] & performance.sorted[,outcome] == 1,1,0)
  performance.sorted[,"n.y1_control"] <- ifelse(performance.sorted[,"n.control"] == 1 & performance.sorted[,outcome] == 1,1,0)
  performance.sorted[,"r.y1_treat"] <- performance.sorted$n.y1_treat/performance.sorted$n.treat
  performance.sorted[,"r.y1_control"] <- performance.sorted$n.y1_control/performance.sorted$n.control
  performance.sorted[is.na(performance.sorted)] <- 0
  performance.sorted[,"uplift"] <- performance.sorted[,"r.y1_treat"] - performance.sorted[,"r.y1_control"]
  df_performance <- subset(performance.sorted, correct_pred == 1)
  res <- cbind.data.frame(group   = 1:nrow(df_performance),
                          n.treat    = df_performance$n.treat,
                          n.control    = df_performance$n.control,
                          n.y1_treat = df_performance$n.y1_treat,
                          n.y1_control = df_performance$n.y1_control,
                          r.y1_treat = df_performance$r.y1_treat,
                          r.y1_control = df_performance$r.y1_control,
                          uplift   = df_performance$uplift)

  #class(res) <- "performance"
  return(res)
}

# Expected response
expected_response <- function(df_test,predictions,uplift_predictions,indicator_treatment,treatment_1,treatment_2,control){
  obs.treat.probs <- as.data.frame(prop.table(table(df_test[,indicator_treatment])))
  obs.prob.ct <- obs.treat.probs[which(obs.treat.probs$Var1 == ppdf$control),2]
  obs.prob.t1 <- obs.treat.probs[which(obs.treat.probs$Var1 == ppdf$treatment_1),2]
  obs.prob.t2 <- obs.treat.probs[which(obs.treat.probs$Var1 == ppdf$treatment_2),2]
  df_predictions <- as.data.frame(predictions)
  df_predictions$dif.pred <- uplift_predictions[,1]
  df_predictions$opt.treat <- uplift_predictions[,2]
  df_predictions$obs.treat <- ifelse(df_test[,indicator_treatment] == treatment_1,1,ifelse(df_test[,indicator_treatment] == treatment_2,2,0))
  df_predictions$outcome <- df_test[,outcome]
  df_predictions$z.opt <- ((df_predictions$outcome/obs.prob.ct)*(ifelse(df_predictions$opt.treat == 0,1,0))*(ifelse(df_predictions$obs.treat == 0,1,0)))+
                          ((df_predictions$outcome/obs.prob.t1)*(ifelse(df_predictions$opt.treat == 1,1,0))*(ifelse(df_predictions$obs.treat == 1,1,0)))+
                          ((df_predictions$outcome/obs.prob.t2)*(ifelse(df_predictions$opt.treat == 2,1,0))*ifelse(df_predictions$obs.treat == 2,1,0))
  df_predictions$z.ct <- (df_predictions$outcome/obs.prob.ct)*ifelse(df_predictions$obs.treat==0,1,0)
  predictions_order <- order(df_predictions$dif.pred,decreasing = TRUE)
  predictions_sorted <- df_predictions[predictions_order,]
  average.expected.response <- c()
  for (x in 1:(nrow(predictions_sorted)-1)){
    average.expected.response[x] <- (sum(predictions_sorted[1:x,"z.opt"])+sum(predictions_sorted[(x+1):nrow(predictions_sorted),"z.ct"]))/nrow(predictions_sorted)
  }
  all.control <- mean(predictions_sorted$z.ct)
  all.opt.treat <- mean(predictions_sorted$z.opt)
  average.expected.response <- c(all.control,average.expected.response,all.opt.treat)
  res <- data.frame(x = seq(0,length(average.expected.response)-1),z=average.expected.response)
  return(res)
}