##################################################################################################
### Trial Code to run 2 arm fixed trial with Bayesian ordinal regression as primary analysis ###
### Prepared by Liz Lorenzi and Lindsay Berry
### Functions broken up into groups:
###   - data: data.init, data.add, data.tally, data.output 
###       - Generates patient data based on probability assumptions on the ordinal outcome and summarizes the data
###   - allocation: fixedallocator.init, fixedallocator.allocate, fixedallocator.output 
###       - Allocates patients to two arms and summarizes allocation
###   - model: model.init, model.fit, model.output 
###       - Fits Bayesian model using Stan, generates posterior samples, estimates QOI 
###       - NOTE: for Jags users, an option to use jags instead of stan is provided at the bottom of this file
###   - decision: bestprpbodecision.init, bestprpbodecision.evaluate, bestprpbodecision.output 
###       - Summarizes whether success or inferiority decisions are made at end of trial
### Runonetrial - runs a single trial
### getPower - run multiple trials and return power (as well as matrix of all simulated trial results)
##################################################################################################

## Packages to be loaded ##
library(dplyr)
library(rstanarm)

###### Data Generation Functions ######
data.init=function(ctrl.probs =   c(.2, .10, rep(0.015, 14), rep(0.07, 7)),
                   OR = c(1,1,1)){
  #Dependencies
  #     requires an allocator with a$newarms providing
  #     new arms to be allocated in the add method
  #     Depends on function get_probs_OR to convert outcome probabilities under assumed treatment effect
  #d$datamat will contain two rows, first is arm (1=control, 2=active)
  #    and second is the outcome level for each subject
  #d$OR = OR for the active intervention
  #d$ctrl.probs contains the control probabilities for each level of the ordinal outcome
  #d$numCat - number of categories in the ordinal endpoint
  #d$numarms is the number of arms (length(OR)+1)
  #d$n is the number enrolled per arm
  #d$add (method) generates and adds data to datamat
  #     assumes an allocator a exists and a$newarm is vector
  #     of arms for the new patients
  #d$tally (method) just takes the database and produces
  #     d$n, d$avgy and d$numdeaths which are the sample size per arm,
  #     the average outcome per arm, and the number of deaths per arm
  #d$output (method) generates a vector  to be included in output matrices
  #d$p_outs - list of outcome probabilities for each arm
  d = list()
  d$datamat = NULL
  d$OR = OR
  d$ctrl.probs = ctrl.probs
  d$numCat = length(ctrl.probs)
  d$numarms = length(OR) + 1
  d$n = rep(0, d$numarms)
  d$add = data.add
  d$tally = data.tally
  d$output = data.output
  
  ## Save list of outcome probs for each arm
  p_outs = list()
  p_outs[[1]] = ctrl.probs
  for(i in 1:length(OR)){
    p_outs[[i+1]] = get_probs_OR(ctrl.probs, OR[i])
  }
  d$p_outs = p_outs
  
  return(d)
}


get_probs_OR = function(probs0, OR = 1){
  # function input is prob outcome vector and odds ratio
  # returns the implied prob vector with OR treatment effect
  cumprobs0 = cumsum(probs0[-length(probs0)])
  L0 = log(cumprobs0/(1-cumprobs0))
  L1 = L0 - log(OR)
  cumprobs1 = exp(L1)/(1 + exp(L1))
  probs1 = c(cumprobs1 - c(0, cumprobs1[-length(cumprobs1)]), 1 - cumprobs1[length(cumprobs1)])
  probs1
}

data.add=function(d, a){
  #a is an allocator list, must have a$newarms
  #giving assignments for a set of new patients
  n = length(a$newarms)
  newy = sapply(1:n, function(x) {
    sample(c(1:d$numCat), 1, prob = d$p_outs[[a$newarms[x]]])
  })

  newdata = rbind(a$newarms[1:n], newy)
  d$datamat = cbind(d$datamat, newdata)
  
  d = data.tally(d)  
  d = data.output(d)
  return(d)
}

data.tally=function(d) {
  #d$n, d$avgy and d$numDeaths - 
  #provides estimates of number per arm, 
  #average y per arm, and number of deaths per arm
  d$n = d$avgy = d$numDeaths = rep(0, d$numarms)
  for(i in 1:d$numarms){
    d$n[i] = sum(d$datamat[1,] == i)
    d$avgy[i] = mean(d$datamat[2, d$datamat[1,] == i])
    d$numDeaths[i] = sum(d$datamat[2,d$datamat[1,] == i] == 1) #1 is outcome level for deaths
  }
  d = data.output(d)  #keep data up to date
  return(d)
}

data.output=function(d) {
  d$outvec = c(sum(d$n), d$n, d$avgy, d$numDeaths)
  names(d$outvec)=c("nComplete",
                    paste("nComp",1:d$numarms,sep=""),
                    paste("avg_y",1:d$numarms,sep=""),
                    paste("numDeaths",1:d$numarms,sep=""))
  return(d)
}


###### Subject Allocation Functions ######
fixedallocator.init=function(ntoadd,
                             armratio=c(1,1), 
                             succ.thresh=0.975, 
                             fut.thresh=0) {
  #Dependencies
  #     number to add - in this example same as maxN
  #     armratio - (1,1) or (1,2,1), etc.
  #     success threshold or futility thresholds (leave at 0 if no futility)
  #generate patients assignments for ntoadd patients at a time
  #  a$armratio gives the number of patients on each arm in
  #  each block. If n is not divisible by sum(a$armratio)
  #  then partial blocks are generated
  #a$allocate (method) generates a$newarms, the new assignments
  #a$output (method) generates output, the proportion of
  # patients on control and the relative proportion of patients
  # on each active arm
  #a$sthreshold - success threhsold for determining success on active arm
  #a$fthreshold - futility threshold for determining inferiority of active arm
  a=list()
  a$armratio=armratio
  a$ntoadd=ntoadd
  a$activearms = rep(TRUE, length(armratio))
  a$allocate=fixedallocator.allocate
  a$output=fixedallocator.output
  
  a$sthreshold = succ.thresh
  a$fthreshold = fut.thresh
  
  return(a)
}

fixedallocator.allocate=function(a, d, db) {
  #allocates a$ntoadd patients in blocks of sum(a$armratio)
  #incomplete blocks are generated as necessary
  numblocks=trunc(a$ntoadd/sum(a$armratio))+1
  a$newarms=NULL
  blocktemp=rep(1:length(a$armratio),a$armratio)
  for (i in 1:numblocks) {
    a$newarms=c(a$newarms,sample(blocktemp))
  }
  a$newarms=a$newarms[1:a$ntoadd]
  a=fixedallocator.output(a)  #keep up to date
  return(a)
}


fixedallocator.output=function(a) {
  #provides resulting allocation probabilities
  a$outvec=c(a$armratio[1]/sum(a$armratio),
             a$armratio[-1]/sum(a$armratio))
  names(a$outvec)=c("ctrlalloc",
                    paste("actalloc",2:(length(a$armratio)),
                          sep=""))
  return(a)
}


###### Bayesian Model Functions ######
model.init=function(mcmc.iter = 2000) {
  #Inputs - number of mcmc samples to generate
  #m$fit - fits bayesian ordinal model, 
  #computes posterior distribution and QOI such as Pr(Pbo) 
  #m$output - returns QOI such as Pr(PBO)
  
  m=list()
  m$mcmc.iter = mcmc.iter
  m$fit=model.fit
  m$output=model.output
  return(m)
}

model.fit=function(m,d,a) {
  #Grabs outcome from d$datamat, and creates X variable to indicate
  #whether patient assigned to active treatment (1) or control (0)
  #Fits stan model, extracts posterior samples,
  #Estimates posterior median of OR and
  #probability OR>1 (prpbo), and Pr(NIF) or the Pr(OR>1.2)

  Y = d$datamat[2,]

  #Convert treatment assignment to the indicator variable inputted into model
  X = rep(0, length(Y))
  X[which(d$datamat[1, ] == 2)] <- 1

  N = length(Y)

  stan_data = data.frame(
    x=X,
    y=factor(Y),
    K=8,
    N=length(Y)
  )  
  
  fit = stan_polr(y ~ x, data = stan_data, prior = NULL, prior_counts = dirichlet(1),
                  seed = 12345, method = 'logistic', algorithm = 'sampling',
                  chains = 1)
  
  samp = fit$stanfit@sim[[1]][[1]]$`beta[1]`
  ORsamp = exp(samp)
  
  m$postMedianOR =  m$prpbo =  m$prNIF= numeric(d$numarms-1)

  for(i in 2:d$numarms){
    m$postMedianOR[i-1] = median(ORsamp)
    m$prpbo[i-1] = mean(ORsamp > 1)
    m$prNIF[i-1] = mean(ORsamp < 1.2)
  }

  m$bestarm = ifelse(m$prpbo>0.5, 2, 1)
  m=model.output(m)
  return(m)
}


model.output=function(m) {
  m$outvec=c(m$postMedianOR, m$prpbo, m$prNIF,
              m$bestarm)
  narms=length(m$postMedianOR)
  names(m$outvec)=c(paste("postMedianOR",1:narms,sep=""),
                    paste("prpbo",1:narms,sep=""),
                    paste("prNIF1.2_",1:narms,sep=""),
                    "bestarm")
  return(m)
}


bestprpbodecision.init=function(threshold=0.90,minn=240,
                                higher=TRUE) {
  #Dependencies
  #     requires data d with d$n giving sample size
  #     requires model m with m$prpbo giving Pr(Pbo) for active arm
  #Evaluates whether m$prpbo is higher (higher=TRUE)
  #     or if m$prNIF is lower (higher=FALSE) than threshold AND
  #     that at least minn patients have been enrolled in
  #     the trial (not arm)
  dr=list()
  dr$threshold=threshold
  dr$minn=minn
  dr$higher=higher
  dr$evaluate=bestprpbodecision.evaluate
  dr$output=bestprpbodecision.output
  return(dr)
}

bestprpbodecision.evaluate=function(dr,m,d) {
  if (dr$higher) {
    dr$result= (sum(d$n) >= dr$minn) && (m$prpbo >= dr$threshold) #success result
  } else {
    dr$result = (sum(d$n) >= dr$minn) && (m$prNIF > dr$threshold) #inferiority result
  }
  dr = bestprpbodecision.output(dr)  #keep up to date
  return(dr)
}


bestprpbodecision.output=function(dr) {
  dr$outvec=dr$result
  names(dr$outvec)="drresult" 
  return(dr)
}


###############################################
## Put all of the pieces together and run a full trial
###############################################
runonetrial=function(maxN = 500,
                     OR = c(1),
                     ctrl.probs = c(0.02, 0.01, 0.02, 0.07, 0.08, 0.10, 0.30, 0.40),
                     succ.thresh = 0.975,
                     fut.thresh = 0.90){
  
  d = data.init(ctrl.probs = ctrl.probs, OR = OR)
  m = model.init()
  a = fixedallocator.init(armratio =  c(1, 1), 
                          ntoadd = maxN, 
                          succ.thresh = succ.thresh,
                          fut.thresh = fut.thresh)
  
  drsucc = bestprpbodecision.init(threshold = succ.thresh, minn = maxN, higher = TRUE)
  drfut = bestprpbodecision.init(threshold = fut.thresh, minn = maxN, higher = FALSE)

  a = a$allocate(a=a, d=d)
  d = d$add(d=d, a=a)
  m = m$fit(m=m, d=d, a=a)
  drsucc = drsucc$evaluate(drsucc, m=m, d=d)
  drfut = drfut$evaluate(drfut, m=m, d=d)
  trialoutvec = c(d$outvec, a$outvec, m$outvec,
                  drsucc$outvec, drfut$outvec)
  trialoutmat = matrix(trialoutvec, nrow=1)
  colnames(trialoutmat) = names(trialoutvec)
  
  colnames(trialoutmat)[which(colnames(trialoutmat) == "drresult")] = c('succ.drresult',
                                                                        "fut.drresult")
  
  return(list(trialoutmat = trialoutmat))
}


getPower = function(maxN = 500, n_it = 1000, cont.rates=  c(0.02, 0.01, 0.02, 0.07, 0.08, 0.10, 0.30, 0.40), 
                                    OR=c(1), succ.thresh = 0.975){
  save = lapply(1:n_it, function(x){runonetrial(maxN = maxN, 
                                                OR=OR,
                                                succ.thresh=succ.thresh,
                                                ctrl.probs = cont.rates)})
  outmat_list = lapply(seq_along(save), function(x){cbind(x, save[[x]]$trialoutmat)})
  interims = do.call(rbind, outmat_list)
  colnames(interims)[1] = "Sim"

  power = mean(interims[,"succ.drresult"])
  
  return(list(OR=OR, power=power, interims=interims))#interims = interims))
}