Sample AKAP79 with ABC • uqsa

We use icpm-kth/SBtabVFGEN to read the model files and icpm-kth/rgsl as an interface to gsl_odeiv2 solvers (for initial value problems) to simulate the model:

require(SBtabVFGEN)
require(rgsl)
library(uqsa)
require(parallel)
#> Loading required package: parallel

Next, we import the model’s data tables from the SBtab files:

modelName <- checkModel("AKAP79",uqsa_example("AKAP79",pat="_gvf[.]c$"))
modelFiles <- uqsa_example(modelName,pat="[.]tsv$")
SBtab <- SBtabVFGEN::sbtab_from_tsv(modelFiles)
experiments <- SBtabVFGEN::sbtab.data(SBtab)

The function checkModel checks that the model files exist and compiles the model to a shared library using a c compiler, if the second argument is a c file.

parVal <- SBtab$Parameter[["!DefaultValue"]]

We have also defined a function that will transform the sampling variables parABC used by the ABC method into something that the model will accept as parameters. The plan is to sample in logarithmic space: the ABC algorithm will sample the logarithms of the model parameters. We limit the sampling to ranges, as appropriate for each parameter:

defRange <- c(rep(1000,19),1.9,1000,1.25,1.25,1.25,1.5,1.5,2)

ll <- parVal/defRange # lower limit
ul <- parVal*defRange # upper limit

ll = log10(ll) # log10-scale
ul = log10(ul) # log10-scale

The sampling procedure takes data in the list of experiments and compares the data points to the solution that gsl_odeiv2 returns. We define a list of experiments, subdividing them into smaller groups and processing the groups in sequence. Between the rounds of sampling the posterior of every result is used as the prior distribution of the next round. This mimics the arrival of data sets in sequence (from the lab). The intermediate distributions will be modelled using VineCopula.

experimentsIndices <- list(c(3, 12,18, 9), c(2, 11, 17, 8), c(1, 10, 16, 7))

Problem Size and core distribution:

ns <- 100 # Size of the sub-sample from each chain
npc <- 1000 # pre-calibration sample size

nChains <- 4
nCores <- parallel::detectCores()
nCoresPerChain <- nCores %/% nChains

options(mc.cores=nCoresPerChain)

ABC related settings:

delta <- 0.01

set.seed(7619201)

distanceMeasure <- function(funcSim, dataExpr=Inf, dataErr=Inf){
  if (all(is.finite(funcSim))){
    distance <- mean(((funcSim-as.matrix(dataExpr))/as.matrix(dataErr))^2, na.rm=TRUE)
  } else {
    distance <- Inf
  }
  return(distance)
}

We also setup a convenience function which saves the sampled values to a file:

save_sample <- function(ind,ABCMCMCoutput){
    I <- paste(ind, collapse="_")
    timeStr <- Sys.time()
    timeStr <- gsub(":","_", timeStr)
    timeStr <- gsub(" ","_", timeStr)
    if (!dir.exists("./PosteriorSamples")) {
        dir.create("./PosteriorSamples")
    }
    outFileR <- paste("./PosteriorSamples/Draws",modelName,"nChains",nChains,"ns",ns,"npc",npc,I,timeStr,".RData",collapse="_",sep="_")
    save(ABCMCMCoutput, file=outFileR)
}

Build a random number generator, and density function for the intial prior:

rprior <- rUniformPrior(ll, ul)
dprior <- dUniformPrior(ll, ul)

The sampling loop:

start_time = Sys.time()
for (i in 1:length(experimentsIndices)){
    expInd <- experimentsIndices[[i]]

    objectiveFunction <- makeObjective(experiments[expInd], modelName, distanceMeasure, log10ParMap)

    options(mc.cores = nCores)
    pC <- preCalibration(objectiveFunction, npc, rprior)
    M <- getMCMCPar(pC$prePar, pC$preDelta, delta=delta, num = nChains)

    options(mc.cores = nCoresPerChain)
    cl <- makeForkCluster(nChains)
    clusterExport(cl, c("objectiveFunction", "M", "ns", "delta", "dprior"))
    out_ABCMCMC <- parLapply(cl, 1:nChains, function(j) ABCMCMC(objectiveFunction, M$startPar[,j], ns, M$Sigma, delta, dprior))
    stopCluster(cl)

    ABCMCMCoutput <- do.call(Map, c(rbind,out_ABCMCMC))

    if(i == 1){

      C <- fitCopula(ABCMCMCoutput$draws, nCoresPerChain)

    } else { # Keep the draws that also fit the experiments considered in previous loops

      objectiveFunction <- makeObjective(experiments[unlist(experimentsIndices[1:(i-1)])], modelName, distanceMeasure, log10ParMap)
      # We relax the delta from 0.01 to 0.05
      ABCMCMCoutput$drawsAfterCheckFit <- checkFitWithPreviousExperiments(ABCMCMCoutput$draws,objectiveFunction,delta = delta*5)
      C <- fitCopula(ABCMCMCoutput$drawsAfterCheckFit, nCoresPerChain)

    }

    save_sample(expInd,ABCMCMCoutput)

    rprior <- rCopulaPrior(C)
    dprior <- dCopulaPrior(C)
}
end_time = Sys.time()
time_ = end_time - start_time

cat("Total time:\n",time_)

The sampling loop ran for ~24.5 minutes on a machine with the following specs:

MacBook Pro	(13-inch, M1, 2020)
Chip:	Apple M1
Memory:	16 GB

The result is a collection of intermediate samples (2 files) and a final posterior sample (1 file).