This example covers the AKAP79 example model with rgsl as the solver backend; alternatively, it is possible to use deSolve instead (not covered here).

We use icpm-kth/SBtabVFGEN to read the model files and icpm-kth/rgsl as an interface to gsl_odeiv2 solvers (for initial value problems) to simulate the model:

library(uqsa)
require(SBtabVFGEN)
#> Loading required package: SBtabVFGEN
require(rgsl)
#> Loading required package: rgsl
require(parallel)
#> Loading required package: parallel

Next, we import the model’s data tables from the SBtab files:

modelName <- checkModel("AKAP79",uqsa_example("AKAP79",pat="gvf[.]c$"))
model.tsv <- uqsa_example(modelName,pat="[.]tsv$")
model.tab <- SBtabVFGEN::sbtab_from_tsv(model.tsv)
experiments <- SBtabVFGEN::sbtab.data(model.tab)
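Before moving on, we can take a quick, optional look at what was imported (just an inspection, not required for the workflow):

names(model.tab)     # the SBtab tables of the model, as data.frames
length(experiments)  # number of experimental data sets available for fitting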

The function checkModel checks that the model files exist and compiles the model into a shared library (a file ending in .so) using a C compiler; the GSL library needs to be installed on the system for this to work. On Windows, it is easier to use the deSolve solvers.

The model.tab variable contains the tabulated properties of the model as a list of data.frames. The source files of the model, AKAP79.R, AKAP79.vf, and AKAP79_gvf.c, were all created automatically from these tables. Because they are tables, we can retrieve default values from them.

# default parameter values, taken from the SBtab Parameter table
parVal <- model.tab$Parameter[["!DefaultValue"]]

# map the ABC sampling variables (log10 scale) to model parameters (linear scale)
parMap <- function(parABC){
    return(10^parABC)
}

We have also defined a function, parMap, that transforms the sampling variables parABC used by the ABC method into values the model accepts as parameters. The plan is to sample in logarithmic space: the ABC algorithm will sample the logarithms of the model parameters. We limit the sampling to ranges appropriate for each parameter:

defRange <- c(rep(1000,19),1.9,1000,1.25,1.25,1.25,1.5,1.5,2)

# define lower and upper limits for the log-uniform prior distribution of the parameters
ll <- parVal/defRange
ul <- parVal*defRange

ll <- log10(ll) # log10-scale
ul <- log10(ul) # log10-scale
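
As an optional sanity check, the default parameter values should lie inside the prior box on the log10 scale, because every entry of defRange is larger than 1:

stopifnot(all(log10(parVal) >= ll & log10(parVal) <= ul))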

The sampling procedure takes the data in the list of experiments and compares the data points to the solutions that gsl_odeiv2 returns. We subdivide the experiments into smaller groups and process the groups in sequence: between rounds of sampling, the posterior sample of one round is used as the prior distribution of the next. This mimics the arrival of data sets in sequence (from the lab). The intermediate distributions are modelled using the VineCopula package.

experimentsIndices <- list(c(3, 12,18, 9), c(2, 11, 17, 8), c(1, 10, 16, 7))
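Assuming the experiments list returned by sbtab.data is named after the experiments, we can check which data sets end up in each group, e.g. for the first group:

names(experiments)[experimentsIndices[[1]]]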

Problem size and core distribution:

ns <- 100 # Size of the sub-sample from each chain
npc <- 1000 # pre-calibration sample size

nChains <- 4
nCores <- parallel::detectCores()
nCoresPerChain <- nCores %/% nChains

options(mc.cores=nCoresPerChain)
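On an 8-core machine, for example, detectCores() returns 8, so nCoresPerChain is 2 and each of the 4 chains is assigned 2 cores.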

ABC-related settings:

delta <- 0.01 # ABC acceptance threshold for the distance between simulation and data

set.seed(7619201)

distanceMeasure <- function(funcSim, dataExpr=Inf, dataErr=Inf){
  if (all(is.finite(funcSim))){
    distance <- mean((1/71.67*(funcSim-as.matrix(dataExpr))/as.matrix(dataErr))^2, na.rm=TRUE)
  } else {
    distance <- Inf
  }
  return(distance)
}
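To illustrate what distanceMeasure does, here is a small, purely hypothetical example with toy matrices (not taken from the AKAP79 data): finite simulation output yields a finite, normalized mean squared distance, while any non-finite simulation output is mapped to Inf and thus rejected.

ySim  <- matrix(c(1.0, 2.0, 3.0), nrow=1)
yData <- matrix(c(1.1, 1.9, 3.2), nrow=1)
yErr  <- matrix(c(0.1, 0.1, 0.1), nrow=1)
distanceMeasure(ySim, yData, yErr)            # small, finite value
distanceMeasure(c(1.0, NA, Inf), yData, yErr) # Inf: simulation contains non-finite values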

We also set up a convenience function that saves the sampled values to a file:

save_sample <- function(ind,ABCMCMCoutput){
    I <- paste(ind, collapse="_")
    timeStr <- Sys.time()
    timeStr <- gsub(":","_", timeStr)
    timeStr <- gsub(" ","_", timeStr)
    if (!dir.exists("./PosteriorSamples")) {
        dir.create("./PosteriorSamples")
    }
    outFileR <- paste("./PosteriorSamples/Draws",modelName,"nChains",nChains,"ns",ns,"npc",npc,I,timeStr,".RData",collapse="_",sep="_")
    save(ABCMCMCoutput, file=outFileR)
}
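With the settings above, this produces one file per round of the sampling loop below, with names roughly of the form PosteriorSamples/Draws_AKAP79_nChains_4_ns_100_npc_1000_<indices>_<timestamp>_.RData.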

Build a random number generator and a density function for the initial prior:

rprior <- rUniformPrior(ll, ul)
dprior <- dUniformPrior(ll, ul)
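As a quick, optional check (assuming that rprior takes a sample size and returns a matrix of draws, and that dprior evaluates the prior density of such draws, which is how they are used by preCalibration and ABCMCMC below), we can draw a few parameter vectors from the initial prior:

testPar <- rprior(5) # assumption: rprior(n) returns n draws on the log10 scale
dim(testPar)         # inspect the layout of the returned draws
dprior(testPar)      # assumption: evaluates the prior density of the draws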

The sampling loop:

start_time <- Sys.time()
for (i in seq_along(experimentsIndices)){
    expInd <- experimentsIndices[[i]]

    objectiveFunction <- makeObjective(experiments[expInd], modelName, distanceMeasure, parMap)
  
    options(mc.cores = nCores)
    pC <- preCalibration(objectiveFunction, npc, rprior)
    M <- getMCMCPar(pC$prePar, pC$preDelta, delta=delta, num = nChains)

    options(mc.cores = nCoresPerChain)
    cl <- makeForkCluster(nChains)
    clusterExport(cl, c("objectiveFunction", "M", "ns", "delta", "dprior"))
    out_ABCMCMC <- parLapply(cl, 1:nChains, function(j) ABCMCMC(objectiveFunction, M$startPar[,j], ns, M$Sigma, delta, dprior))
    stopCluster(cl)

    ABCMCMCoutput <- do.call(Map, c(rbind, out_ABCMCMC))

    if (i == 1){
        C <- fitCopula(ABCMCMCoutput$draws, nCoresPerChain)
    } else { # keep only the draws that also fit the experiments considered in previous rounds
        objectiveFunction <- makeObjective(experiments[unlist(experimentsIndices[1:(i-1)])], modelName, distanceMeasure, parMap)
        # we relax delta from 0.01 to 0.05 for this check
        ABCMCMCoutput$drawsAfterCheckFit <- checkFitWithPreviousExperiments(ABCMCMCoutput$draws, objectiveFunction, delta = 0.05)
        C <- fitCopula(ABCMCMCoutput$drawsAfterCheckFit, nCoresPerChain)
    }

    save_sample(expInd, ABCMCMCoutput)
    
    rprior <- rCopulaPrior(C)
    dprior <- dCopulaPrior(C)
}
end_time <- Sys.time()
time_ <- end_time - start_time

cat("Total time:", format(time_), "\n")

The sampling loop ran for ~24.5 minutes on a machine with the following specs:

MacBook Pro (13-inch, M1, 2020)
Chip: Apple M1
Memory: 16 GB

The result is a collection of intermediate samples (2 files) and a final posterior sample (1 file).
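Because the samples were written with save(), they can be loaded back into a later session with load(). A minimal sketch, assuming the files are still in the PosteriorSamples directory created by save_sample:

f <- list.files("./PosteriorSamples", pattern="[.]RData$", full.names=TRUE)
load(f[1]) # restores the ABCMCMCoutput object stored in that file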

Here we plot the experimental time series together with simulated trajectories for a subsample of the draws from the posterior distribution.

draws <- ABCMCMCoutput$drawsAfterCheckFit

simulate <- simulator.c(experiments[unlist(experimentsIndices)], modelName, parMap, noise = TRUE)
# simulate a random subsample of at most 100 posterior draws
output_yy <- simulate(t(draws[sample(1:dim(draws)[1], min(100, dim(draws)[1])), ]))
p <- plotTimeSeries(output_yy, experiments[unlist(experimentsIndices)])