Parallel RoadRunnerMap

RoadRunner compiles SBML code into machine code for fast execution of model simulations. However, model compilation takes time and can be prohibitive if users need to simulate a large variety of models at once. To alleviate some of this burden, we provide a RoadRunnerMap object, which is a container for roadrunner objects. The RoadRunnerMap has a dict-like interface and is intended to be a drop-in replacement for a Python dictionary specifically for use with roadrunner models. The major difference is that RoadRunnerMap is thread-safe and uses a thread pool for building and storing models. The interested reader can refer to the open source parallel hashmap for the thread-safe hash map and thread-pool for the multithreading capabilities used by RoadRunnerMap.

For these demonstrations, we require some SBML models to load. To simplify this process, we provide a function that will download the curated section of biomodels locally for you to use.

Note

Windows likes to hang on to open files, meaning we cannot clean up all traces of the files used in the download process. You can delete any residual files manually.

Here is the code for downloading the curated section of biomodels. It is imported in the following examples.

import zipfile
import os, sys, glob

from roadrunner import RoadRunner, RoadRunnerMap
from roadrunner.tests.download_biomodels import download_biomodels

# Both the downloaded archive and the extracted models are kept next to this script.
current_directory = os.path.split(__file__)[0]
# Folder the archive is extracted into.
biomodels_directory = os.path.join(current_directory, "biomodels")
# Location at which the downloaded archive is expected: the same path with a ".zip" suffix.
biomodels_zip = biomodels_directory + ".zip"

def get_biomodels_folder() -> str:
    """Return the path of the local ``biomodels`` folder, creating it on demand.

    If ``biomodels_directory`` does not exist yet, the curated biomodels
    archive is fetched with ``download_biomodels`` and unpacked into that
    folder. An existing folder is reused as-is, so the work is done at most once.

    Returns:
        The value of ``biomodels_directory``.

    Raises:
        AssertionError: if the archive returned by ``download_biomodels`` is
            not located at ``biomodels_zip``.
    """
    folder_missing = not os.path.isdir(biomodels_directory)
    if folder_missing:
        print("Downloading the curated section from biomodels")
        archive_path = download_biomodels(current_directory)
        # The helper is expected to place the archive exactly at `biomodels_zip`.
        assert biomodels_zip == archive_path, f"{biomodels_zip} != {archive_path}"

        # Unpack every model into the target folder.
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall(biomodels_directory)
    return biomodels_directory



if __name__ == "__main__":
    # When executed as a script, make sure the curated biomodels are available
    # locally so that the examples have SBML files to load.
    get_biomodels_folder()

Loading Models: 1 Thread

When a single thread is requested, serial algorithms take over from the parallel ones. This avoids some of the overhead required to set up multiple threads and is useful when you only have a few models to manage.

import os, sys, glob
from roadrunner import RoadRunner, RoadRunnerMap, Config
from get_biomodels import get_biomodels_folder
import time

# Get the curated section of biomodels locally and list its SBML files.
biomodels_folder = get_biomodels_folder()
biomodels_files = glob.glob(os.path.join(biomodels_folder, "*.xml"))

# Disable model caching so that load times can be compared fairly.
Config.setValue(Config.LOADSBMLOPTIONS_RECOMPILE, True)

number_of_threads = 1
number_of_models = 20

# Build the first `number_of_models` models using `number_of_threads` threads.
# When the number of threads is 1, multithreading is disabled and
# serial algorithms take over.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
rrm = RoadRunnerMap(biomodels_files[:number_of_models], number_of_threads)
duration = time.perf_counter() - start_time

# Sanity check, kept outside the timed region so it does not affect the measurement.
assert len(rrm) == number_of_models
print(f"{number_of_models} models loaded in {duration:.4f} seconds using {number_of_threads} threads")


"""
Script output
=============
20 models loaded in 1.7368 seconds using 1 threads

"""

Loading Models: Multithreading

To use multithreading, pass the number of threads you want to use to the RoadRunnerMap constructor.

import os, sys, glob
from roadrunner import RoadRunner, RoadRunnerMap, Config
from get_biomodels import get_biomodels_folder
import time

# Get the curated section of biomodels locally and list its SBML files.
biomodels_folder = get_biomodels_folder()
biomodels_files = glob.glob(os.path.join(biomodels_folder, "*.xml"))

# Disable model caching so that load times can be compared fairly.
Config.setValue(Config.LOADSBMLOPTIONS_RECOMPILE, True)

number_of_threads = 3
number_of_models = 20

# Build the first `number_of_models` models using `number_of_threads` threads.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
rrm = RoadRunnerMap(biomodels_files[:number_of_models], number_of_threads)
duration = time.perf_counter() - start_time

print(f"{number_of_models} models loaded in {duration:.4f} seconds using {number_of_threads} threads")

"""
Script output
=============
20 models loaded in 0.7088 seconds using 3 threads

"""

RoadRunnerMap has a dict-like interface

Many of the functions you are used to from Python’s dict object are also available in RoadRunnerMap. The RoadRunnerMap.keys(), RoadRunnerMap.values() and RoadRunnerMap.items() functions all have linear complexity O(N), as the map is iterated over to construct lists at run time.

import os, sys, glob
from roadrunner import RoadRunner, RoadRunnerMap, Config
from get_biomodels import get_biomodels_folder
import time

# Locate the SBML files of the locally available curated biomodels.
biomodels_folder = get_biomodels_folder()
xml_pattern = os.path.join(biomodels_folder, "*.xml")
biomodels_files = glob.glob(xml_pattern)

# Turn model caching off (every model is recompiled on load).
Config.setValue(Config.LOADSBMLOPTIONS_RECOMPILE, True)

number_of_threads = 3
number_of_models = 20

# Construct the map from the first `number_of_models` files,
# building them with `number_of_threads` threads.
selected_files = biomodels_files[:number_of_models]
rrm = RoadRunnerMap(selected_files, number_of_threads)

# keys(), values() and items() behave like their dict counterparts.
# (rrm.values() would give the roadrunner models themselves; it is not printed here.)
print("keys:", rrm.keys())

for model_name, rr_model in rrm.items():
    reaction_count = rr_model.getModel().getNumReactions()
    print(f"model \"{model_name}\" has {reaction_count} reactions")

# Remove the entry stored under the first key and show the size change.
print(f"number of models before deletion: {len(rrm)}")
first_key = rrm.keys()[0]
del rrm[first_key]
print(f"number of models after deletion: {len(rrm)}")

"""
Script output
=============
keys: ('Goldbeter1995_CircClock', 'Tyson1991 - Cell Cycle 6 var', 'Novak1997 - Cell Cycle', 'Elowitz2000 - Repressilator', 'Levchenko2000_MAPK_noScaffold', 'Gardner1998 - Cell Cycle Goldbeter', 'Goldbeter1991 - Min Mit Oscil', 'Schoeberl2002 - EGF MAPK', 'Huang1996 - Ultrasensitivity in MAPK cascade', 'Goldbeter1991 - Min Mit Oscil, Expl Inact', 'Edelstein1996 - EPSP ACh species', 'hodgkin-huxley squid-axon 1952', 'Kholodenko2000 - Ultrasensitivity and negative feedback bring oscillations in MAPK cascade', 'Hoefnagel2002_PyruvateBranches', 'Tyson1991 - Cell Cycle 2 var', 'Curto1998 - purine metabolism', 'Edelstein1996 - EPSP ACh event', 'Morrison1989 - Folate Cycle', 'Levchenko2000_MAPK_Scaffold', 'Poolman2004_CalvinCycle')
model "Goldbeter1995_CircClock" has 10 reactions
model "Tyson1991 - Cell Cycle 6 var" has 9 reactions
model "Novak1997 - Cell Cycle" has 25 reactions
model "Elowitz2000 - Repressilator" has 12 reactions
model "Levchenko2000_MAPK_noScaffold" has 30 reactions
model "Gardner1998 - Cell Cycle Goldbeter" has 13 reactions
model "Goldbeter1991 - Min Mit Oscil" has 7 reactions
model "Schoeberl2002 - EGF MAPK" has 125 reactions
model "Huang1996 - Ultrasensitivity in MAPK cascade" has 20 reactions
model "Goldbeter1991 - Min Mit Oscil, Expl Inact" has 7 reactions
model "Edelstein1996 - EPSP ACh species" has 17 reactions
model "hodgkin-huxley squid-axon 1952" has 0 reactions
model "Kholodenko2000 - Ultrasensitivity and negative feedback bring oscillations in MAPK cascade" has 10 reactions
model "Hoefnagel2002_PyruvateBranches" has 14 reactions
model "Tyson1991 - Cell Cycle 2 var" has 3 reactions
model "Curto1998 - purine metabolism" has 37 reactions
model "Edelstein1996 - EPSP ACh event" has 17 reactions
model "Morrison1989 - Folate Cycle" has 47 reactions
model "Levchenko2000_MAPK_Scaffold" has 300 reactions
model "Poolman2004_CalvinCycle" has 21 reactions
number of models before deletion: 20
number of models after deletion: 19
"""

Inserting new models

Insertion of a new model into the dictionary works much like you might expect, except that instead of passing in a fully constructed RoadRunner model, you pass in the SBML string or the path to an SBML file. Much like Python’s dict object, keys are unique, so inserting another model with the same key as an existing model will overwrite the old model.

At present, the sbml model name is used as the default key for the model. We have implemented this as a “first pass” for simplicity but can foresee problems with model names not being unique. We therefore anticipate improvements in future releases once user feedback has been generated. For now, users may specify their own key manually.

import os, sys, glob
from roadrunner import RoadRunner, RoadRunnerMap, Config
from get_biomodels import get_biomodels_folder
import time

# Get the curated section of biomodels locally and list its SBML files.
biomodels_folder = get_biomodels_folder()
biomodels_files = glob.glob(os.path.join(biomodels_folder, "*.xml"))

# Disable model caching so that load times can be compared fairly.
Config.setValue(Config.LOADSBMLOPTIONS_RECOMPILE, True)

number_of_threads = 3
number_of_models = 20

# Build the first `number_of_models` models using `number_of_threads` threads.
rrm = RoadRunnerMap(biomodels_files[:number_of_models], number_of_threads)

print(f"RoadRunnerMap size: {len(rrm)}")

# Insert a single model with a custom key.
rrm["newModel"] = biomodels_files[50]
print(f"RoadRunnerMap size: {len(rrm)}")

# Or equivalently; the key already exists, so the model is overwritten
# and the size of the map does not change.
rrm.insert("newModel", biomodels_files[50])
print(f"RoadRunnerMap size: {len(rrm)}")

# Insert using the model name as key, which is the default behaviour.
rrm.insert(biomodels_files[50])
print(f"RoadRunnerMap size: {len(rrm)}")

print(f"number of threads being used: {rrm.getNumThreads()}")

# Change the number of threads; the variable is reused so that the value
# is stated in one place only.
number_of_threads = 5
rrm.setNumThreads(number_of_threads)
print(f"number of threads being used: {rrm.getNumThreads()}")

# Insert another set of models into the map in parallel.
additional_files = biomodels_files[500:600]
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
rrm.insert(additional_files)
duration = time.perf_counter() - start

# Report the number of files actually passed in rather than a hard-coded count.
print(f"Another {len(additional_files)} models loaded in {duration:0.2f} seconds")

print(f"RoadRunnerMap size: {len(rrm)}")


"""
Script output
=============
RoadRunnerMap size: 20
RoadRunnerMap size: 21
RoadRunnerMap size: 21
RoadRunnerMap size: 22
number of threads being used: 3
number of threads being used: 5
Another 100 models loaded in 6.34 seconds
RoadRunnerMap size: 122

"""