Parallel RoadRunnerMap

RoadRunner compiles sbml code into machine code for fast execution of model simulations. However, model compilation takes time and can be prohibitive if users need to simulate a large variety of models at once. To alleviate some of this burden, we provide a RoadRunnerMap object, which is a container for roadrunner objects. The RoadRunnerMap has a dict-like interface and is intended to be a drop-in replacement for a Python dictionary specifically for use with roadrunner models. The major difference is that RoadRunnerMap is thread-safe and uses a thread pool for building and storing models. The interested reader can refer to the open source parallel hashmap for the thread-safe hash map and thread-pool for the multithreading capabilities used by RoadRunnerMap.

For these demonstrations, we require some sbml models to load. To simplify this process we provide a function that will download the curated section of biomodels locally for you to use.

Note

Windows likes to hang on to open files, meaning we cannot clean up all trace files used in the download process. You can delete any residual files manually.

Here is the code for downloading the curated section of biomodels. It is imported in the following examples.

 1import zipfile
 2import os, sys, glob
 3
 4from roadrunner import RoadRunner, RoadRunnerMap
 5from roadrunner.tests.download_biomodels import download_biomodels
 6
 7current_directory = os.path.dirname(__file__)
 8biomodels_zip = os.path.join(current_directory, "biomodels.zip")
 9biomodels_directory = os.path.join(current_directory, "biomodels")
10
11def get_biomodels_folder() -> str:
12    """download curated section of biomodels, if not already exists and
13    extract to a folder called biomodels"""
14    # don't do work more than once
15    if os.path.isdir(biomodels_directory):
16        return biomodels_directory
17    # downloads models if not exist
18    print("Downloading the curated section from biomodels")
19    models_zip = download_biomodels(current_directory)
20    assert biomodels_zip == models_zip, f"{biomodels_zip} != {models_zip}"
21
22    # extract models.
23    with zipfile.ZipFile(models_zip, 'r') as zip_ref:
24        zip_ref.extractall(biomodels_directory)
25    return biomodels_directory
26
27
28
29if __name__ == "__main__":
30    # get some local sbml files for use with the example
31    get_biomodels_folder()

Loading Models: 1 Thread

When a single thread is requested, serial algorithms take over from the parallel ones. This avoids some of the overhead required to set up multiple threads and is useful when you only have a few models to manage.

 1import os, sys, glob
 2from roadrunner import RoadRunner, RoadRunnerMap, Config
 3from get_biomodels import get_biomodels_folder
 4import time
 5
 6# get curated section of biomodels locally.
 7biomodels_folder = get_biomodels_folder()
 8biomodels_files = glob.glob(os.path.join(biomodels_folder, "*.xml"))
 9
10# disable model caching to compare model load times
11Config.setValue(Config.LOADSBMLOPTIONS_RECOMPILE, True)
12
13number_of_threads = 1
14number_of_models = 20
15
16# build the first `number_of_models` models using `number_of_threads` threads
17# When the number of threads is 1, multithreading is disabled and
18# serial algorithms take over
19start_time = time.time()
20rrm = RoadRunnerMap(biomodels_files[:20], number_of_threads)
21assert len(rrm) == number_of_models
22duration = time.time() - start_time
23print(f"{number_of_models} models loaded in {duration:.4f} seconds using {number_of_threads} threads")
24
25
26"""
27Script output
28=============
2920 models loaded in 1.7368 seconds using 1 threads
30
31"""

Loading Models: Multithreading

To use multithreading, pass the number of threads you want to use to the RoadRunnerMap constructor.

 1import os, sys, glob
 2from roadrunner import RoadRunner, RoadRunnerMap, Config
 3from get_biomodels import get_biomodels_folder
 4import time
 5
 6# get curated section of biomodels locally.
 7biomodels_folder = get_biomodels_folder()
 8biomodels_files = glob.glob(os.path.join(biomodels_folder, "*.xml"))
 9
10# disable model caching to compare model load times
11Config.setValue(Config.LOADSBMLOPTIONS_RECOMPILE, True)
12
13number_of_threads = 3
14number_of_models = 20
15
16# build the first `number_of_models` models using `number_of_threads` threads
17start_time = time.time()
18rrm = RoadRunnerMap(biomodels_files[:number_of_models], number_of_threads)
19duration = time.time() - start_time
20
21print(f"{number_of_models} models loaded in {duration:.4f} seconds using {number_of_threads} threads")
22
23"""
24Script output
25=============
2620 models loaded in 0.7088 seconds using 3 threads
27
28"""

RoadRunnerMap has a dict-like interface

Many of the functions you are used to from Python’s dict object are also available in RoadRunnerMap. The RoadRunnerMap.keys(), RoadRunnerMap.values() and RoadRunnerMap.items() functions all have linear complexity O(N) as the map is iterated over to construct lists at run time.

 1import os, sys, glob
 2from roadrunner import RoadRunner, RoadRunnerMap, Config
 3from get_biomodels import get_biomodels_folder
 4import time
 5
 6# get curated section of biomodels locally.
 7biomodels_folder = get_biomodels_folder()
 8biomodels_files = glob.glob(os.path.join(biomodels_folder, "*.xml"))
 9
10# disable model caching to compare model load times
11Config.setValue(Config.LOADSBMLOPTIONS_RECOMPILE, True)
12
13number_of_threads = 3
14number_of_models = 20
15
16# build the first `number_of_models` models using `number_of_threads` threads
17rrm = RoadRunnerMap(biomodels_files[:number_of_models], number_of_threads)
18
19# keys, values and items work as expected
20print("keys:", rrm.keys())
21# print("values", rrm.values()) # prints out list of roadrunner models
22
23for modelName, rrModel in rrm.items():
24    print(f"model \"{modelName}\" has {rrModel.getModel().getNumReactions()} reactions")
25
26# delete an item
27print(f"number of models before deletion: {len(rrm)}")
28del rrm[rrm.keys()[0]]
29print(f"number of models after deletion: {len(rrm)}")
30
31"""
32Script output
33=============
34keys: ('Goldbeter1995_CircClock', 'Tyson1991 - Cell Cycle 6 var', 'Novak1997 - Cell Cycle', 'Elowitz2000 - Repressilator', 'Levchenko2000_MAPK_noScaffold', 'Gardner1998 - Cell Cycle Goldbeter', 'Goldbeter1991 - Min Mit Oscil', 'Schoeberl2002 - EGF MAPK', 'Huang1996 - Ultrasensitivity in MAPK cascade', 'Goldbeter1991 - Min Mit Oscil, Expl Inact', 'Edelstein1996 - EPSP ACh species', 'hodgkin-huxley squid-axon 1952', 'Kholodenko2000 - Ultrasensitivity and negative feedback bring oscillations in MAPK cascade', 'Hoefnagel2002_PyruvateBranches', 'Tyson1991 - Cell Cycle 2 var', 'Curto1998 - purine metabolism', 'Edelstein1996 - EPSP ACh event', 'Morrison1989 - Folate Cycle', 'Levchenko2000_MAPK_Scaffold', 'Poolman2004_CalvinCycle')
35model "Goldbeter1995_CircClock" has 10 reactions
36model "Tyson1991 - Cell Cycle 6 var" has 9 reactions
37model "Novak1997 - Cell Cycle" has 25 reactions
38model "Elowitz2000 - Repressilator" has 12 reactions
39model "Levchenko2000_MAPK_noScaffold" has 30 reactions
40model "Gardner1998 - Cell Cycle Goldbeter" has 13 reactions
41model "Goldbeter1991 - Min Mit Oscil" has 7 reactions
42model "Schoeberl2002 - EGF MAPK" has 125 reactions
43model "Huang1996 - Ultrasensitivity in MAPK cascade" has 20 reactions
44model "Goldbeter1991 - Min Mit Oscil, Expl Inact" has 7 reactions
45model "Edelstein1996 - EPSP ACh species" has 17 reactions
46model "hodgkin-huxley squid-axon 1952" has 0 reactions
47model "Kholodenko2000 - Ultrasensitivity and negative feedback bring oscillations in MAPK cascade" has 10 reactions
48model "Hoefnagel2002_PyruvateBranches" has 14 reactions
49model "Tyson1991 - Cell Cycle 2 var" has 3 reactions
50model "Curto1998 - purine metabolism" has 37 reactions
51model "Edelstein1996 - EPSP ACh event" has 17 reactions
52model "Morrison1989 - Folate Cycle" has 47 reactions
53model "Levchenko2000_MAPK_Scaffold" has 300 reactions
54model "Poolman2004_CalvinCycle" has 21 reactions
55number of models before deletion: 20
56number of models after deletion: 19
57"""

Inserting new models

Insertion of a new model into the dictionary works much like you might expect, except that instead of passing in a fully constructed RoadRunner model, you pass in the sbml string or the path to an sbml file. Much like Python’s dict object, keys are unique, so inserting another model with the same key as an existing model will overwrite the old model.

At present, the sbml model name is used as the default key for the model. We have implemented this as a “first pass” for simplicity but can foresee problems with model names not being unique. We therefore anticipate improvements in future releases once user feedback has been generated. For now, users may specify their own key manually.

 1import os, sys, glob
 2from roadrunner import RoadRunner, RoadRunnerMap, Config
 3from get_biomodels import get_biomodels_folder
 4import time
 5
 6# get curated section of biomodels locally.
 7biomodels_folder = get_biomodels_folder()
 8biomodels_files = glob.glob(os.path.join(biomodels_folder, "*.xml"))
 9
10# disable model caching to compare model load times
11Config.setValue(Config.LOADSBMLOPTIONS_RECOMPILE, True)
12
13number_of_threads = 3
14number_of_models = 20
15
16# build the first `number_of_models` models using `number_of_threads` threads
17rrm = RoadRunnerMap(biomodels_files[:number_of_models], number_of_threads)
18
19print(f"RoadRunnerMap size: {len(rrm)}")
20
21# insert a single model with a custom key
22rrm["newModel"] = biomodels_files[50]
23print(f"RoadRunnerMap size: {len(rrm)}")
24
25# or equivalently
26rrm.insert("newModel", biomodels_files[50])
27print(f"RoadRunnerMap size: {len(rrm)}")
28
29# Insert using the model name as key, which is the default behaviour
30rrm.insert(biomodels_files[50])
31print(f"RoadRunnerMap size: {len(rrm)}")
32
33# Insert another set of models into the map in parallel
34print(f"number of threads being used: {rrm.getNumThreads()}")
35
36# change the number of threads
37number_of_threads = 5
38rrm.setNumThreads(5)
39print(f"number of threads being used: {rrm.getNumThreads()}")
40
41start = time.time()
42rrm.insert(biomodels_files[500:600])
43duration = time.time() - start
44
45print(f"Another 100 models loaded in {duration:0.2f} seconds")
46
47print(f"RoadRunnerMap size: {len(rrm)}")
48
49
50"""
51Script output
52=============
53RoadRunnerMap size: 20
54RoadRunnerMap size: 21
55RoadRunnerMap size: 21
56RoadRunnerMap size: 22
57number of threads being used: 3
58number of threads being used: 5
59Another 100 models loaded in 6.34 seconds
60RoadRunnerMap size: 122
61
62"""
63