Downloading Biomodels

It is often useful to have access to the entire BioModels database locally, though it isn’t necessarily obvious how best to download it. Here, we provide a script that makes use of bioservices.BioModels to download the SBML models from the curated section.

Note

This example does not use libOmexMeta and so arguably has no place in this documentation. However, this is a common task and can therefore be useful to our users.

Note

This script downloads SBML files only. BioModels hosts a number of other files associated with individual models. It is likely that these could also be downloaded using a similar strategy, but we do not do that here.

Download the curated section of biomodels
  1import os
  2import subprocess
  3import sys
  4
  5try:
  6    from bioservices import BioModels
  7except ImportError:
  8    subprocess.check_call([sys.executable, "-m", "pip", "install", "bioservices"])
  9    from bioservices import BioModels
 10
 11import json
 12import zipfile as z
 13
 14thisDir = os.path.dirname(os.path.abspath(__file__))  # absolute path of the directory that holds this script
 15
 16s = BioModels()  # module-level bioservices BioModels client shared by the functions below
 17
 18
 19def get_number_of_curated_models() -> int:
 20    """Figure out how many curated models there are in biomodels right now"""
 21    all: dict = s.search("*")
 22    stats: str = all["facetStats"]  # this is a string.
 23    json_stats: list = json.loads(stats)
 24    for item in json_stats:
 25        for val in item["facetValues"]:
 26            if val["value"] == "Manually curated":
 27                return int(val["count"])
 28    raise ValueError("Somethings not quite right")
 29
 30
 31def download_biomodels(directory: str, num_per_download=100):
 32    """downloads sbml models from the curated section of biomodels
 33
 34    Biomodels has a strict limit of 100 models per download so in this
 35    function we have to dance around this fact and compute appropriate
 36    chunk sizes.
 37
 38    :param directory: (str) where to put zip files containing num_per_download sbml models
 39    :param num_per_download: How many sbml models to download at a time.
 40    :return:
 41    """
 42    if num_per_download > 100:
 43        raise ValueError("Maximum number of models that can be downloaded at a time is 100")
 44
 45    # do index math.
 46    total_models = get_number_of_curated_models()
 47    num_downloads = int(total_models / num_per_download)
 48    remainder = total_models % num_per_download
 49    if remainder > 0:
 50        num_downloads += 1
 51
 52    filenames = []
 53
 54    start = 1
 55    for download_number in range(1, num_downloads + 1):
 56        if download_number == num_downloads:
 57            # handle last, which may have remainder
 58            end = total_models + 1  # account for 0 indexed python, 1 indexed biomodels
 59        else:
 60            end = (download_number * num_per_download) + 1  # account for 0 indexed python, 1 indexed biomodels
 61        # do something ...
 62
 63        fname = os.path.join(directory, f"Biomodels{start}-{end - 1}.zip")
 64        filenames.append(fname)
 65
 66        if os.path.isfile(fname):
 67            os.remove(fname)
 68
 69        biomodels_ids = [f"BIOMD{i:010}" for i in range(start, end)]
 70
 71        s.search_download(biomodels_ids, output_filename=fname)
 72        print(f"Biomodels models from id {start} to {end - 1} saved to {fname}")
 73
 74        start = end
 75
 76    # consolidate zips
 77    with z.ZipFile(filenames[0], 'a') as z1:
 78        for fname in filenames[1:]:
 79            zf = z.ZipFile(fname, 'r')
 80            for n in zf.namelist():
 81                z1.writestr(n, zf.open(n).read())
 82
 83    # rename first zip
 84    biomodels_zip = os.path.join(directory, "biomodels.zip")
 85    if not os.path.isfile(biomodels_zip):
 86        os.rename(filenames[0], biomodels_zip)
 87
 88    # try to get rid of the rest. Windows likes to hang on to them though so might fail
 89    for i in range(1, len(filenames)):
 90        try:
 91            os.remove(filenames[i])
 92        except Exception:
 93            continue
 94
 95    return filenames
 96
 97
 98if __name__ == "__main__":
 99
100    # set to true to actually do the download
101    ACTIVATE_DOWNLOAD = False
102
103    # we do this so that we do not need to download biomodels
104    # every time the documentation is built.
105    if ACTIVATE_DOWNLOAD:
106        download_biomodels(os.path.join(os.path.dirname(__file__)))