Downloading BioModels
It is often useful to have local access to the entire BioModels database, though it is not necessarily obvious how best to download it. Here we provide a script that uses bioservices.BioModels to download SBML models from the curated section.
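Under the hood, the script relies on two calls from bioservices: search, which we use to count the curated models, and search_download, which fetches a batch of SBML files as a single zip. A minimal round trip looks like the following (the two accession numbers are purely illustrative):

from bioservices import BioModels

s = BioModels()

# fetch two curated models as a single zip file; accession numbers are
# the string "BIOMD" followed by a zero-padded, ten-digit integer
s.search_download(["BIOMD0000000001", "BIOMD0000000002"], output_filename="two_models.zip")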
Note
This example does not use libOmexMeta and so arguably has no place in this documentation. However, downloading BioModels is a common task, and the script may therefore be useful to our users.
Note
This script downloads SBML files only. BioModels hosts a number of other files associated with individual models, and these could likely be downloaded with a similar strategy, though we do not do so here; a hedged sketch of one possible approach follows this note.
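For instance, the BioModels REST API exposes a per-model download endpoint that returns everything attached to a model, not just the SBML. The following is a hypothetical sketch of that strategy using requests; the endpoint URL is taken from the BioModels REST documentation at the time of writing and should be verified before use.

import requests

# Hypothetical sketch: download the complete archive (SBML plus any
# auxiliary files) for a single model. The endpoint is an assumption
# based on the BioModels REST documentation; verify it before relying on it.
model_id = "BIOMD0000000001"
url = f"https://www.ebi.ac.uk/biomodels/model/download/{model_id}"
response = requests.get(url)
response.raise_for_status()
with open(f"{model_id}.zip", "wb") as f:
    f.write(response.content)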
Download the curated section of BioModels
import os
import subprocess
import sys

try:
    from bioservices import BioModels
except ImportError:
    # install bioservices on the fly if it is not already available
    subprocess.check_call([sys.executable, "-m", "pip", "install", "bioservices"])
    from bioservices import BioModels

import json
import zipfile as z

thisDir = os.path.dirname(os.path.abspath(__file__))

s = BioModels()


def get_number_of_curated_models() -> int:
    """Figure out how many curated models there are in BioModels right now"""
    results: dict = s.search("*")
    stats: str = results["facetStats"]  # this is a JSON-encoded string
    json_stats: list = json.loads(stats)
    for item in json_stats:
        for val in item["facetValues"]:
            if val["value"] == "Manually curated":
                return int(val["count"])
    raise ValueError("could not locate the number of manually curated models")


def download_biomodels(directory: str, num_per_download: int = 100):
    """Download SBML models from the curated section of BioModels.

    BioModels has a strict limit of 100 models per download, so this
    function splits the full set of curated models into appropriately
    sized chunks and fetches them one zip file at a time.

    :param directory: (str) where to put zip files containing num_per_download SBML models
    :param num_per_download: how many SBML models to download at a time (max 100)
    :return: the list of per-chunk zip filenames that were created
    """
    if num_per_download > 100:
        raise ValueError("Maximum number of models that can be downloaded at a time is 100")

    # do index math
    total_models = get_number_of_curated_models()
    num_downloads = total_models // num_per_download
    remainder = total_models % num_per_download
    if remainder > 0:
        num_downloads += 1
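    # worked example with illustrative numbers: if there were 1,064 curated
    # models and num_per_download were 100, this would give 1064 // 100 = 10
    # full chunks plus a remainder of 64, i.e. 11 downloads in total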

    filenames = []

    start = 1
    for download_number in range(1, num_downloads + 1):
        if download_number == num_downloads:
            # handle the last chunk, which may be smaller due to the remainder
            end = total_models + 1  # account for 0-indexed Python, 1-indexed BioModels
        else:
            end = (download_number * num_per_download) + 1  # account for 0-indexed Python, 1-indexed BioModels

        fname = os.path.join(directory, f"Biomodels{start}-{end - 1}.zip")
        filenames.append(fname)

        if os.path.isfile(fname):
            os.remove(fname)

        # accession numbers are "BIOMD" followed by a zero-padded, ten-digit integer
        biomodels_ids = [f"BIOMD{i:010}" for i in range(start, end)]

        s.search_download(biomodels_ids, output_filename=fname)
        print(f"Biomodels models from id {start} to {end - 1} saved to {fname}")

        start = end

    # consolidate the per-chunk zips into the first one
    with z.ZipFile(filenames[0], 'a') as z1:
        for fname in filenames[1:]:
            with z.ZipFile(fname, 'r') as zf:
                for n in zf.namelist():
                    z1.writestr(n, zf.open(n).read())

    # rename the first zip
    biomodels_zip = os.path.join(directory, "biomodels.zip")
    if not os.path.isfile(biomodels_zip):
        os.rename(filenames[0], biomodels_zip)

    # try to get rid of the rest. Windows likes to hang on to them, though, so this might fail
    for i in range(1, len(filenames)):
        try:
            os.remove(filenames[i])
        except Exception:
            continue

    return filenames


if __name__ == "__main__":

    # set to True to actually do the download
    ACTIVATE_DOWNLOAD = False

    # we do this so that we do not need to download biomodels
    # every time the documentation is built
    if ACTIVATE_DOWNLOAD:
        download_biomodels(thisDir)
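Once the script has run, the consolidated archive can be inspected and unpacked with the standard library alone. A short usage sketch, assuming biomodels.zip sits in the current working directory:

import zipfile

# list the contents of the consolidated archive and extract everything
# into a biomodels/ directory
with zipfile.ZipFile("biomodels.zip", "r") as archive:
    print(f"{len(archive.namelist())} files in archive")
    archive.extractall("biomodels")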