# Source code for gypsum_client.fetch_metadata_schema
import os
import tempfile
import requests
from filelock import FileLock
from .cache_directory import cache_directory
from .config import REQUESTS_MOD
__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"
# [docs]
def fetch_metadata_schema(
    name: str = "bioconductor/v1.json",
    cache_dir: str = cache_directory(),
    overwrite: bool = False,
) -> str:
    """Fetch a JSON schema file for metadata to be inserted into a SQLite database.

    See `metadata index <https://github.com/ArtifactDB/bioconductor-metadata-index>`_
    for more details.

    Each SQLite database is created from metadata files uploaded to the gypsum backend,
    so clients uploading objects to be incorporated into the database should
    validate their metadata against the corresponding JSON schema.

    See Also:
        :py:func:`~gypsum_client.validate_metadata.validate_metadata`, to
        validate metadata against a chosen schema.

        :py:func:`~gypsum_client.fetch_metadata_database.fetch_metadata_database`,
        to obtain the SQLite database of metadata.

    Example:

        .. code-block:: python

            schema_path = fetch_metadata_schema()

    Args:
        name:
            Name of the schema.
            Defaults to "bioconductor/v1.json".

        cache_dir:
            Path to the cache directory. The schema is stored under its
            ``schemas`` subdirectory. If None, the schema is downloaded
            to a fresh temporary file instead.

        overwrite:
            Whether to overwrite existing file in cache.

    Returns:
        Path containing the downloaded schema.

    Raises:
        requests.HTTPError:
            If the server answers with an error status (e.g. an unknown
            schema ``name``). Nothing is written to the cache in that case.
    """
    if cache_dir is None:
        # mkstemp creates the file itself, avoiding the name race of the
        # deprecated tempfile.mktemp.
        handle_fd, cache_path = tempfile.mkstemp(suffix=".json")
        os.close(handle_fd)
    else:
        cache_path = os.path.join(cache_dir, "schemas", name)
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)

        # The file is only ever moved into place once fully written (see
        # below), so an existing file can be returned as-is. The previous
        # `is_locked` probe on a newly created FileLock was dropped: it only
        # reports whether that particular instance holds the lock.
        if os.path.exists(cache_path) and not overwrite:
            return cache_path

    url = "https://artifactdb.github.io/bioconductor-metadata-index/" + name
    partial_path = cache_path + ".tmp"

    # The lock serialises concurrent downloaders of the same schema, which
    # also makes the fixed ".tmp" staging name safe to use.
    with FileLock(cache_path + ".LOCK"):
        response = requests.get(url, verify=REQUESTS_MOD["verify"])
        # Fail loudly rather than caching an HTTP error page as a schema.
        response.raise_for_status()

        try:
            with open(partial_path, "wb") as staged:
                staged.write(response.content)
            # Atomic rename: the cache path never holds a partial download.
            os.replace(partial_path, cache_path)
        finally:
            # Only still present if the write or rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    return cache_path