Source code for gypsum_client.prepare_directory_for_upload

"""Prepare to upload a directory's contents.

Files in `directory` (that are not symlinks) are used as
regular uploads, i.e., ``files=`` in
:py:func:`~gypsum_client.start_upload.start_upload`.

If `directory` contains a symlink to a file in `cache`,
we assume that it points to a file that was previously downloaded
by, e.g., :py:func:`~gypsum_client.upload_api_operations.save_file`.
Thus, instead of performing a regular upload, we attempt to
create an upload link, i.e., ``links=`` in
:py:func:`~gypsum_client.start_upload.start_upload`.
This is achieved by examining the destination path of the
symlink and inferring the link destination in the backend.
Note that this still works if the symlinks are dangling.

If a symlink cannot be converted into an upload link, it will
be used as a regular upload, i.e., the contents of the symlink
destination will be uploaded by
:py:func:`~gypsum_client.start_upload.start_upload`.
In this case, an error will be raised if the symlink is dangling
as there is no file that can actually be uploaded.
If ``links="always"``, an error is raised instead upon symlink
conversion failure.

This function is intended to be used with ``clone_version``,
which creates symlinks to files in `cache`.

See Also:
    :py:func:`~gypsum_client.start_upload.start_upload`,
    to actually start the upload.

    ``clone_version``, to prepare the symlinks.

Example:

    .. code-block:: python

        import tempfile
        cache = tempfile.mkdtemp()
        dest = tempfile.mkdtemp()

        # Clone a project
        clone_version("test-R", "basic", "v1", destination=dest, cache_dir=cache)

        # Make some modification
        with open(os.path.join(dest, "heanna"), "w") as f:
            f.write("sumire")

        # Prepare the directory for upload
        prepped = prepare_directory_upload(dest, cache_dir=cache)
"""

import os
from typing import Literal

from ._utils import (
from .cache_directory import cache_directory

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"

def prepare_directory_upload(
    directory: str,
    links: Literal["auto", "always", "never"] = "auto",
    cache_dir: str = cache_directory(),
) -> dict:
    """Prepare to upload a directory's contents.

    Walks ``directory``, lists the files it contains and, depending on
    ``links``, converts symlinks that point into ``cache_dir`` into upload
    links. The result is meant to be passed to
    :py:func:`~gypsum_client.start_upload.start_upload`.

    Args:
        directory:
            Path to a directory, the contents of which are to be uploaded via
            :py:func:`~gypsum_client.start_upload.start_upload`.

        links:
            Indicate how to handle symlinks in ``directory``.
            Must be one of the following:

            - "auto": Will attempt to convert symlinks into upload links.
              If the conversion fails, a regular upload is performed.
            - "always": Will attempt to convert symlinks into upload links.
              If the conversion fails, an error is raised.
            - "never": Will never attempt to convert symlinks into upload
              links. All symlinked files are treated as regular uploads.

        cache_dir:
            Path to the cache directory, used to convert symlinks into
            upload links. Note that the default is computed once, when this
            function is defined.

    Returns:
        Dictionary containing:

        - ``files``: list of paths (relative to ``directory``) to be used as
          ``files=`` in :py:func:`~gypsum_client.start_upload.start_upload`.
        - ``links``: list of dictionaries, each with the keys ``from.path``,
          ``to.project``, ``to.asset``, ``to.version`` and ``to.path``, to be
          used as ``links=`` in
          :py:func:`~gypsum_client.start_upload.start_upload`.

    Raises:
        ValueError:
            If ``links`` is not a supported option, if a symlink that has to
            be uploaded as a regular file is dangling, or if
            ``links="always"`` and a symlink cannot be converted into an
            upload link.
    """
    _links_options = ["auto", "always", "never"]
    if links not in _links_options:
        raise ValueError(
            f"Invalid value for 'links': {links}. Must be one of {_links_options}."
        )

    out_files = []
    out_links = []

    # A trailing separator guarantees that the prefix match below only
    # succeeds on a whole path component of the cache directory.
    cache_dir = _normalize_and_sanitize_path(cache_dir)
    if not cache_dir.endswith("/"):
        cache_dir += "/"

    for root, _, files in os.walk(directory):
        for name in files:
            link_path = os.path.join(root, name)
            rel_path = os.path.relpath(link_path, directory)

            if not os.path.islink(link_path):
                out_files.append(rel_path)
                continue

            dest = os.readlink(link_path)
            # The target of a relative symlink is relative to the directory
            # that holds the link, not to the current working directory.
            # Resolve it here so that the existence checks and the cache
            # prefix match below look at the right location.
            if not os.path.isabs(dest):
                dest = os.path.abspath(
                    os.path.join(os.path.dirname(link_path), dest)
                )

            if links == "never":
                if not os.path.exists(dest):
                    raise ValueError(
                        f"Cannot use a dangling link to '{dest}' as a regular upload."
                    )
                out_files.append(rel_path)
                continue

            dest = _normalize_and_sanitize_path(dest)
            dest_components = _match_path_to_cache(dest, cache_dir)
            if dest_components:
                # The link target lives inside the cache, so it can be
                # expressed as an upload link; this works for dangling
                # symlinks too, as only the path is inspected.
                out_links.append(
                    {
                        "from.path": rel_path,
                        "to.project": dest_components["project"],
                        "to.asset": dest_components["asset"],
                        "to.version": dest_components["version"],
                        "to.path": dest_components["path"],
                    }
                )
                continue

            if links == "always":
                raise ValueError(
                    f"Failed to convert symlink '{dest}' to an upload link."
                )
            elif not os.path.exists(dest):
                raise ValueError(
                    f"Cannot use a dangling link to '{dest}' as a regular upload."
                )

            # Fall back to uploading the contents of the symlink target.
            out_files.append(rel_path)

    return {"files": out_files, "links": out_links}
def _normalize_and_sanitize_path(path: str) -> str:
    """Normalize the parent directory of ``path`` and sanitize the result.

    When ``path`` exists, its directory part is passed through
    ``os.path.normpath`` while the final component is kept as is. In either
    case the path is then handed to ``_sanitize_path``.

    Args:
        path: Path to process.

    Returns:
        The value returned by ``_sanitize_path``.
    """
    if not os.path.exists(path):
        return _sanitize_path(path)

    parent, leaf = os.path.split(path)
    return _sanitize_path(os.path.join(os.path.normpath(parent), leaf))


def _match_path_to_cache(path: str, cache: str) -> dict:
    """Split a path inside the cache into its project coordinates.

    Args:
        path: Path to examine.
        cache: Prefix that ``path`` must start with.

    Returns:
        A dictionary with the keys ``project``, ``asset``, ``version`` and
        ``path`` when ``path`` starts with ``cache``, the remainder has more
        than four ``/``-separated components and the first of them equals
        ``BUCKET_CACHE_NAME``. Otherwise ``None``.
    """
    if not path.startswith(cache):
        return None

    parts = path[len(cache) :].split("/")
    # The length test must come first so that short remainders are rejected
    # before the bucket name is compared.
    if len(parts) <= 4 or parts[0] != BUCKET_CACHE_NAME:
        return None

    _bucket, project, asset, version, *tail = parts
    return {
        "project": project,
        "asset": asset,
        "version": version,
        "path": "/".join(tail),
    }