Source code for gypsum_client.prepare_directory_for_upload

"""Prepare to upload a directory's contents.

Files in `directory` (that are not symlinks) are used as
regular uploads, i.e., `files=` in
:py:func:`~gypsum_client.upload_api_operations.start_upload`.

If `directory` contains a symlink to a file in `cache`,
we assume that it points to a file that was previously downloaded
by, e.g., :py:func:`~gypsum_client.upload_api_operations.save_file` or
:py:func:`~gypsum_client.upload_api_operations.save_version`.
Thus, instead of performing a regular upload, we attempt to
create an upload link, i.e., ``links=`` in
:py:func:`~gypsum_client.upload_api_operations.start_upload`.
This is achieved by examining the destination path of the
symlink and inferring the link destination in the backend.
Note that this still works if the symlinks are dangling.

If a symlink cannot be converted into an upload link, it will
be used as a regular upload, i.e., the contents of the symlink
destination will be uploaded by
:py:func:`~gypsum_client.upload_api_operations.start_upload`.
In this case, an error will be raised if the symlink is dangling
as there is no file that can actually be uploaded.
If ``links="always"``, an error is raised instead upon symlink
conversion failure.

This function is intended to be used with
:py:func:`~gypsum_client.clone_operations.clone_version`,
which creates symlinks to files in `cache`.

See Also:
    :py:func:`~gypsum_client.upload_api_operations.start_upload`,
    to actually start the upload.

    :py:func:`~gypsum_client.clone_operations.clone_version`,
    to prepare the symlinks.

Example:

    .. code-block:: python

        import os
        import tempfile
        cache = tempfile.mkdtemp()
        dest = tempfile.mkdtemp()

        # Clone a project
        clone_version("test-R", "basic", "v1", destination=dest, cache_dir=cache)

        # Make some modification
        with open(os.path.join(dest, "heanna"), "w") as f:
            f.write("sumire")

        # Prepare the directory for upload
        prepped = prepare_directory_upload(dest, cache_dir=cache)
"""

import os
from typing import Literal

from ._utils import (
    BUCKET_CACHE_NAME,
    _sanitize_path,
)
from .cache_directory import cache_directory

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"


def prepare_directory_upload(
    directory: str,
    links: Literal["auto", "always", "never"] = "auto",
    cache_dir: str = cache_directory(),
) -> dict:
    """Prepare to upload a directory's contents.

    Walk `directory`, collect its regular files and, depending on `links`,
    convert symlinks that point into `cache_dir` into upload links.

    Args:
        directory:
            Path to a directory, the contents of which are to be
            uploaded via :py:func:`~gypsum_client.start_upload.start_upload`.

        links:
            Indicate how to handle symlinks in `directory`.
            Must be one of the following:

            - "auto": Will attempt to convert symlinks into upload links.
              If the conversion fails, a regular upload is performed.
            - "always": Will attempt to convert symlinks into upload links.
              If the conversion fails, an error is raised.
            - "never": Will never attempt to convert symlinks into upload
              links. All symlinked files are treated as regular uploads.

        cache_dir:
            Path to the cache directory, used to convert symlinks into
            upload links.

    Returns:
        Dictionary containing:

        - `files`: list of paths (relative to `directory`) to be used as
          `files=` in :py:func:`~gypsum_client.start_upload.start_upload`.
        - `links`: list of dictionaries, each with the keys `from.path`,
          `to.project`, `to.asset`, `to.version` and `to.path`, to be used
          as `links=` in
          :py:func:`~gypsum_client.start_upload.start_upload`.

    Raises:
        ValueError:
            If `links` is not one of the supported options; if
            ``links="always"`` and a symlink cannot be converted into an
            upload link; or if a symlink that has to be treated as a
            regular upload is dangling.
    """
    _links_options = ["auto", "always", "never"]
    if links not in _links_options:
        raise ValueError(
            f"Invalid value for 'links': {links}. Must be one of {_links_options}."
        )

    out_files = []
    out_links = []

    # A trailing slash guarantees that the prefix test only matches whole
    # path components of the cache directory.
    cache_dir = _normalize_and_sanitize_path(cache_dir)
    if not cache_dir.endswith("/"):
        cache_dir += "/"

    for root, _, files in os.walk(directory):
        for name in files:
            full_path = os.path.join(root, name)
            rel_path = os.path.relpath(full_path, directory)

            if not os.path.islink(full_path):
                out_files.append(rel_path)
                continue

            dest = os.readlink(full_path)
            if not os.path.isabs(dest):
                # readlink() returns the target exactly as stored; a relative
                # target is relative to the directory holding the symlink,
                # not to the current working directory. Resolve it so that
                # the existence check and the cache match see the real path.
                dest = os.path.abspath(
                    os.path.join(os.path.dirname(full_path), dest)
                )

            if links != "never":
                dest = _normalize_and_sanitize_path(dest)
                dest_components = _match_path_to_cache(dest, cache_dir)
                if dest_components:
                    out_links.append(
                        {
                            "from.path": rel_path,
                            "to.project": dest_components["project"],
                            "to.asset": dest_components["asset"],
                            "to.version": dest_components["version"],
                            "to.path": dest_components["path"],
                        }
                    )
                    continue

                if links == "always":
                    raise ValueError(
                        f"Failed to convert symlink '{dest}' to an upload link."
                    )

            # Falling back to a regular upload: the link target must exist,
            # otherwise there is nothing to upload.
            if not os.path.exists(dest):
                raise ValueError(
                    f"Cannot use a dangling link to '{dest}' as a regular upload."
                )
            out_files.append(rel_path)

    return {"files": out_files, "links": out_links}
def _normalize_and_sanitize_path(path: str) -> str:
    """Normalize the parent directory of an existing path, then sanitize it.

    When `path` exists, its directory part is passed through
    :py:func:`os.path.normpath` and re-joined with the unchanged final
    component. Paths that do not exist (e.g., targets of dangling symlinks)
    skip this step. In both cases the result is passed through
    `_sanitize_path`.

    Args:
        path: Path to clean up.

    Returns:
        The value returned by `_sanitize_path` for the (possibly
        normalized) path.
    """
    if os.path.exists(path):
        # os.path.split() yields the same (dirname, basename) pair.
        parent, leaf = os.path.split(path)
        path = os.path.join(os.path.normpath(parent), leaf)

    return _sanitize_path(path)


def _match_path_to_cache(path: str, cache: str) -> dict:
    """Infer project, asset, version and file path from a cache location.

    Args:
        path: Path to examine; it is split on "/" after the cache prefix.
        cache: Cache directory prefix that `path` must start with.

    Returns:
        A dictionary with the keys `project`, `asset`, `version` and `path`
        if `path` starts with `cache` and the remainder has at least five
        "/"-separated components, the first being `BUCKET_CACHE_NAME`.
        Otherwise `None`.
    """
    if not path.startswith(cache):
        return None

    parts = path[len(cache) :].split("/")
    # Need bucket/project/asset/version plus at least one path component.
    if len(parts) < 5:
        return None

    bucket, project, asset, version, *file_parts = parts
    if bucket != BUCKET_CACHE_NAME:
        return None

    return {
        "project": project,
        "asset": asset,
        "version": version,
        "path": "/".join(file_parts),
    }