"""Prepare to upload a directory's contents.
Files in `directory` (that are not symlinks) are used as
regular uploads, i.e., `files=` in
:py:func:`~gypsum_client.upload_api_operations.start_upload`.
If `directory` contains a symlink to a file in `cache`,
we assume that it points to a file that was previously downloaded
by, e.g., :py:func:`~gypsum_client.upload_api_operations.save_file` or
:py:func:`~gypsum_client.upload_api_operations.save_version`.
Thus, instead of performing a regular upload, we attempt to
create an upload link, i.e., ``links=`` in
:py:func:`~gypsum_client.upload_api_operations.start_upload`.
This is achieved by examining the destination path of the
symlink and inferring the link destination in the backend.
Note that this still works if the symlinks are dangling.
If a symlink cannot be converted into an upload link, it will
be used as a regular upload, i.e., the contents of the symlink
destination will be uploaded by
:py:func:`~gypsum_client.upload_api_operations.start_upload`.
In this case, an error will be raised if the symlink is dangling
as there is no file that can actually be uploaded.
If ``links="always"``, an error is raised instead upon symlink
conversion failure.
This function is intended to be used with
:py:func:`~gypsum_client.clone_operations.clone_version`,
which creates symlinks to files in `cache`.
See Also:
    :py:func:`~gypsum_client.upload_api_operations.start_upload`,
    to actually start the upload.
    :py:func:`~gypsum_client.clone_operations.clone_version`,
    to prepare the symlinks.
Example:
    .. code-block:: python
        import os
        import tempfile
        cache = tempfile.mkdtemp()
        dest = tempfile.mkdtemp()
        # Clone a project
        clone_version("test-R", "basic", "v1", destination=dest, cache_dir=cache)
        # Make some modification
        with open(os.path.join(dest, "heanna"), "w") as f:
            f.write("sumire")
        # Prepare the directory for upload
        prepped = prepare_directory_upload(dest, cache_dir=cache)
"""
import os
from typing import Literal, Optional

from ._utils import (
    BUCKET_CACHE_NAME,
    _sanitize_path,
)
from .cache_directory import cache_directory
__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"
def prepare_directory_upload(
    directory: str,
    links: Literal["auto", "always", "never"] = "auto",
    cache_dir: Optional[str] = None,
) -> dict:
    """Prepare to upload a directory's contents.

    Walk ``directory``, list the files it contains and, where possible,
    convert symlinks that point into the cache into upload links. The
    result is meant to be passed on to
    :py:func:`~gypsum_client.upload_api_operations.start_upload`.

    Only entries that :py:func:`os.walk` reports as files are considered.

    Args:
        directory:
            Path to a directory, the contents of which are to be
            uploaded via
            :py:func:`~gypsum_client.upload_api_operations.start_upload`.

        links:
            Indicate how to handle symlinks in ``directory``.
            Must be one of the following:

            - "auto": Will attempt to convert symlinks into upload links.
              If the conversion fails, a regular upload is performed.
            - "always": Will attempt to convert symlinks into upload links.
              If the conversion fails, an error is raised.
            - "never": Will never attempt to convert symlinks into upload
              links. All symlinked files are treated as regular uploads.

        cache_dir:
            Path to the cache directory, used to convert symlinks into
            upload links. If ``None`` (the default), the value of
            ``cache_directory()`` at call time is used.

    Returns:
        Dictionary containing:

        - ``files``: list of paths, relative to ``directory``, to be used
          as ``files=`` in
          :py:func:`~gypsum_client.upload_api_operations.start_upload`.
        - ``links``: list of dictionaries, each with the keys
          ``from.path``, ``to.project``, ``to.asset``, ``to.version`` and
          ``to.path``, to be used as ``links=`` in
          :py:func:`~gypsum_client.upload_api_operations.start_upload`.

    Raises:
        ValueError:
            If ``links`` is not one of the supported options, if
            ``links="always"`` and a symlink cannot be converted into an
            upload link, or if a symlink that has to be uploaded as a
            regular file is dangling.
    """
    _links_options = ["auto", "always", "never"]
    if links not in _links_options:
        raise ValueError(
            f"Invalid value for 'links': {links}. Must be one of {_links_options}."
        )

    # Resolve the default lazily: a default of ``cache_directory()`` in the
    # signature would be frozen at import time and miss later changes.
    if cache_dir is None:
        cache_dir = cache_directory()

    out_files = []
    out_links = []

    # A trailing slash ensures that prefix matching against the cache only
    # succeeds on a whole path component.
    cache_dir = _normalize_and_sanitize_path(cache_dir)
    if not cache_dir.endswith("/"):
        cache_dir += "/"

    for root, _, files in os.walk(directory):
        for name in files:
            full_path = os.path.join(root, name)
            rel_path = os.path.relpath(full_path, directory)

            if not os.path.islink(full_path):
                out_files.append(rel_path)
                continue

            dest = os.readlink(full_path)

            if links != "never":
                dest = _normalize_and_sanitize_path(dest)
                dest_components = _match_path_to_cache(dest, cache_dir)
                if dest_components:
                    out_links.append(
                        {
                            "from.path": rel_path,
                            "to.project": dest_components["project"],
                            "to.asset": dest_components["asset"],
                            "to.version": dest_components["version"],
                            "to.path": dest_components["path"],
                        }
                    )
                    continue

                if links == "always":
                    raise ValueError(
                        f"Failed to convert symlink '{dest}' to an upload link."
                    )

            # The symlink is used as a regular upload, so its target must
            # exist. Test the link itself rather than the ``readlink`` result:
            # ``os.path.exists`` follows the link, which resolves relative
            # targets against the link's directory instead of the current
            # working directory.
            if not os.path.exists(full_path):
                raise ValueError(
                    f"Cannot use a dangling link to '{dest}' as a regular upload."
                )
            out_files.append(rel_path)

    return {"files": out_files, "links": out_links}
def _normalize_and_sanitize_path(path: str) -> str:
    """Normalize the directory part of an existing path, then sanitize it.

    If ``path`` exists on disk, its parent directory is passed through
    :py:func:`os.path.normpath` and re-joined with the unchanged final
    component. Paths that do not exist are left as they are. In both cases
    the result is handed to ``_sanitize_path``, a helper imported from
    ``._utils`` whose exact behavior is not visible in this file.

    Args:
        path: Path to normalize and sanitize.

    Returns:
        Whatever ``_sanitize_path`` returns for the (possibly normalized) path.
    """
    if not os.path.exists(path):
        return _sanitize_path(path)

    parent, leaf = os.path.split(path)
    return _sanitize_path(os.path.join(os.path.normpath(parent), leaf))
def _match_path_to_cache(path: str, cache: str) -> dict:
    if not path.startswith(cache):
        return None
    remainder = path[len(cache) :]
    components = remainder.split("/")
    if len(components) <= 4 or components[0] != BUCKET_CACHE_NAME:
        return None
    return {
        "project": components[1],
        "asset": components[2],
        "version": components[3],
        "path": "/".join(components[4:]),
    }