SummarizedExperiment.js

import { DataFrame, SummarizedExperiment } from "bioconductor";
import { H5Group, H5DataSet } from "./h5.js";
import { readObject, readObjectFile, saveObject } from "./general.js";
import { joinPath, jsonBuffer } from "./utils.js";
import { readAnnotatedMetadata, saveAnnotatedMetadata } from "./metadata.js";

/**
 * A summarized experiment.
 * @external SummarizedExperiment 
 * @see {@link https://ltla.github.io/bioconductor.js/SummarizedExperiment.html}
 */

/**
 * Read a {@link external:SummarizedExperiment SummarizedExperiment} from its takane-formatted object directory.
 *
 * Assays are read from `assays/<index>`, following the order of names in `assays/names.json` (if that file exists).
 * Row and column annotations are read from `row_data` and `column_data` if those subdirectories contain an `OBJECT` file.
 * If there are no assays and an annotation directory is absent, an empty data frame is created with the extent recorded in `metadata.summarized_experiment.dimensions`,
 * so that the dimensions of an assay-less experiment are preserved.
 *
 * @param {string} path - Path to the takane-formatted object directory containing the {@link external:SummarizedExperiment SummarizedExperiment}.
 * @param {object} metadata - Takane object metadata, typically generated by calling {@linkcode readObjectFile} on `path`.
 * This should contain a `summarized_experiment.dimensions` array holding the number of rows and columns.
 * @param {object} globals - Object satisfying the {@link GlobalsInterface}.
 * @param {object} [options={}] - Further options.
 * @param {?function} [options.SummarizedExperiment_readAssay=null] - How to read the assays.
 * If `null` or undefined, {@linkcode readObject} is used.
 * If a function is provided, it should accept `nrow` and `ncol` (the number of rows and columns in the SummarizedExperiment, respectively) as well as `path`, `metadata`, `globals` and `options` (as described above);
 * and should return an object (possibly asynchronously) for which [`NUMBER_OF_ROWS`](https://ltla.github.io/bioconductor.js/global.html#NUMBER_OF_ROWS) is equal to `nrow`
 * and [`NUMBER_OF_COLUMNS`](https://ltla.github.io/bioconductor.js/global.html#NUMBER_OF_COLUMNS) is equal to `ncol`.
 * @param {function|boolean} [options.SummarizedExperiment_readMetadata=true] - How to read the metadata.
 * If `true`, {@linkcode readObject} is used, while if `false`, metadata will be skipped.
 * If a function is provided, it should accept `path`, `metadata`, `globals` and `options` (as described above), and return a {@link external:List List}.
 *
 * @return {external:SummarizedExperiment} The summarized experiment object.
 * @async
 */
export async function readSummarizedExperiment(path, metadata, globals, options = {}) {
    // An explicitly undefined option is treated like a missing one, so that we
    // fall back to readObject() instead of attempting to call `undefined`.
    const custom_reader = options.SummarizedExperiment_readAssay;
    const read_assay = (custom_reader === undefined || custom_reader === null) ? null : custom_reader;

    const se_options = {};
    const assays = {};
    const name_path = joinPath(path, "assays/names.json");
    if (await globals.exists(name_path)) {
        const names_contents = await globals.get(name_path, { asBuffer: true });
        const assay_names = JSON.parse(new TextDecoder().decode(names_contents));

        se_options.assayOrder = assay_names;
        for (const [i, aname] of assay_names.entries()) {
            // Each assay lives in a subdirectory named after its position in names.json.
            const assay_path = joinPath(path, "assays", String(i));
            const assay_meta = await readObjectFile(assay_path, globals);
            if (read_assay === null) {
                assays[aname] = await readObject(assay_path, assay_meta, globals, options);
            } else {
                const [nrow, ncol] = metadata.summarized_experiment.dimensions;
                assays[aname] = await read_assay(nrow, ncol, assay_path, assay_meta, globals, options);
            }
        }
    }

    const has_assays = Object.keys(assays).length > 0;

    if (await globals.exists(joinPath(path, "column_data/OBJECT"))) {
        const column_data = await readObject(joinPath(path, "column_data"), null, globals, options);
        se_options.columnData = column_data;
        se_options.columnNames = column_data.rowNames();
    } else if (!has_assays) {
        // No assay is available to supply the number of columns, so take it from the
        // recorded dimensions; otherwise an assay-less experiment would lose its extent.
        se_options.columnData = new DataFrame({}, { numberOfRows: metadata.summarized_experiment.dimensions[1] });
    }

    if (await globals.exists(joinPath(path, "row_data/OBJECT"))) {
        const row_data = await readObject(joinPath(path, "row_data"), null, globals, options);
        se_options.rowData = row_data;
        se_options.rowNames = row_data.rowNames();
    } else if (!has_assays) {
        // Same reasoning as for the columns, using the recorded number of rows.
        se_options.rowData = new DataFrame({}, { numberOfRows: metadata.summarized_experiment.dimensions[0] });
    }

    se_options.metadata = await readAnnotatedMetadata(joinPath(path, "other_data"), globals, options, "SummarizedExperiment_readMetadata");
    return new SummarizedExperiment(assays, se_options);
}

/**
 * Write a {@link external:SummarizedExperiment SummarizedExperiment} to a takane-formatted object directory.
 *
 * The directory receives an `OBJECT` file recording the object type, version and dimensions.
 * An `assays` subdirectory (with `names.json` and one numbered subdirectory per assay) is only created if `x` has at least one assay.
 * The `column_data` and `row_data` subdirectories are only created if the corresponding annotations have at least one column or non-null names.
 * Finally, the metadata of `x` is passed to {@linkcode saveAnnotatedMetadata} for storage under `other_data`.
 *
 * @param {external:SummarizedExperiment} x - The summarized experiment.
 * @param {string} path - Path to the directory in which to save `x`.
 * @param {object} globals - Object satisfying the {@link GlobalsInterface}.
 * @param {object} [options={}] - Further options, forwarded to {@linkcode saveObject} and {@linkcode saveAnnotatedMetadata}.
 *
 * @return `x` is stored at `path`.
 * @async
 */
export async function saveSummarizedExperiment(x, path, globals, options = {}) {
    await globals.mkdir(path);

    const objectPath = joinPath(path, "OBJECT");
    const objectContents = {
        type: "summarized_experiment",
        summarized_experiment: {
            version: "1.0",
            dimensions: [ x.numberOfRows(), x.numberOfColumns() ]
        }
    };
    await globals.write(objectPath, jsonBuffer(objectContents));

    const assayNames = x.assayNames();
    const numAssays = assayNames.length;
    if (numAssays > 0) {
        const assayDir = joinPath(path, "assays");
        await globals.mkdir(assayDir);
        await globals.write(joinPath(assayDir, "names.json"), jsonBuffer(assayNames));

        // Each assay goes into a subdirectory named after its position in names.json.
        for (let index = 0; index < numAssays; index++) {
            const currentAssay = x.assay(assayNames[index]);
            await saveObject(currentAssay, joinPath(assayDir, String(index)), globals, options);
        }
    }

    // Column annotations are only worth saving if they carry columns or names;
    // the names are attached to the data frame as its row names.
    const columnData = x.columnData();
    const hasColumnAnnotations = columnData.numberOfColumns() > 0 || x.columnNames() !== null;
    if (hasColumnAnnotations) {
        const namedColumnData = columnData.setRowNames(x.columnNames());
        await saveObject(namedColumnData, joinPath(path, "column_data"), globals, options);
    }

    // Same treatment for the row annotations.
    const rowData = x.rowData();
    const hasRowAnnotations = rowData.numberOfColumns() > 0 || x.rowNames() !== null;
    if (hasRowAnnotations) {
        const namedRowData = rowData.setRowNames(x.rowNames());
        await saveObject(namedRowData, joinPath(path, "row_data"), globals, options);
    }

    const otherPath = joinPath(path, "other_data");
    await saveAnnotatedMetadata(x.metadata(), otherPath, globals, options);
}