lamindb.examples.datasets.mini_immuno

Two “mini immuno” datasets.

Datasets

lamindb.examples.datasets.mini_immuno.get_dataset1(otype='DataFrame', gene_symbols_in_index=False, with_typo=False, with_cell_type_synonym=False, with_cell_type_typo=False, with_gene_typo=False, with_outdated_gene=False, with_wrong_subtype=False, with_index_type_mismatch=False, with_date_as_iso_string=True)

A small tabular dataset measuring expression & metadata.

Return type:

DataFrame | AnnData

lamindb.examples.datasets.mini_immuno.get_dataset2(otype='DataFrame', gene_symbols_in_index=False, with_date_as_iso_string=True)

A second small tabular dataset measuring expression & metadata.

Return type:

DataFrame | AnnData

Schemas

lamindb.examples.datasets.mini_immuno.define_features_labels()

Features & labels to validate the mini immuno datasets.

import bionty as bt

import lamindb as ln

# Register the valid labels: a "Perturbation" record type with its two
# members, plus two cell types taken from the bionty source.
perturbation_type = ln.Record(name="Perturbation", is_type=True).save()
for perturbation_name in ("DMSO", "IFNG"):
    ln.Record(name=perturbation_name, type=perturbation_type).save()
for cell_type_name in ("B cell", "T cell"):
    bt.CellType.from_source(name=cell_type_name).save()

# Register the valid features. Each entry holds the keyword arguments of one
# ln.Feature; features are saved one by one in list order.
feature_specs = [
    {"name": "perturbation", "dtype": perturbation_type},
    {"name": "cell_type_by_expert", "dtype": bt.CellType},
    {"name": "cell_type_by_model", "dtype": bt.CellType},
    {"name": "assay_oid", "dtype": bt.ExperimentalFactor.ontology_id},
    {"name": "concentration", "dtype": str},
    {"name": "treatment_time_h", "dtype": "num", "coerce": True},
    {"name": "donor", "dtype": str, "nullable": True},
    {"name": "donor_ethnicity", "dtype": list[bt.Ethnicity]},
]
for feature_spec in feature_specs:
    ln.Feature(**feature_spec).save()
Return type:

None

lamindb.examples.datasets.mini_immuno.define_mini_immuno_schema_flexible()

A flexible schema to validate the mini immuno datasets.

import lamindb as ln

# Names of the features the schema lists explicitly, in schema order.
schema_feature_names = (
    "perturbation",
    "cell_type_by_model",
    "assay_oid",
    "donor",
    "concentration",
    "treatment_time_h",
)
# Fetch each feature by its name.
schema_features = [
    ln.Feature.get(name=feature_name) for feature_name in schema_feature_names
]

unsaved_schema = ln.Schema(
    name="Mini immuno schema",
    features=schema_features,
    # flexible: _additional_ columns in a dataframe are validated & annotated
    flexible=True,
)
schema = unsaved_schema.save()
Return type:

Schema

Utilities

lamindb.examples.datasets.mini_immuno.save_mini_immuno_datasets()

Save the two “mini immuno” datasets.

from datetime import date

import bionty as bt

import lamindb as ln

# Register the valid labels: two groups of records (created if missing) and
# two cell types.
for record_names in (["DMSO", "IFNG"], ["Experiment 1", "Experiment 2"]):
    ln.Record.from_values(record_names, create=True).save()
bt.CellType.from_values(["B cell", "T cell"]).save()

# Register the features. Each entry holds the keyword arguments of one
# ln.Feature; features are saved one by one in list order.
feature_specs = [
    # observation-level metadata
    {"name": "perturbation", "dtype": ln.Record},
    {"name": "sample_note", "dtype": str},
    {"name": "cell_type_by_expert", "dtype": bt.CellType},
    {"name": "cell_type_by_model", "dtype": bt.CellType},
    # dataset-level metadata
    {"name": "temperature", "dtype": float},
    {"name": "experiment", "dtype": ln.Record},
    {"name": "date_of_study", "dtype": date, "coerce": True},
    {"name": "study_note", "dtype": str},
    {"name": "study_metadata", "dtype": dict},
]
for feature_spec in feature_specs:
    ln.Feature(**feature_spec).save()

schema = ln.examples.schemas.anndata_ensembl_gene_ids_and_valid_features_in_obs()

# Ingest dataset1, then dataset2, with identical steps: load as AnnData, save
# it as an artifact under the given key, then annotate the artifact with the
# dataset-level metadata.
mini_immuno = ln.examples.datasets.mini_immuno
ingestion_plan = (
    (
        mini_immuno.get_dataset1,
        "examples/dataset1.h5ad",
        {"detail1": "123", "detail2": 1},
    ),
    (
        mini_immuno.get_dataset2,
        "examples/dataset2.h5ad",
        {"detail1": "456", "detail2": 2},
    ),
)
for load_dataset, artifact_key, study_details in ingestion_plan:
    adata = load_dataset(otype="AnnData")
    artifact = ln.Artifact.from_anndata(
        adata,
        key=artifact_key,
        schema=schema,
    ).save()
    # The metadata passed on is adata.uns itself, extended in place with the
    # ad hoc "study_metadata" entry.
    dataset_metadata = adata.uns
    dataset_metadata.update({"study_metadata": study_details})
    artifact.features.add_values(dataset_metadata)  # type: ignore