##### Multi-modal

Here, we'll showcase how to curate and register ECCITE-seq data from
Papalexi21 in the form of MuData objects.

ECCITE-seq is designed to enable interrogation of single-cell
transcriptomes together with surface protein markers in the context of
CRISPR screens.

MuData objects build on top of AnnData objects to store multimodal
data.

 # pip install lamindb
 # Shell escape (notebook syntax): initialize a LaminDB instance with its
 # storage under ./test-multimodal and the bionty module enabled.
 !lamin init --storage ./test-multimodal --modules bionty

 import lamindb as ln
 import bionty as bt

 # Use "human" as the organism for bionty; reset to None in the clean-up
 # step at the end of the notebook.
 bt.settings.organism = "human"
 # Start tracking this notebook run; it is closed by ln.finish() further down.
 ln.track()

#### Creating MuData Artifacts

lamindb provides the "Artifact.from_mudata()" method to create an
"Artifact" from a MuData object.

 # Load the example dataset from lamindb's bundled datasets module: a subset
 # of the Papalexi21 data as a MuData object.
 mdata = ln.core.datasets.mudata_papalexi21_subset()
 mdata

 # Create an Artifact from the MuData object under the key "papalexi.h5mu".
 # Note: .save() is not called on this artifact in this section.
 mdata_artifact = ln.Artifact.from_mudata(mdata, key="papalexi.h5mu")
 mdata_artifact

 # MuData Artifacts have the corresponding otype
 mdata_artifact.otype

 # MuData Artifacts can easily be loaded back into memory
 papalexi_in_memory = mdata_artifact.load()
 papalexi_in_memory

#### Schema

 # Define the label types and their members. Each type (is_type=True) is
 # saved first so that its member labels can reference it via `type=`.
 perturbation = ln.ULabel(name="Perturbation", is_type=True).save()
 for perturbation_label in ("Perturbed", "NT"):
     ln.ULabel(name=perturbation_label, type=perturbation).save()

 replicate = ln.ULabel(name="Replicate", is_type=True).save()
 for replicate_label in ("rep1", "rep2", "rep3"):
     ln.ULabel(name=replicate_label, type=replicate).save()

 # Schema for the "obs" slot: two categorical features whose dtypes are the
 # ULabel types `perturbation` and `replicate`.
 shared_obs_features = [
     ln.Feature(name="perturbation", dtype=perturbation).save(),
     ln.Feature(name="replicate", dtype=replicate).save(),
 ]
 obs_schema = ln.Schema(
     name="mudata_papalexi21_subset_obs_schema",
     features=shared_obs_features,
 ).save()

 # Schema for the "rna:obs" slot: two integer columns and one float column.
 # coerce_dtype=True asks validation to cast columns to the declared dtypes
 # (see the ln.Schema documentation).
 rna_obs_features = [
     ln.Feature(name="nCount_RNA", dtype=int).save(),
     ln.Feature(name="nFeature_RNA", dtype=int).save(),
     ln.Feature(name="percent.mito", dtype=float).save(),
 ]
 obs_schema_rna = ln.Schema(
     name="mudata_papalexi21_subset_rna_obs_schema",
     features=rna_obs_features,
     coerce_dtype=True,
 ).save()

 # Schema for the "hto:obs" slot: two integer columns plus "technique",
 # which is typed by the bionty ExperimentalFactor registry.
 hto_obs_features = [
     ln.Feature(name="nCount_HTO", dtype=int).save(),
     ln.Feature(name="nFeature_HTO", dtype=int).save(),
     ln.Feature(name="technique", dtype=bt.ExperimentalFactor).save(),
 ]
 obs_schema_hto = ln.Schema(
     name="mudata_papalexi21_subset_hto_obs_schema",
     features=hto_obs_features,
     coerce_dtype=True,
 ).save()

 # Schema for the "rna:var" slot: identifiers are gene symbols from the
 # bionty Gene registry and values are floats.
 var_schema_rna = ln.Schema(
     name="mudata_papalexi21_subset_rna_var_schema",
     itype=bt.Gene.symbol,
     dtype=float,
 ).save()

 # Composite schema: map every MuData slot to the schema that describes it.
 # Keys of the form "<modality>:<table>" address a table of one modality.
 slot_schemas = {
     "obs": obs_schema,
     "rna:obs": obs_schema_rna,
     "hto:obs": obs_schema_hto,
     "rna:var": var_schema_rna,
 }
 mudata_schema = ln.Schema(
     name="mudata_papalexi21_subset_mudata_schema",
     otype="MuData",
     slots=slot_schemas,
 ).save()

 # Show a summary of the composite schema.
 mudata_schema.describe()

#### Validate MuData annotations

 # Pair the MuData object with the composite schema it is validated against.
 curator = ln.curators.MuDataCurator(mdata, mudata_schema)

 # The first validation pass is presumably expected to fail (the "rna:var"
 # slot is fixed right below). Report the error instead of discarding it so
 # the reader can see what needs fixing, and carry on.
 try:
     curator.validate()
 except ln.errors.ValidationError as error:
     print(error)

 # Fix the "rna:var" slot: first standardize the existing values in
 # "columns", then add the values that are still unknown as new records
 # (see the lamindb curator documentation for both calls).
 curator.slots["rna:var"].cat.standardize("columns")

 curator.slots["rna:var"].cat.add_new_from("columns")

 # Validate again; this call is not guarded, so a remaining problem raises.
 curator.validate()

#### Register curated Artifact

 # Save the curated MuData object as an Artifact under the given key.
 artifact = curator.save_artifact(key="mudata_papalexi21_subset.h5mu")

 # Show a summary of the saved artifact.
 artifact.describe()

 # Finish the run that was started with ln.track().
 ln.finish()

 # clean up test instance
 # Undo the organism setting made at the start of the notebook.
 bt.settings.organism = None
 # Shell escapes (notebook syntax): remove the local storage directory, then
 # delete the instance (--force presumably skips the confirmation prompt).
 !rm -r test-multimodal
 !lamin delete --force test-multimodal