Jupyter Notebook

Multi-modal

Here, we’ll showcase how to curate and register ECCITE-seq data from Papalexi21 in the form of MuData objects.

ECCITE-seq is designed to enable interrogation of single-cell transcriptomes together with surface protein markers in the context of CRISPR screens.

MuData objects build on top of AnnData objects to store multimodal data.

# Uncomment to install lamindb with the jupyter and bionty extras.
# !pip install 'lamindb[jupyter,bionty]'
# Initialize a lamindb instance with local storage at ./test-multimodal and the bionty module.
!lamin init --storage ./test-multimodal --modules bionty
Hide code cell output
 initialized lamindb: testuser1/test-multimodal
import lamindb as ln
import bionty as bt

# Start tracking this notebook; the recorded output shows a Transform and a Run being created.
ln.track()
Hide code cell output
 connected lamindb: testuser1/test-multimodal
 created Transform('5MvP6WeoMC430000'), started new Run('QKewR8L6...') at 2025-04-15 16:37:36 UTC
 notebook imports: bionty==1.3.0 lamindb==1.4.0

Creating MuData Artifacts

lamindb provides the Artifact.from_mudata() method to create an Artifact from a MuData object.

# Load the example Papalexi21 subset as an in-memory MuData object.
mdata = ln.core.datasets.mudata_papalexi21_subset()
# Bare expression as the last line so the notebook displays the object summary.
mdata
Hide code cell output
MuData object with n_obs × n_vars = 200 × 300
  obs:	'perturbation', 'replicate'
  var:	'name'
  4 modalities
    rna:	200 x 173
      obs:	'nCount_RNA', 'nFeature_RNA', 'percent.mito'
      var:	'name'
    adt:	200 x 4
      obs:	'nCount_ADT', 'nFeature_ADT'
      var:	'name'
    hto:	200 x 12
      obs:	'nCount_HTO', 'nFeature_HTO', 'technique'
      var:	'name'
    gdo:	200 x 111
      obs:	'nCount_GDO'
      var:	'name'
# Build an Artifact from the MuData object under the key "papalexi.h5mu".
mdata_af = ln.Artifact.from_mudata(mdata, key="papalexi.h5mu")
# Bare expression as the last line so the notebook displays the Artifact repr.
mdata_af
Hide code cell output
Artifact(uid='XQhZiiz1zBns4oK90000', is_latest=True, key='papalexi.h5mu', suffix='.h5mu', kind='dataset', otype='MuData', size=549984, hash='aFIJ7G9AIcxoEib8kecChw', n_observations=200, space_id=1, storage_id=1, run_id=1, created_by_id=1, created_at=<django.db.models.expressions.DatabaseDefault object at 0x7f94c8e00770>)
# Artifacts created from MuData objects report 'MuData' as their otype.
mdata_af.otype
Hide code cell output
'MuData'
# MuData Artifacts can be loaded back into memory with .load().
papalexi_in_memory = mdata_af.load()
# Display the loaded object; the recorded summary matches the original mdata summary.
papalexi_in_memory
Hide code cell output
MuData object with n_obs × n_vars = 200 × 300
  obs:	'perturbation', 'replicate'
  var:	'name'
  4 modalities
    rna:	200 x 173
      obs:	'nCount_RNA', 'nFeature_RNA', 'percent.mito'
      var:	'name'
    adt:	200 x 4
      obs:	'nCount_ADT', 'nFeature_ADT'
      var:	'name'
    hto:	200 x 12
      obs:	'nCount_HTO', 'nFeature_HTO', 'technique'
      var:	'name'
    gdo:	200 x 111
      obs:	'nCount_GDO'
      var:	'name'

Schema

# Labels: each label type is saved first so its member labels can point to it.
perturbation = ln.ULabel(name="Perturbation", is_type=True).save()
for label_name in ("Perturbed", "NT"):
    ln.ULabel(name=label_name, type=perturbation).save()

replicate = ln.ULabel(name="Replicate", is_type=True).save()
for label_name in ("rep1", "rep2", "rep3"):
    ln.ULabel(name=label_name, type=replicate).save()

# Schema for the global (cross-modality) obs annotations.
obs_features = [
    ln.Feature(name="perturbation", dtype="cat[ULabel[Perturbation]]").save(),
    ln.Feature(name="replicate", dtype="cat[ULabel[Replicate]]").save(),
]
obs_schema = ln.Schema(
    name="mudata_papalexi21_subset_obs_schema",
    features=obs_features,
).save()

# Schema for the obs annotations of the "rna" modality.
rna_obs_features = [
    ln.Feature(name="nCount_RNA", dtype=int).save(),
    ln.Feature(name="nFeature_RNA", dtype=int).save(),
    ln.Feature(name="percent.mito", dtype=float).save(),
]
obs_schema_rna = ln.Schema(
    name="mudata_papalexi21_subset_rna_obs_schema",
    features=rna_obs_features,
    coerce_dtype=True,
).save()

# Schema for the obs annotations of the "hto" modality.
hto_obs_features = [
    ln.Feature(name="nCount_HTO", dtype=float).save(),
    ln.Feature(name="nFeature_HTO", dtype=int).save(),
    ln.Feature(name="technique", dtype=bt.ExperimentalFactor).save(),
]
obs_schema_hto = ln.Schema(
    name="mudata_papalexi21_subset_hto_obs_schema",
    features=hto_obs_features,
    coerce_dtype=True,
).save()

# Schema for the var axis of the "rna" modality, identified by gene symbol.
var_schema_rna = ln.Schema(
    name="mudata_papalexi21_subset_rna_var_schema",
    itype=bt.Gene.symbol,
    dtype=float,
).save()

# Composite schema: maps each MuData slot ("<modality>:<axis>", or "obs" for
# the global annotations) to the schema defined for it above.
slot_schemas = {
    "obs": obs_schema,
    "rna:obs": obs_schema_rna,
    "hto:obs": obs_schema_hto,
    "rna:var": var_schema_rna,
}
mudata_schema = ln.Schema(
    name="mudata_papalexi21_subset_mudata_schema",
    otype="MuData",
    components=slot_schemas,
).save()
Hide code cell output
! record with similar name exists! did you mean to load it?
uid name is_type description reference reference_type space_id type_id run_id created_at created_by_id _aux _branch_code
id
1 s0GowB3a Perturbation True None None None 1 None 1 2025-04-15 16:37:38.041000+00:00 1 None 1
! record with similar name exists! did you mean to load it?
uid name dtype is_type unit description array_rank array_size array_shape proxy_dtype synonyms _expect_many _curation space_id type_id run_id created_at created_by_id _aux _branch_code
id
4 RJIRkiuFrFkv nFeature_RNA int None None None 0 0 None None None True None 1 None 1 2025-04-15 16:37:38.103000+00:00 1 {'af': {'0': None, '1': True, '2': False}} 1
# Display the saved composite schema record.
mudata_schema
Schema(uid='uBmIjPJrzVhKoOZ9II4p', name='mudata_papalexi21_subset_mudata_schema', n=-1, itype='Composite', is_type=False, otype='MuData', dtype='num', hash='muhs8kiT-fTL0SufVA-pOQ', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=2025-04-15 16:37:38 UTC)

Validate MuData annotations

# Validate the MuData object against the composite schema.
curator = ln.curators.MuDataCurator(mdata, mudata_schema)
try:
    curator.validate()
except ln.errors.ValidationError as error:
    # A failure is expected on this first pass: the gene symbols in "rna:var"
    # are only standardized in the next step. Show the reason instead of
    # discarding it so the reader can see what still needs curation.
    print(error)
! using default organism = human
! using default organism = human
! using default organism = human
!   96 terms are not validated: 'RP5-827C21.6', 'XX-CR54.1', 'RP11-379B18.5', 'RP11-778D9.12', 'RP11-703G6.1', 'AC005150.1', 'RP11-717H13.1', 'CTC-498J12.1', 'CTC-467M3.1', 'HIST1H4K', 'RP11-524H19.2', 'AC006042.7', 'AC002066.1', 'AC073934.6', 'RP11-268G12.1', 'U52111.14', 'RP11-235C23.5', 'RP11-12J10.3', 'CASC1', 'RP11-324E6.9', ...
    12 synonyms found: "CTC-467M3.1" → "MEF2C-AS2", "HIST1H4K" → "H4C12", "CASC1" → "DNAI7", "LARGE" → "LARGE1", "NBPF16" → "NBPF15", "C1orf65" → "CCDC185", "IBA57-AS1" → "IBA57-DT", "KIAA1239" → "NWD2", "TMEM75" → "LINC02912", "AP003419.16" → "RPS6KB2-AS1", "FAM65C" → "RIPOR3", "C14orf177" → "LINC02914"
    → curate synonyms via .standardize("columns")
    for remaining terms:
    → fix typos, remove non-existent values, or save terms via .add_new_from("columns")
# Curate synonyms in the "rna:var" slot, as suggested by the validation log.
curator.slots["rna:var"].cat.standardize("columns")
# Save the remaining non-validated terms, as suggested by the validation log.
curator.slots["rna:var"].cat.add_new_from("columns")
# Re-run validation now that the terms have been curated.
curator.validate()
Hide code cell output
! using default organism = human
! using default organism = human
! using default organism = human

Register curated Artifact

# Save the curated MuData object as an Artifact under the given key.
artifact = curator.save_artifact(key="mudata_papalexi21_subset.h5mu")
Hide code cell output
! using default organism = human
! using default organism = human
! using default organism = human
 returning existing schema with same hash: Schema(uid='tQt1swwzScOyxegHtaPa', name='mudata_papalexi21_subset_obs_schema', n=2, itype='Feature', is_type=False, hash='s_pyvTup4kDNqfE0Uda-RQ', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=2025-04-15 16:37:38 UTC)
! using default organism = human
! 12 unique terms (6.90%) are not validated for symbol: 'CTC-467M3.1', 'HIST1H4K', 'CASC1', 'LARGE', 'NBPF16', 'C1orf65', 'IBA57-AS1', 'KIAA1239', 'TMEM75', 'AP003419.16', ...
! using default organism = human
 returning existing schema with same hash: Schema(uid='H4ZIxaJLpBYrlabb2j1I', name='mudata_papalexi21_subset_rna_obs_schema', n=3, itype='Feature', is_type=False, hash='P7Ov2yyw84ZWPjhvoarMvA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=2025-04-15 16:37:38 UTC)
 returning existing schema with same hash: Schema(uid='AIKI1xc8kvgEz3AYTGHH', name='mudata_papalexi21_subset_hto_obs_schema', n=3, itype='Feature', is_type=False, hash='O1BhyIsVj6eIX62YDnrhtA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=2025-04-15 16:37:38 UTC)
# Print an overview of the artifact: general fields, dataset features per slot, and labels.
artifact.describe()
Hide code cell output
Artifact .h5mu/MuData
├── General
│   ├── .uid = 'HOk9sjYwgYsKU9JZ0000'
│   ├── .key = 'mudata_papalexi21_subset.h5mu'
│   ├── .size = 549984
│   ├── .hash = 'aFIJ7G9AIcxoEib8kecChw'
│   ├── .n_observations = 200
│   ├── .path = 
│   │   /home/runner/work/lamin-usecases/lamin-usecases/docs/test-multimodal/.lamindb/HOk9sjYwgYsKU9JZ0000.h5mu
│   ├── .created_by = testuser1 (Test User1)
│   ├── .created_at = 2025-04-15 16:37:42
│   └── .transform = 'Multi-modal'
├── Dataset features
│   ├── obs2                     [Feature]                                                           
│   │   perturbation                cat[ULabel[Perturbation]]  NT, Perturbed                            
│   │   replicate                   cat[ULabel[Replicate]]     rep1, rep2, rep3                         
│   ├── ['rna'].var172           [bionty.Gene]                                                       
│   │   SH2D6                       float                                                               
│   │   ARHGAP26-AS1                float                                                               
│   │   GABRA1                      float                                                               
│   │   HLA-DQB1-AS1                float                                                               
│   │   HLA-DQB1-AS1                float                                                               
│   │   HLA-DQB1-AS1                float                                                               
│   │   HLA-DQB1-AS1                float                                                               
│   │   HLA-DQB1-AS1                float                                                               
│   │   HLA-DQB1-AS1                float                                                               
│   │   HLA-DQB1-AS1                float                                                               
│   │   SPACA1                      float                                                               
│   │   VNN1                        float                                                               
│   │   CTAGE15                     float                                                               
│   │   CTAGE15                     float                                                               
│   │   PFKFB1                      float                                                               
│   │   TRPC5                       float                                                               
│   │   RBPMS-AS1                   float                                                               
│   │   CA8                         float                                                               
│   │   CSMD3                       float                                                               
│   │   ZNF483                      float                                                               
│   ├── ['rna'].obs3             [Feature]                                                           
│   │   nCount_RNA                  int                                                                 
│   │   nFeature_RNA                int                                                                 
│   │   percent.mito                float                                                               
│   └── ['hto'].obs3             [Feature]                                                           
│       technique                   cat[bionty.ExperimentalF…  cell hashing                             
│       nCount_HTO                  float                                                               
│       nFeature_HTO                int                                                                 
└── Labels
    └── .experimental_factors       bionty.ExperimentalFactor  cell hashing                             
        .ulabels                    ULabel                     Perturbed, NT, rep1, rep2, rep3          
# Mark the run started by ln.track() as finished.
ln.finish()
Hide code cell output
! cells [(15, 17)] were not run consecutively
 finished Run('QKewR8L6') after 8s at 2025-04-15 16:37:44 UTC
# Clean up the test instance: remove the local storage directory first,
# then delete the lamindb instance itself without prompting.
!rm -r test-multimodal
!lamin delete --force test-multimodal
Hide code cell output
 deleting instance testuser1/test-multimodal