Featurize single-cell images
Here, we use scPortrait to extract features that capture both morphological and intensity-based properties of individual cells:
- Morphological features:
  - Cell area (in pixels)
- Intensity features for each fluorescence channel:
  - Mean intensity
  - Median intensity
  - 25th and 75th percentile intensities
  - Total intensity
  - Intensity density (total intensity normalized by area)
These comprehensive cellular profiles enable downstream machine learning analysis to identify cell types and states.
import lamindb as ln
import bionty as bt
import numpy as np
import pandas as pd
from alphabase.io import tempmmap
from scportrait.pipeline.featurization import CellFeaturizer
# Begin run tracking for this notebook; the matching ln.finish() call at the
# end of the notebook closes the run.
ln.track()
Show code cell output
→ connected lamindb: testuser1/test-sc-imaging
→ created Transform('BVmvckSIggRK0000', key='sc-imaging3.ipynb'), started new Run('xBtbCxpn1L4p4Zsi') at 2026-05-18 19:15:22 UTC
→ notebook imports: alphabase==1.8.1 bionty==2.4.0 lamindb-core==2.4.2 numpy==2.0.2 pandas==2.3.3 scportrait==1.8.0
• recommendation: to identify the notebook across renames, pass the uid: ln.track("BVmvckSIggRK")
We will use the single-cell image datasets we generated earlier.
# Single-cell image datasets: queried from the "scportrait/examples" instance.
# The two ulabel filters are applied as separate steps on purpose: on a
# many-to-many relation, Django-style querysets treat chained filters
# differently from one combined call (presumably each step matches a different
# ULabel attached to the same artifact), so they must not be merged.
sc_datasets = ln.Artifact.connect("scportrait/examples")
sc_datasets = sc_datasets.filter(ulabels__name="autophagy imaging", is_latest=True)
sc_datasets = sc_datasets.filter(ulabels__name="scportrait single-cell images")

# scPortrait config: queried without connect(), i.e. not from
# "scportrait/examples". one() is expected to yield exactly one artifact.
config = ln.Artifact.filter(ulabels__name="autophagy imaging")
config = config.filter(ulabels__name="scportrait config")
config = config.distinct().one()
Extract cellular features from WT cells:
# Build scPortrait's featurizer once; featurize_datasets() reuses this instance.
# config.cache() supplies the locally cached copy of the config artifact.
featurizer = CellFeaturizer(
    directory=".",
    config=config.cache(),
    project_location=None,
)
def featurize_datasets(artifact_list) -> pd.DataFrame:
    """Featurize several single-cell image datasets into one table.

    Every artifact in ``artifact_list`` is cached locally and passed to the
    module-level ``featurizer``. Each dataset is labelled with its position in
    ``artifact_list``; after featurization that position is translated back
    into the uid of the originating artifact.

    Args:
        artifact_list: Iterable of artifacts exposing ``cache()`` and ``uid``.
            It is iterated twice, so it must be re-iterable.

    Returns:
        A DataFrame with one row per row of the feature matrix (presumably one
        per cell), the featurizer's ``var_names`` as columns, and a
        ``dataset`` column holding the uid of the source artifact.
    """
    local_paths = [artifact.cache() for artifact in artifact_list]
    # Position in artifact_list -> artifact uid; the positions double as the
    # dataset labels handed to the featurizer.
    uid_by_index = dict(enumerate(artifact.uid for artifact in artifact_list))

    inference = featurizer.process(
        dataset_paths=local_paths,
        dataset_labels=list(uid_by_index.keys()),
        return_results=True,
    )

    # Current scPortrait returns InferenceMemmap paths for out-of-memory inference.
    feature_mmap = tempmmap.mmap_array_from_path(inference.features_path)
    label_mmap = tempmmap.mmap_array_from_path(inference.labels_path)
    dataset_index = np.asarray(label_mmap).ravel().astype(int)

    table = pd.DataFrame(data=np.asarray(feature_mmap), columns=inference.var_names)
    table["label"] = dataset_index
    # Store original dataset uid for tracking
    table["dataset"] = table["label"].map(uid_by_index)
    # The integer label was only needed for the uid lookup.
    return table.drop(columns=["label"])
# Get WT cells and extract features by condition
wt_cells_afs = sc_datasets.filter(ulabels__name="WT")
# Integer class assigned to every cell of a given stimulation condition
class_lookup = {"untreated": 0, "14h Torin-1": 1}

# Get unique conditions
conditions = {af.features.get_values()["stimulation"] for af in wt_cells_afs}
# Iterate the conditions in sorted order: iterating the raw set depends on
# string hash randomization, which would make the row order of `features`
# (and therefore the saved table) differ from one run to the next.
condition_uls = [
    ln.ULabel.connect("scportrait/examples").get(name=name)
    for name in sorted(conditions)
]

# Process each condition: featurize its datasets and tag the rows with the
# condition's class
features_list = []
for condition_ul in condition_uls:
    cells = wt_cells_afs.filter(ulabels=condition_ul)
    results = featurize_datasets(cells)
    results["class"] = class_lookup[condition_ul.name]
    features_list.append(results)

# Stack the per-condition tables into one table with a fresh index
features = pd.concat(features_list, ignore_index=True)
Show code cell output
→ transferred: Artifact(uid='iuuMnf7xC4wYmkv80000'), Schema(uid='TjVipebkpJEhUM3P')
→ transferred: Artifact(uid='uJ9W0phl9z0QhFOY0000')
→ transferred: Artifact(uid='9m0dxLtxu35ludr70000')
→ transferred: Artifact(uid='uTNKe0UmY5IOowhC0000')
→ transferred: Artifact(uid='zGFV103h7KW1AbmE0000')
→ transferred: Artifact(uid='89C8kQyV4Kjzj4SB0000')
→ transferred: Artifact(uid='1GKwxrAp7XJmAqpt0000')
→ transferred: Artifact(uid='p8J4ly0vv0QjuPEe0000')
Ingest the generated features into our instance:
# Register the WT feature table as a parquet artifact and save it
artifact = ln.Artifact.from_dataframe(
    features,
    key="featurization_results/WT.parquet",
    description="featurized single-cell images",
)
artifact = artifact.save()

# Look up the cell line by ontology id, save it, and link it to the artifact
cell_line = bt.CellLine.from_source(ontology_id="CVCL_0042")
cell_line = cell_line.save()
artifact.cell_lines.add(cell_line)

# Annotate the artifact with study and genotype
artifact.features.add_values({"study": "autophagy imaging", "genotype": "WT"})
Extract features from KO cells using the same approach:
# Process KO cells to see if they behave differently
ko_cells_afs = sc_datasets.filter(ulabels__name="EI24KO")

# Get unique conditions for KO cells
conditions = {af.features.get_values()["stimulation"] for af in ko_cells_afs}
# Iterate the conditions in sorted order: iterating the raw set depends on
# string hash randomization, which would make the row order of `features_ko`
# (and therefore the saved table) differ from one run to the next.
condition_uls = [
    ln.ULabel.connect("scportrait/examples").get(name=name)
    for name in sorted(conditions)
]

# Process each condition: featurize its datasets and tag the rows with the
# condition's class (class_lookup is the mapping already used for WT cells)
features_ko_list = []
for condition_ul in condition_uls:
    cells = ko_cells_afs.filter(ulabels=condition_ul)
    results = featurize_datasets(cells)
    results["class"] = class_lookup[condition_ul.name]
    features_ko_list.append(results)

# Stack the per-condition tables into one table with a fresh index
features_ko = pd.concat(features_ko_list, ignore_index=True)
Show code cell output
→ transferred: Artifact(uid='9KvNUZng67uxy4G90000')
→ transferred: Artifact(uid='9tb7NNhreuubzeHl0000')
→ transferred: Artifact(uid='cHKg1yCqvJgShsKc0002')
→ transferred: Artifact(uid='Uh11TE4SKi8JXBGE0000')
→ transferred: Artifact(uid='wTSbpxi4KDY0FQql0000')
→ transferred: Artifact(uid='yH6LpwCzNk5dYq6Q0000')
→ transferred: Artifact(uid='XaTalaUNv7d3QwXc0000')
→ transferred: Artifact(uid='jiJ2Rg8Xjk1OCD4n0000')
# Register the KO feature table as a parquet artifact and save it
artifact = ln.Artifact.from_dataframe(
    features_ko,
    key="featurization_results/EI24KO.parquet",
    description="featurized single-cell images",
)
artifact = artifact.save()

# Link the cell line record created for the WT artifact
artifact.cell_lines.add(cell_line)

# Annotate the artifact with study and genotype
artifact.features.add_values({"study": "autophagy imaging", "genotype": "EI24KO"})

# Close the run that ln.track() started
ln.finish()
Show code cell output
→ finished Run('xBtbCxpn1L4p4Zsi') after 44s at 2026-05-18 19:16:07 UTC