imaging3/4 Jupyter Notebook

Featurize single-cell images

Here, we use scPortrait to extract features that capture both morphological and intensity-based properties of individual cells:

Morphological features:

  • Cell area (in pixels)

Intensity features for each fluorescence channel:

  • Mean intensity

  • Median intensity

  • 25th and 75th percentile intensities

  • Total intensity

  • Intensity density (total intensity normalized by area)

These comprehensive cellular profiles enable downstream machine learning analysis to identify cell types and states.

import lamindb as ln
import bionty as bt
import pandas as pd

from scportrait.pipeline.featurization import CellFeaturizer

# Start tracking this notebook: the log below shows that this creates a Transform
# and starts a new Run. The same log recommends passing the transform uid to
# ln.track() so the notebook stays identifiable across renames.
ln.track()
Hide code cell output
 connected lamindb: testuser1/test-sc-imaging
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray_schema/__init__.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
  from pkg_resources import DistributionNotFound, get_distribution
 created Transform('YVDO1ifMrxLd0000'), started new Run('aIDtdxHV...') at 2025-07-08 11:02:46 UTC
 notebook imports: bionty==1.6.0 lamindb==1.7.1 pandas==2.3.1 scportrait==1.3.5
 recommendation: to identify the notebook across renames, pass the uid: ln.track("YVDO1ifMrxLd")

We will use the single-cell image datasets we generated earlier.

# Single-cell image artifacts of the autophagy study, queried from the
# "scportrait/examples" instance and restricted to the latest versions.
# Each label is matched in its own filter() call so that an artifact has to
# carry both labels (a single filter() would ask one label to match both names).
examples_artifacts = ln.Artifact.using("scportrait/examples")
sc_datasets = examples_artifacts.filter(
    ulabels__name="autophagy imaging", is_latest=True
).filter(ulabels__name="scportrait single-cell images")

# scPortrait config artifact of the same study, queried from the connected
# instance; .one() expects the query to resolve to a single artifact
study_artifacts = ln.Artifact.filter(ulabels__name="autophagy imaging")
config_candidates = study_artifacts.filter(ulabels__name="scportrait config")
config = config_candidates.distinct().one()

Extract cellular features from WT cells:

# Set up scPortrait's featurizer; config.cache() supplies the config file location
config_path = config.cache()
featurizer = CellFeaturizer(
    directory=".",
    config=config_path,
    project_location=None,
)


def featurize_datasets(artifact_list) -> pd.DataFrame:
    """Featurize single-cell image datasets and tag each row with its source artifact.

    Args:
        artifact_list: Iterable of artifacts exposing ``.cache()`` and ``.uid``
            (e.g. a query result). It is materialized once, so one-shot
            iterables such as generators work as well.

    Returns:
        The results returned by the module-level ``featurizer``, with the
        ``label`` column replaced by a ``dataset`` column that holds the uid
        of the artifact each row was computed from.
    """
    # Materialize once so that paths and labels are derived from the very same
    # sequence; iterating the input twice would leave the lookup empty for a
    # one-shot iterable.
    artifacts = list(artifact_list)

    paths = [artifact.cache() for artifact in artifacts]
    # Positional index -> artifact uid; the indices double as dataset labels
    dataset_lookup = {idx: artifact.uid for idx, artifact in enumerate(artifacts)}

    results = featurizer.process(
        dataset_paths=paths,
        dataset_labels=list(dataset_lookup),
        return_results=True,
    )

    # Store original dataset uid for tracking
    results["dataset"] = results["label"].map(dataset_lookup)
    return results.drop(columns=["label"])


# Get WT cells and extract features by condition
wt_cells_afs = sc_datasets.filter(ulabels__name="WT")
# Integer class assigned to each stimulation condition
class_lookup = {"untreated": 0, "14h Torin-1": 1}

# Get unique conditions
conditions = {af.features.get_values()["stimulation"] for af in wt_cells_afs}
# Resolve labels in sorted order: the iteration order of a set of strings can
# differ between interpreter runs, which would otherwise change the row order
# of the concatenated feature table from run to run.
condition_uls = [
    ln.ULabel.using("scportrait/examples").get(name=name)
    for name in sorted(conditions)
]

# Process each condition
features_list = []
for condition_ul in condition_uls:
    cells = wt_cells_afs.filter(ulabels=condition_ul)
    results = featurize_datasets(cells)
    # Raises KeyError for a condition that has no entry in class_lookup
    results["class"] = class_lookup[condition_ul.name]
    features_list.append(results)

features = pd.concat(features_list, ignore_index=True)
Hide code cell output
 transferred: Artifact(uid='iuuMnf7xC4wYmkv80000'), Schema(uid='TjVipebkpJEhUM3P86HE')
 transferred: Artifact(uid='uJ9W0phl9z0QhFOY0000')
 transferred: Artifact(uid='9m0dxLtxu35ludr70000')
 transferred: Artifact(uid='uTNKe0UmY5IOowhC0000')
 transferred: Artifact(uid='zGFV103h7KW1AbmE0000')
 transferred: Artifact(uid='89C8kQyV4Kjzj4SB0000')
 transferred: Artifact(uid='1GKwxrAp7XJmAqpt0000')
 transferred: Artifact(uid='p8J4ly0vv0QjuPEe0000')

Ingest the generated features into our instance:

# Wrap the WT feature table as an artifact and save it
wt_features_af = ln.Artifact.from_df(
    features,
    key="featurization_results/WT.parquet",
    description="featurized single-cell images",
)
artifact = wt_features_af.save()

# Link the cell line record to the artifact
u2os_cell_line = bt.CellLine.get(name="U-2 OS cell")
artifact.cell_lines.add(u2os_cell_line)

# Annotate the artifact with study and genotype
wt_annotations = {"study": "autophagy imaging", "genotype": "WT"}
artifact.features.add_values(wt_annotations)

Extract features from KO cells using the same approach:

# Process KO cells to see if they behave differently
ko_cells_afs = sc_datasets.filter(ulabels__name="EI24KO")

# Get unique conditions for KO cells
conditions = {af.features.get_values()["stimulation"] for af in ko_cells_afs}
# Resolve labels in sorted order: the iteration order of a set of strings can
# differ between interpreter runs, which would otherwise change the row order
# of the concatenated feature table from run to run.
condition_uls = [
    ln.ULabel.using("scportrait/examples").get(name=name)
    for name in sorted(conditions)
]

# Process each condition
features_ko_list = []
for condition_ul in condition_uls:
    cells = ko_cells_afs.filter(ulabels=condition_ul)
    results = featurize_datasets(cells)
    # Raises KeyError for a condition that has no entry in class_lookup
    results["class"] = class_lookup[condition_ul.name]
    features_ko_list.append(results)

features_ko = pd.concat(features_ko_list, ignore_index=True)
Hide code cell output
 transferred: Artifact(uid='9KvNUZng67uxy4G90000')
 transferred: Artifact(uid='9tb7NNhreuubzeHl0000')
 transferred: Artifact(uid='cHKg1yCqvJgShsKc0002')
 transferred: Artifact(uid='Uh11TE4SKi8JXBGE0000')
 transferred: Artifact(uid='wTSbpxi4KDY0FQql0000')
 transferred: Artifact(uid='yH6LpwCzNk5dYq6Q0000')
 transferred: Artifact(uid='XaTalaUNv7d3QwXc0000')
 transferred: Artifact(uid='jiJ2Rg8Xjk1OCD4n0000')
# Wrap the KO feature table as an artifact and save it under its own key
artifact = ln.Artifact.from_df(
    features_ko,
    description="featurized single-cell images",
    key="featurization_results/EI24KO.parquet",
).save()

# Link the cell line record; fetched with get(), the same lookup the WT
# ingestion uses, so both ingestion cells read alike
artifact.cell_lines.add(bt.CellLine.get(name="U-2 OS cell"))

# Annotate the artifact with study and genotype
artifact.features.add_values(
    {
        "study": "autophagy imaging",
        "genotype": "EI24KO",
    }
)
# Close the run started by ln.track(); the log below reports it as finished
ln.finish()
 finished Run('aIDtdxHV') after 1m at 2025-07-08 11:04:12 UTC