imaging3/4 Jupyter Notebook

Featurize single-cell images

Here, we use scPortrait to extract cell features that characterize both morphological and intensity-based properties of individual cells:

  • Area of the masks in pixels

  • Mean intensity of the chosen channel in the regions labelled by each of the masks

  • Median intensity of the chosen channel in the regions labelled by each of the masks

  • 75% quantile of the chosen channel in the regions labelled by each of the masks

  • 25% quantile of the chosen channel in the regions labelled by each of the masks

  • Summed intensity of the chosen channel in the regions labelled by each of the masks

  • Summed intensity of the chosen channel in the regions labelled by each of the masks, normalized by mask area

These features provide a comprehensive profile for later training of machine learning models to identify cell types and states.

import lamindb as ln
import bionty as bt
import pandas as pd

from scportrait.pipeline.featurization import CellFeaturizer

# start tracking this notebook as a run
ln.track()
Hide code cell output
 connected lamindb: testuser1/test-sc-imaging
 created Transform('w2NXlJ1CTIPe0000'), started new Run('IFpWYXMX...') at 2025-03-31 12:50:28 UTC
 notebook imports: bionty==1.2.1 lamindb==1.3.1 pandas==2.2.3 scportrait==1.3.3

We will generate these features on the basis of the previously generated single-cell image datasets.

# Query the single-cell image datasets from the "scportrait/examples" instance
example_artifacts = ln.Artifact.using("scportrait/examples")
sc_datasets = example_artifacts.filter(ulabels__name="autophagy imaging").filter(
    ulabels__name="scportrait single-cell images"
)

# Fetch the scPortrait config artifact; `.one()` requires exactly one match
config_query = ln.Artifact.filter(ulabels__name="autophagy imaging").filter(
    ulabels__name="scportrait config"
)
config = config_query.distinct().one()

# Set up scPortrait's featurizer with the path returned by `config.cache()`
config_path = config.cache()
featurizer = CellFeaturizer(directory=".", config=config_path, project_location=None)


def featurize_datasets(artifact_list) -> pd.DataFrame:
    """Featurize single-cell image datasets and record each row's source artifact.

    Args:
        artifact_list: Iterable of artifacts exposing ``cache()`` and ``uid``
            (for example a query set or a list). It is consumed exactly once.

    Returns:
        The table returned by ``featurizer.process`` with an added ``dataset``
        column holding the uid of the artifact each row came from. The
        intermediate ``label`` column is removed.
    """
    # Materialize once so that paths and labels are derived from the very same
    # sequence, even if a one-shot iterable (e.g. a generator) is passed in.
    artifacts = list(artifact_list)
    paths = [artifact.cache() for artifact in artifacts]

    # Positional integer labels are passed to the featurizer and mapped back to
    # artifact uids afterwards.
    dataset_lookup = {idx: artifact.uid for idx, artifact in enumerate(artifacts)}
    labels = list(dataset_lookup)
    results = featurizer.process(
        dataset_paths=paths, dataset_labels=labels, return_results=True
    )

    # ensure we store the original dataset uid to be able to track featurization results back to their original dataset
    results["dataset"] = results["label"].map(dataset_lookup)
    del results["label"]
    return results


# Featurize wildtype (WT) cells
wt_cells_afs = sc_datasets.filter(ulabels__name="WT")

# we have two different conditions which will be the two classes that our classifier should be able to tell apart
# (sorted so that the row order of the feature table does not depend on set iteration order)
wt_stimulation_names = sorted(
    {af.features.get_values()["stimulation"] for af in wt_cells_afs}
)
condition_uls = [
    ln.ULabel.using("scportrait/examples").get(name=stim_name)
    for stim_name in wt_stimulation_names
]

# map condition names to class labels
class_lookup = {"untreated": 0, "14h Torin-1": 1}

# featurize each condition separately and tag its rows with the class label
wt_results_per_condition = []
for condition_ul in condition_uls:
    cells = wt_cells_afs.filter(ulabels=condition_ul)
    results = featurize_datasets(cells)

    # save condition as a class label
    results["class"] = class_lookup[condition_ul.name]
    wt_results_per_condition.append(results)

# concatenate results together in one step instead of growing the frame per iteration;
# pd.concat raises ValueError if no condition was found
features = pd.concat(wt_results_per_condition)
Hide code cell output
 completing transfer to track Artifact('zGFV103h') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='9hHptuyb'), ULabel(uid='A2945i5P'), ULabel(uid='KH1brNUW'), ULabel(uid='CrR7fgIZ'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='zGFV103h7KW1AbmE0000'), Schema(uid='TjVipebkpJEhUM3P86HE'), ULabel(uid='JWE2jNdk')
 completing transfer to track Artifact('89C8kQyV') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='9hHptuyb'), ULabel(uid='A2945i5P'), ULabel(uid='KH1brNUW'), ULabel(uid='1KPobNqK'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='89C8kQyV4Kjzj4SB0000')
 completing transfer to track Artifact('1GKwxrAp') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='9hHptuyb'), ULabel(uid='A2945i5P'), ULabel(uid='GadMJgvN'), ULabel(uid='CrR7fgIZ'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='1GKwxrAp7XJmAqpt0000')
 completing transfer to track Artifact('p8J4ly0v') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='9hHptuyb'), ULabel(uid='A2945i5P'), ULabel(uid='GadMJgvN'), ULabel(uid='1KPobNqK'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='p8J4ly0vv0QjuPEe0000')
 completing transfer to track Artifact('iuuMnf7x') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='9hHptuyb'), ULabel(uid='EgihgpJC'), ULabel(uid='KH1brNUW'), ULabel(uid='CrR7fgIZ'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='iuuMnf7xC4wYmkv80000')
 completing transfer to track Artifact('9m0dxLtx') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='9hHptuyb'), ULabel(uid='EgihgpJC'), ULabel(uid='KH1brNUW'), ULabel(uid='1KPobNqK'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='9m0dxLtxu35ludr70000')
 completing transfer to track Artifact('uTNKe0Um') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='9hHptuyb'), ULabel(uid='EgihgpJC'), ULabel(uid='GadMJgvN'), ULabel(uid='CrR7fgIZ'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='uTNKe0UmY5IOowhC0000')
 completing transfer to track Artifact('uJ9W0phl') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='9hHptuyb'), ULabel(uid='EgihgpJC'), ULabel(uid='GadMJgvN'), ULabel(uid='1KPobNqK'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='uJ9W0phl9z0QhFOY0000')

Ingest the generated features into our instance:

# create and save an artifact from the WT feature table
artifact = ln.Artifact.from_df(
    features,
    key="featurization_results/WT.parquet",
    description="featurized single-cell images",
)
artifact = artifact.save()

# link the U2OS cell line record
u2os = bt.CellLine.get(name="U2OS")
artifact.cell_lines.add(u2os)

# annotate with study and genotype
wt_metadata = {"study": "autophagy imaging", "genotype": "WT"}
artifact.features.add_values(wt_metadata)

Repeat this process for KO cells:

# Process KO cells to see if they behave differently
ko_cells_afs = sc_datasets.filter(ulabels__name="EI24KO")

# we have the same two conditions as before
# (sorted so that the row order of the feature table does not depend on set iteration order)
ko_stimulation_names = sorted(
    {af.features.get_values()["stimulation"] for af in ko_cells_afs}
)
condition_uls = [
    ln.ULabel.using("scportrait/examples").get(name=stimulation_name)
    for stimulation_name in ko_stimulation_names
]

# featurize each condition separately and tag its rows with the class label
ko_results_per_condition = []
for condition_ul in condition_uls:
    cells = ko_cells_afs.filter(ulabels=condition_ul)
    results = featurize_datasets(cells)

    # save condition as a class label
    results["class"] = class_lookup[condition_ul.name]
    ko_results_per_condition.append(results)

# concatenate results together in one step instead of growing the frame per iteration;
# pd.concat raises ValueError if no condition was found
features_ko = pd.concat(ko_results_per_condition)
Hide code cell output
 completing transfer to track Artifact('XaTalaUN') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='Aj8KGwbh'), ULabel(uid='A2945i5P'), ULabel(uid='joRCMMWX'), ULabel(uid='CrR7fgIZ'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='XaTalaUNv7d3QwXc0000')
 completing transfer to track Artifact('jiJ2Rg8X') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='Aj8KGwbh'), ULabel(uid='A2945i5P'), ULabel(uid='joRCMMWX'), ULabel(uid='1KPobNqK'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='jiJ2Rg8Xjk1OCD4n0000')
 completing transfer to track Artifact('wTSbpxi4') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='Aj8KGwbh'), ULabel(uid='A2945i5P'), ULabel(uid='JIbWVXma'), ULabel(uid='CrR7fgIZ'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='wTSbpxi4KDY0FQql0000')
 completing transfer to track Artifact('yH6LpwCz') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='Aj8KGwbh'), ULabel(uid='A2945i5P'), ULabel(uid='JIbWVXma'), ULabel(uid='1KPobNqK'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='yH6LpwCzNk5dYq6Q0000')
 completing transfer to track Artifact('cHKg1yCq') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='Aj8KGwbh'), ULabel(uid='EgihgpJC'), ULabel(uid='joRCMMWX'), ULabel(uid='CrR7fgIZ'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='cHKg1yCqvJgShsKc0001')
 completing transfer to track Artifact('9KvNUZng') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='Aj8KGwbh'), ULabel(uid='EgihgpJC'), ULabel(uid='joRCMMWX'), ULabel(uid='1KPobNqK'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='9KvNUZng67uxy4G90000')
 completing transfer to track Artifact('9tb7NNhr') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='Aj8KGwbh'), ULabel(uid='EgihgpJC'), ULabel(uid='JIbWVXma'), ULabel(uid='CrR7fgIZ'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='9tb7NNhreuubzeHl0000')
 completing transfer to track Artifact('Uh11TE4S') as input
 returning existing schema with same hash: Schema(uid='3Ajy5VY7H6PVoAhsFLfn', name='single-cell image dataset schema obs', n=1, itype='Feature', is_type=False, hash='0Wb4Qaes_RIx5s4g5SWeDA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=3, created_at=2025-03-31 12:47:28 UTC)
 mapped records: ULabel(uid='Aj8KGwbh'), ULabel(uid='EgihgpJC'), ULabel(uid='JIbWVXma'), ULabel(uid='1KPobNqK'), ULabel(uid='QrU6fxsG'), ULabel(uid='xhpmj7p7'), ULabel(uid='xHqZKcIG'), ULabel(uid='JWE2jNdk'), ULabel(uid='PKiCEP1h'), ULabel(uid='HRRTqARL'), ULabel(uid='e82fx2wm')
 transferred records: Artifact(uid='Uh11TE4SKi8JXBGE0000')
# create and save an artifact from the KO feature table
artifact = ln.Artifact.from_df(
    features_ko,
    description="featurized single-cell images",
    key="featurization_results/EI24KO.parquet",
).save()
# link the U2OS cell line record (single-record lookup via `get`)
artifact.cell_lines.add(bt.CellLine.get(name="U2OS"))

# annotate with required metadata
artifact.features.add_values(
    {
        "study": "autophagy imaging",
        "genotype": "EI24KO",
    }
)
# mark the tracked run as finished
ln.finish()
 finished Run('IFpWYXMX') after 3m at 2025-03-31 12:54:09 UTC