Jupyter Notebook

Query & search registries

This guide walks through different ways of querying & searching LaminDB registries.

Let’s start by creating a few example datasets and saving them to a LaminDB instance (hidden cell).

Hide code cell content
# !pip install 'lamindb[bionty]'
!lamin init --storage ./test-registries --schema bionty

# python
import lamindb as ln
import bionty as bt
from lamindb.core import datasets

ln.track("pd7UR7Z8hoTq0000")

# Create non-curated datasets
ln.Artifact(datasets.file_jpg_paradisi05(), key="images/my_image.jpg").save()
ln.Artifact(datasets.file_fastq(), key="raw/my_fastq.fastq").save()
ln.Artifact.from_df(datasets.df_iris(), key="iris/iris_collection.parquet").save()

# Create a more complex case
# observation-level metadata
ln.Feature(name="cell_medium", dtype="cat[ULabel]").save()
ln.Feature(name="sample_note", dtype="str").save()
ln.Feature(name="cell_type_by_expert", dtype="cat[bionty.CellType]").save()
ln.Feature(name="cell_type_by_model", dtype="cat[bionty.CellType]").save()
# dataset-level metadata
ln.Feature(name="temperature", dtype="float").save()
ln.Feature(name="study", dtype="cat[ULabel]").save()
ln.Feature(name="date_of_study", dtype="date").save()
ln.Feature(name="study_note", dtype="str").save()

## Permissible values for categoricals
ln.ULabel.from_values(["DMSO", "IFNG"], create=True).save()
ln.ULabel.from_values(
    ["Candidate marker study 1", "Candidate marker study 2"], create=True
).save()
bt.CellType.from_values(["B cell", "T cell"], create=True).save()

# Ingest dataset1
adata = datasets.small_dataset1(format="anndata")
curator = ln.Curator.from_anndata(
    adata,
    var_index=bt.Gene.symbol,
    categoricals={
        "cell_medium": ln.ULabel.name,
        "cell_type_by_expert": bt.CellType.name,
        "cell_type_by_model": bt.CellType.name,
    },
    organism="human",
)
artifact = curator.save_artifact(key="example_datasets/dataset1.h5ad")
artifact.features.add_values(adata.uns)

# Ingest dataset2
adata2 = datasets.small_dataset2(format="anndata")
curator = ln.Curator.from_anndata(adata2, var_index=bt.Gene.symbol, categoricals={"cell_medium": ln.ULabel.name, "cell_type_by_model": bt.CellType.name}, organism="human")
artifact2 = curator.save_artifact(key="example_datasets/dataset2.h5ad")
artifact2.features.add_values(adata2.uns)
 connected lamindb: testuser1/test-registries
 connected lamindb: testuser1/test-registries
 created Transform('pd7UR7Z8'), started new Run('Jsnx2RPZ') at 2024-12-20 15:05:00 UTC
! indexing datasets with gene symbols can be problematic: https://docs.lamin.ai/faq/symbol-mapping
 saving validated records of 'var_index'
 added 3 records from public with Gene.symbol for "var_index": 'CD8A', 'CD4', 'CD14'
 "var_index" is validated against Gene.symbol
 "cell_medium" is validated against ULabel.name
 "cell_type_by_expert" is validated against CellType.name
 "cell_type_by_model" is validated against CellType.name
! indexing datasets with gene symbols can be problematic: https://docs.lamin.ai/faq/symbol-mapping
 saving validated records of 'var_index'
 added 1 record from public with Gene.symbol for "var_index": 'CD38'
 "var_index" is validated against Gene.symbol
 "cell_medium" is validated against ULabel.name
 "cell_type_by_model" is validated against CellType.name

Get an overview

The easiest way to get an overview of all artifacts is to call df(), which returns the 100 latest artifacts in the Artifact registry.

import lamindb as ln

ln.Artifact.df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
5 KhKcEAJZtiVWt8iU0000 example_datasets/dataset2.h5ad None .h5ad dataset 22384 yI0uyeBcL20WSAClKeREVA None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:10.331387+00:00 1
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None .parquet dataset 5088 9_QyZIRSh4ExiWhliEBYyw None NaN md5 DataFrame 1 True 1 1 None True 1 2024-12-20 15:05:02.841080+00:00 1
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None .fastq.gz None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.582786+00:00 1
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None .jpg None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.572634+00:00 1

You can include fields from other registries.

ln.Artifact.df(include=["created_by__name", "ulabels__name", "cell_types__name", "feature_sets__registry", "suffix"])
Hide code cell output
uid key description created_by__name ulabels__name cell_types__name feature_sets__registry suffix
id
5 KhKcEAJZtiVWt8iU0000 example_datasets/dataset2.h5ad None Test User1 {DMSO, IFNG, Candidate marker study 2} {T cell, B cell} {Feature, bionty.Gene} .h5ad
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None Test User1 {Candidate marker study 1, DMSO, IFNG} {T cell, B cell} {Feature, bionty.Gene} .h5ad
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None Test User1 {None} {None} {None} .parquet
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None Test User1 {None} {None} {None} .fastq.gz
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None Test User1 {None} {None} {None} .jpg

You can include information about which artifact measures which feature.

df = ln.Artifact.df(features=True)
ln.view(df)  # for clarity, leverage ln.view() to display dtype annotations
Hide code cell output
uid key description cell_type_by_expert cell_type_by_model study cell_medium temperature study_note date_of_study
id str str str cat[bionty.CellType] cat[bionty.CellType] cat[ULabel] cat[ULabel] float str date
5 KhKcEAJZtiVWt8iU0000 example_datasets/dataset2.h5ad None nan {'T cell', 'B cell'} {'Candidate marker study 2'} {'DMSO', 'IFNG'} {21.6} {'We had a great time performing this study and the results look compelling.'} {'2024-12-01'}
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None {'T cell', 'B cell'} {'T cell', 'B cell'} {'Candidate marker study 1'} {'DMSO', 'IFNG'} nan nan nan
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None nan nan nan nan nan nan nan
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None nan nan nan nan nan nan nan
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None nan nan nan nan nan nan nan

The flattened table that includes information from all relevant registries is easier to understand than the normalized data. For comparison, here is how to see the latter.

ln.view()
Hide code cell output
****************
* module: core *
****************
Artifact
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
5 KhKcEAJZtiVWt8iU0000 example_datasets/dataset2.h5ad None .h5ad dataset 22384 yI0uyeBcL20WSAClKeREVA None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:10.331387+00:00 1
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None .parquet dataset 5088 9_QyZIRSh4ExiWhliEBYyw None NaN md5 DataFrame 1 True 1 1 None True 1 2024-12-20 15:05:02.841080+00:00 1
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None .fastq.gz None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.582786+00:00 1
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None .jpg None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.572634+00:00 1
Feature
uid name dtype unit description synonyms run_id created_at created_by_id
id
8 cQa25nG4SfM9 study_note str None None None 1 2024-12-20 15:05:02.890834+00:00 1
7 q2YcnMQV9WDx date_of_study date None None None 1 2024-12-20 15:05:02.886411+00:00 1
6 H24SemTZgqdW study cat[ULabel] None None None 1 2024-12-20 15:05:02.881692+00:00 1
5 MdWUQ3IvfueC temperature float None None None 1 2024-12-20 15:05:02.876742+00:00 1
4 UxQL9vD3GdFZ cell_type_by_model cat[bionty.CellType] None None None 1 2024-12-20 15:05:02.871502+00:00 1
3 9b6vb5hryCMj cell_type_by_expert cat[bionty.CellType] None None None 1 2024-12-20 15:05:02.866847+00:00 1
2 31lhjIjFZxsd sample_note str None None None 1 2024-12-20 15:05:02.861810+00:00 1
FeatureSet
uid name n dtype registry hash run_id created_at created_by_id
id
1 8VyNpI9Cj09csnQbQdTI None 3 int bionty.Gene f2UVeHefaZxXFjmUwo9Ozw 1 2024-12-20 15:05:07.083589+00:00 1
2 MCImfmZRTpkKpB4t51O4 None 4 None Feature 0T1iykjwVvUKC_gKygVJvA 1 2024-12-20 15:05:07.090142+00:00 1
3 4gZKwkXlPhBCmRnW3PL7 None 3 int bionty.Gene QW2rHuIo5-eGNZbRxHMDCw 1 2024-12-20 15:05:10.361202+00:00 1
4 c55fneBk6FK1KH0lgPYV None 2 None Feature RB0WfURMKW3esSfBgUDdgg 1 2024-12-20 15:05:10.367887+00:00 1
FeatureValue
value feature_id run_id created_at created_by_id
id
1 21.6 5 1 2024-12-20 15:05:07.171777+00:00 1
2 2024-12-01 7 1 2024-12-20 15:05:07.171843+00:00 1
3 We had a great time performing this study and ... 8 1 2024-12-20 15:05:07.171884+00:00 1
4 22.6 5 1 2024-12-20 15:05:10.432950+00:00 1
5 2025-02-13 7 1 2024-12-20 15:05:10.433013+00:00 1
Run
uid started_at finished_at is_consecutive reference reference_type transform_id report_id environment_id parent_id created_at created_by_id
id
1 Jsnx2RPZAqPiOSTe3Lh4 2024-12-20 15:05:00.758313+00:00 None True None None 1 None None None 2024-12-20 15:05:00.758380+00:00 1
Storage
uid root description type region instance_uid run_id created_at created_by_id
id
1 5MvePVyaGmkz /home/runner/work/lamindb/lamindb/docs/test-re... None local None hlGq1WkbeSSf None 2024-12-20 15:04:53.723248+00:00 1
Transform
uid name key description type source_code hash reference reference_type _source_code_artifact_id version is_latest created_at created_by_id
id
1 pd7UR7Z8hoTq0000 Query & search registries registries.ipynb None notebook None None None None None None True 2024-12-20 15:05:00.752789+00:00 1
ULabel
uid name description reference reference_type run_id created_at created_by_id
id
5 XfZbGqaN cell_medium None None None 1 2024-12-20 15:05:06.969513+00:00 1
4 hphETtZY Candidate marker study 2 None None None 1 2024-12-20 15:05:02.908345+00:00 1
3 KYz7lm64 Candidate marker study 1 None None None 1 2024-12-20 15:05:02.908262+00:00 1
2 0h52Al8x IFNG None None None 1 2024-12-20 15:05:02.899734+00:00 1
1 bPyqhPw5 DMSO None None None 1 2024-12-20 15:05:02.899635+00:00 1
User
uid handle name created_at
id
1 DzTjkKse testuser1 Test User1 2024-12-20 15:04:53.718852+00:00
******************
* module: bionty *
******************
CellType
uid name ontology_id abbr synonyms description source_id run_id created_at created_by_id
id
2 7gRvACvc T cell None None None None None 1 2024-12-20 15:05:03.228973+00:00 1
1 1m3SGd1l B cell None None None None None 1 2024-12-20 15:05:03.228854+00:00 1
Gene
uid symbol stable_id ensembl_gene_id ncbi_gene_ids biotype synonyms description source_id organism_id run_id created_at created_by_id
id
4 iFxDa8hoEWuW CD38 None ENSG00000004468 952 protein_coding CADPR1 CD38 molecule 11 1 1 2024-12-20 15:05:10.264876+00:00 1
3 3bhNYquOnA4s CD14 None ENSG00000170458 929 protein_coding CD14 molecule 11 1 1 2024-12-20 15:05:06.954110+00:00 1
2 1j4At3x7akJU CD4 None ENSG00000010610 920 protein_coding T4|LEU-3 CD4 molecule 11 1 1 2024-12-20 15:05:06.953952+00:00 1
1 6Aqvc8ckDYeN CD8A None ENSG00000153563 925 protein_coding P32|CD8|CD8ALPHA CD8 subunit alpha 11 1 1 2024-12-20 15:05:06.953743+00:00 1
Organism
uid name ontology_id scientific_name synonyms description source_id run_id created_at created_by_id
id
1 1dpCL6Td human NCBITaxon:9606 homo_sapiens None None 1 1 2024-12-20 15:05:04.083877+00:00 1
Source
uid entity organism name in_db currently_used description url md5 source_website dataframe_artifact_id version run_id created_at created_by_id
id
11 4UGN bionty.Gene human ensembl False True Ensembl s3://bionty-assets/df_human__ensembl__release-... 4ccda4d88720a326737376c534e8446b https://www.ensembl.org None release-112 None 2024-12-20 15:04:53.864058+00:00 1
101 5JnV BioSample all ncbi False True NCBI BioSample attributes s3://bionty-assets/df_all__ncbi__2023-09__BioS... 918db9bd1734b97c596c67d9654a4126 https://www.ncbi.nlm.nih.gov/biosample/docs/at... None 2023-09 None 2024-12-20 15:04:53.874716+00:00 1
100 MJRq bionty.Ethnicity human hancestro False True Human Ancestry Ontology https://github.com/EBISPOT/hancestro/raw/3.0/h... 76dd9efda9c2abd4bc32fc57c0b755dd https://github.com/EBISPOT/hancestro None 3.0 None 2024-12-20 15:04:53.874645+00:00 1
99 6vJm bionty.DevelopmentalStage mouse mmusdv False False Mouse Developmental Stages http://aber-owl.net/media/ontologies/MMUSDV/9/... 5bef72395d853c7f65450e6c2a1fc653 https://github.com/obophenotype/developmental-... None 2020-03-10 None 2024-12-20 15:04:53.874573+00:00 1
98 10va bionty.DevelopmentalStage mouse mmusdv False True Mouse Developmental Stages https://github.com/obophenotype/developmental-... https://github.com/obophenotype/developmental-... None 2024-05-28 None 2024-12-20 15:04:53.874501+00:00 1
97 7Zm9 bionty.DevelopmentalStage human hsapdv False False Human Developmental Stages http://aber-owl.net/media/ontologies/HSAPDV/11... 52181d59df84578ed69214a5cb614036 https://github.com/obophenotype/developmental-... None 2020-03-10 None 2024-12-20 15:04:53.874429+00:00 1
96 1GbF bionty.DevelopmentalStage human hsapdv False True Human Developmental Stages https://github.com/obophenotype/developmental-... https://github.com/obophenotype/developmental-... None 2024-05-28 None 2024-12-20 15:04:53.874357+00:00 1

Auto-complete records

For registries with fewer than 100k records, auto-completing a Lookup object is the most convenient way of finding a record.

import bionty as bt

# query the database for all ulabels or all cell types
ulabels = ln.ULabel.lookup()
cell_types = bt.CellType.lookup()
Show me a screenshot

With auto-complete, we find a ulabel:

study1 = ulabels.candidate_marker_study_1
study1
Hide code cell output
ULabel(uid='KYz7lm64', name='Candidate marker study 1', created_by_id=1, run_id=1, created_at=2024-12-20 15:05:02 UTC)

Get one record

get() raises an error if more than one matching record is found.

print(study1.uid)

# by uid
ln.ULabel.get(study1.uid)

# by field
ln.ULabel.get(name="Candidate marker study 1")
Hide code cell output
KYz7lm64
ULabel(uid='KYz7lm64', name='Candidate marker study 1', created_by_id=1, run_id=1, created_at=2024-12-20 15:05:02 UTC)

Query multiple records

Filter for all artifacts annotated by a ulabel:

ln.Artifact.filter(ulabels=study1).df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1

To access the results encoded in a filter statement, execute its return value with one of:

  • df(): A pandas DataFrame with each record in a row.

  • all(): A QuerySet.

  • one(): Exactly one record. Raises an error if there is none; equivalent to the .get() method shown above.

  • one_or_none(): Either one record or None if there is no query result.

Note

filter() returns a QuerySet.

The registries in LaminDB are Django Models and any Django query works.

LaminDB re-interprets Django’s API for data scientists.

What does this have to do with SQL?

Under the hood, any .filter() call translates into a SQL select statement.

LaminDB’s registries are object relational mappers (ORMs) that rely on Django for all the heavy lifting.

Of note, .one() and .one_or_none() are the two parts of LaminDB’s API that are borrowed from SQLAlchemy. In its first year, LaminDB built on SQLAlchemy.

Search for records

You can search every registry via search(). For example, to search the Artifact registry:

ln.Artifact.search("iris").df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None .parquet dataset 5088 9_QyZIRSh4ExiWhliEBYyw None None md5 DataFrame 1 True 1 1 None True 1 2024-12-20 15:05:02.841080+00:00 1

Here is more background on search and examples for searching the entire cell type ontology: How does search work?

Filter operators

You can qualify the type of comparison in a query by using a comparator.

Below is a list of the most important ones, but Django supports about two dozen field comparators of the form field__comparator=value.

and

ln.Artifact.filter(suffix=".h5ad", ulabels=study1).df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1

less than/ greater than

Or subset to artifacts greater than 10kB. Here, we append the __gt comparator to the size field and pass it as a keyword argument.

ln.Artifact.filter(ulabels=study1, size__gt=1e4).df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1

in

ln.Artifact.filter(suffix__in=[".jpg", ".fastq.gz"]).df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None .jpg None 29358 r4tnqmKI_SjrkdLzpuWp4g None None md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.572634+00:00 1
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None .fastq.gz None 20 hi7ZmAzz8sfMd3vIQr-57Q None None md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.582786+00:00 1

order by

ln.Artifact.filter().order_by("created_at").df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None .jpg None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.572634+00:00 1
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None .fastq.gz None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.582786+00:00 1
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None .parquet dataset 5088 9_QyZIRSh4ExiWhliEBYyw None NaN md5 DataFrame 1 True 1 1 None True 1 2024-12-20 15:05:02.841080+00:00 1
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1
5 KhKcEAJZtiVWt8iU0000 example_datasets/dataset2.h5ad None .h5ad dataset 22384 yI0uyeBcL20WSAClKeREVA None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:10.331387+00:00 1
# reverse ordering
ln.Artifact.filter().order_by("-created_at").df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
5 KhKcEAJZtiVWt8iU0000 example_datasets/dataset2.h5ad None .h5ad dataset 22384 yI0uyeBcL20WSAClKeREVA None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:10.331387+00:00 1
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None .parquet dataset 5088 9_QyZIRSh4ExiWhliEBYyw None NaN md5 DataFrame 1 True 1 1 None True 1 2024-12-20 15:05:02.841080+00:00 1
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None .fastq.gz None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.582786+00:00 1
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None .jpg None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.572634+00:00 1
ln.Artifact.filter().order_by("key").df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1
5 KhKcEAJZtiVWt8iU0000 example_datasets/dataset2.h5ad None .h5ad dataset 22384 yI0uyeBcL20WSAClKeREVA None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:10.331387+00:00 1
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None .jpg None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.572634+00:00 1
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None .parquet dataset 5088 9_QyZIRSh4ExiWhliEBYyw None NaN md5 DataFrame 1 True 1 1 None True 1 2024-12-20 15:05:02.841080+00:00 1
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None .fastq.gz None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.582786+00:00 1
# reverse ordering
ln.Artifact.filter().order_by("-key").df()
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None .fastq.gz None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.582786+00:00 1
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None .parquet dataset 5088 9_QyZIRSh4ExiWhliEBYyw None NaN md5 DataFrame 1 True 1 1 None True 1 2024-12-20 15:05:02.841080+00:00 1
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None .jpg None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.572634+00:00 1
5 KhKcEAJZtiVWt8iU0000 example_datasets/dataset2.h5ad None .h5ad dataset 22384 yI0uyeBcL20WSAClKeREVA None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:10.331387+00:00 1
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1

contains

ln.Transform.filter(name__contains="search").df().head(5)
Hide code cell output
uid name key description type source_code hash reference reference_type _source_code_artifact_id version is_latest created_at created_by_id
id
1 pd7UR7Z8hoTq0000 Query & search registries registries.ipynb None notebook None None None None None None True 2024-12-20 15:05:00.752789+00:00 1

And case-insensitive:

ln.Transform.filter(name__icontains="Search").df().head(5)
Hide code cell output
uid name key description type source_code hash reference reference_type _source_code_artifact_id version is_latest created_at created_by_id
id
1 pd7UR7Z8hoTq0000 Query & search registries registries.ipynb None notebook None None None None None None True 2024-12-20 15:05:00.752789+00:00 1

startswith

ln.Transform.filter(name__startswith="Research").df()
Hide code cell output
uid id name key description type source_code hash reference reference_type _source_code_artifact_id version is_latest created_at created_by_id

or

ln.Artifact.filter(ln.Q(suffix=".jpg") | ln.Q(suffix=".fastq.gz")).df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
1 WxfROnWxwhbbxddo0000 images/my_image.jpg None .jpg None 29358 r4tnqmKI_SjrkdLzpuWp4g None None md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.572634+00:00 1
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None .fastq.gz None 20 hi7ZmAzz8sfMd3vIQr-57Q None None md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.582786+00:00 1

negate/ unequal

ln.Artifact.filter(~ln.Q(suffix=".jpg")).df()
Hide code cell output
uid key description suffix type size hash n_objects n_observations _hash_type _accessor visibility _key_is_virtual storage_id transform_id version is_latest run_id created_at created_by_id
id
2 dq6W0KAkWQkz9vNu0000 raw/my_fastq.fastq None .fastq.gz None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 None 1 True 1 1 None True 1 2024-12-20 15:05:02.582786+00:00 1
3 dSZyTI6lWuG9H2Yi0000 iris/iris_collection.parquet None .parquet dataset 5088 9_QyZIRSh4ExiWhliEBYyw None NaN md5 DataFrame 1 True 1 1 None True 1 2024-12-20 15:05:02.841080+00:00 1
4 pT8fvIBdBaU8Uk2q0000 example_datasets/dataset1.h5ad None .h5ad dataset 25088 YMNwVfQZ78zwkB4shAQMfQ None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:07.052926+00:00 1
5 KhKcEAJZtiVWt8iU0000 example_datasets/dataset2.h5ad None .h5ad dataset 22384 yI0uyeBcL20WSAClKeREVA None 3.0 md5 AnnData 1 True 1 1 None True 1 2024-12-20 15:05:10.331387+00:00 1