SCMMIB package tutorials
[1]:
from scmmib.metrics import paired_graph_metrics, mosaic_latent_metrics,\
unpaired_latent_metrics
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import os
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/datatable/__init__.py:181: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display
init_styles()
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/datatable/__init__.py:181: DeprecationWarning: Importing update_display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display
init_styles()
Example 1: A demo for evaluating the output of an unpaired scRNA and scATAC diagonal integration.
[2]:
def test_unpair():
    """Evaluate the bindSC unpaired (diagonal) scRNA + scATAC integration demo.

    Reads the demo metadata and the two per-modality latent embeddings, aligns
    both embeddings to the metadata barcode order, stores them in the ``obsm``
    slots ``'RNA'`` and ``'ATAC'`` of one AnnData object and passes that object
    to ``unpaired_latent_metrics``.

    Returns:
        The value returned by ``unpaired_latent_metrics`` (called with
        ``outf=None``).
    """
    # 1. Load the latent files and the metadata file; all demo files are deposited in the GitHub folder.
    metadata = "../manuscript_figure_script_and_data/stage2_res/metadata/SHARE_RNA+ATAC_raw_metadata.csv.gz"
    meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')
    myfiles = [
        "../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-ATAC-latent.csv.gz",
        "../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-RNA-latent.csv.gz",
    ]
    atac_path, rna_path = myfiles
    latent_atac = pd.read_csv(atac_path, index_col=0, header=0)
    latent_rna = pd.read_csv(rna_path, index_col=0, header=0)

    # Put both embeddings in the same row order as the metadata.
    # NOTE(review): reindex() fills barcodes that are absent from a latent file
    # with NaN rows instead of raising -- confirm every metadata barcode is present.
    latent_atac_reindex = latent_atac.reindex(meta.index)
    latent_rna_reindex = latent_rna.reindex(meta.index)

    # 2. Build the AnnData object and calculate the metrics.
    # Cast explicitly instead of passing the deprecated ``dtype=`` keyword to AnnData.
    adata_unpaired = sc.AnnData(latent_rna_reindex.astype('float32'), obs=meta)
    adata_unpaired.obsm['RNA'] = latent_rna_reindex
    adata_unpaired.obsm['ATAC'] = latent_atac_reindex

    # outf=None returns the metrics instead of writing them; pass a path
    # (for example outf="./test.txt") to write them to that file.
    # embed_acc determines whether the accuracy metrics are calculated for
    # each embedding listed in the "mods" parameter.
    out = unpaired_latent_metrics(
        adata_unpaired,
        method="bindSC",
        cluster='louvain',
        batch=None,
        label='cell_type',
        mods=["RNA", "ATAC"],
        outf=None,
        embed_acc=True,
    )
    return out
[3]:
# Run the unpaired demo; the returned metrics are shown as this cell's output.
test_unpair()
[3]:
| Output | nCell | ARI-RNA | AMI-RNA | graph_cLISI-RNA | ARI-ATAC | AMI-ATAC | graph_cLISI-ATAC | FOSCTTM | nearest_cell_barcode | nearest_cell_barcode-RNA | nearest_cell_barcode-ATAC | nearest_cell_celltype | nearest_cell_celltype-RNA | nearest_cell_celltype-ATAC | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| bindSC | Embedding | 34774.0 | 0.150792 | 0.311001 | 0.909428 | 0.175393 | 0.406811 | 0.948815 | 0.159328 | 0.000532 | 0.000661 | 0.000403 | 0.32507 | 0.281561 | 0.36858 |
[ ]:
Example 2: A demo for a method that outputs a graph rather than an embedding.
[4]:
def test_graph():
    """Evaluate the SeuratV4 graph-output integration demo (BMMC CITE-seq, p10).

    Reads the demo metadata and the SeuratV4 graph file, reorders the graph
    rows to the metadata barcode order, stores the graph in ``adata.obsp``
    under the method name and passes the object to ``paired_graph_metrics``.

    Returns:
        The value returned by ``paired_graph_metrics`` (called with
        ``outf=None``).
    """
    method = "SeuratV4"

    # 1. Load the graph file and the metadata file; all demo files are deposited in the GitHub folder.
    metadata = "../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_p10_metadata.csv.gz"
    meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')
    latent = pd.read_csv("../test/BMMC-CITE_seq-p10-CITE_seq-SeuratV4-multi-graph.csv.gz", index_col=0, header=0)

    # NOTE(review): only the rows are reordered here; the graph columns keep the
    # order from the file -- confirm the file's barcode order already matches
    # the metadata, otherwise rows and columns of the graph will disagree.
    latent_reordered = latent.reindex(meta.index)

    # 2. Build the AnnData object and calculate the metrics.
    # Cast explicitly instead of passing the deprecated ``dtype=`` keyword to AnnData.
    adata = sc.AnnData(latent_reordered.astype('float32'), obs=meta)
    adata.obsp[method] = latent_reordered

    # Use the single ``method`` variable for both the obsp key and the metrics
    # call so the two can never drift apart.
    out = paired_graph_metrics(
        adata,
        method=method,
        cluster='louvain',
        batch='batch',
        label='cell_type',
        outf=None,
    )
    return out
[5]:
# Run the graph-output demo; the returned metrics are shown as this cell's output.
test_graph()
WARNING: You’re trying to run this on 9026 dimensions of `.X`, if you really want this, set `use_rep='X'`.
Falling back to preprocessing with `sc.pp.pca` and default params.
[5]:
| Output | nCell | graph_connectivity | graph_connectivity.l1 | graph_iLISI | ARI | ARI.l1 | AMI | AMI.l1 | graph_cLISI | graph_cLISI.l1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| SeuratV4 | Graph | 9026.0 | 0.912629 | 0.959633 | 0.145544 | 0.406453 | 0.379547 | 0.680174 | 0.6496 | 0.987117 | 0.996 |
[ ]:
Example 3: A demo for Seurat v5 bridge mosaic scRNA and ADT integration.
We provide two similar demos: one returns the metrics so they can be shown on screen, and the other writes them to files.
[4]:
def test_mosaic():
    """Evaluate the Seurat v5 bridge mosaic scRNA + ADT demo without writing files.

    Reads the demo metadata and the three Seurat v5 latent embeddings (paired
    cells, unpaired RNA, unpaired ADT), splits the metadata by batch into the
    paired and unpaired parts and passes everything to
    ``mosaic_latent_metrics`` with ``writef=False``.

    Returns:
        The value returned by ``mosaic_latent_metrics``.
    """
    # Single source of truth for the batch column and the batch labels; they
    # are used both for splitting the metadata and for the metrics call.
    paired = "s3d6"
    unpaired = "s2d1"
    batch = "batch"

    # 1. Load the Seurat v5 latent embeddings of the paired cells, the unpaired
    #    RNA and the unpaired ADT, as well as the metadata.
    metadata = "../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_s2d1_s3d6_metadata.csv.gz"
    myfiles = [
        "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_latent.csv.gz",
        "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_multi_lap_latent.csv.gz",
        "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_rna_reduc_latent.csv.gz",
    ]
    adt_path, paired_path, rna_path = myfiles
    meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')

    # 2. Match the paired and unpaired cells in the metadata, then pass the
    #    latents and metadatas (in the same order) to the metrics evaluation.
    # The unpaired subset is built twice on purpose so that the RNA and ADT
    # entries stay independent DataFrame objects, as in the original demo.
    metadatas = [
        meta[meta[batch] == paired],
        meta[meta[batch] == unpaired],
        meta[meta[batch] == unpaired],
    ]
    latent_pair = pd.read_csv(paired_path, index_col=0, header=0)
    latent_rna = pd.read_csv(rna_path, index_col=0, header=0)
    latent_mod2 = pd.read_csv(adt_path, index_col=0, header=0)
    latents = [latent_pair, latent_rna, latent_mod2]

    # With writef=True the metrics files would instead be written next to the
    # input embedding given in latent_path.
    out = mosaic_latent_metrics(
        latents=latents,
        metadatas=metadatas,
        paired=paired,
        unpaired=unpaired,
        mod2="adt",
        batch=batch,
        label="cell_type",
        latent_path=adt_path,
        method='SeuratV5',
        writef=False,
    )
    return out
[5]:
mosaic_metrics_all = test_mosaic()
# The output contains 3 metrics dataframes: (1) paired metrics for all cells,
# (2) unpaired metrics for the unpaired cells, and (3) paired metrics for CITE+RNA.
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,
[6]:
# Show the paired metrics output (the first element of the returned results).
mosaic_metrics_all[0]
[6]:
| Output | nCell | Batch_ASW_batch | Batch_ASW_site | graph_connectivity | graph_connectivity.l1 | graph_iLISI | ARI | ARI.l1 | AMI | AMI.l1 | graph_cLISI | graph_cLISI.l1 | isolated_labels_ASW | isolated_labels_ASW.l1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SeuratV5 | Embedding | 31965.0 | 0.697905 | 0.071962 | 0.630851 | 0.648688 | 0.007862 | 0.30419 | 0.259418 | 0.674201 | 0.626528 | 0.995078 | 0.999218 | 0.865326 | 0.485094 |
[7]:
def test_mosaic_to_file():
    """Run the Seurat v5 bridge mosaic scRNA + ADT demo and write the metrics to files.

    Same inputs as the on-screen mosaic demo, but ``mosaic_latent_metrics`` is
    called with ``writef=True``; the metrics files are written next to the
    embedding given in ``latent_path`` (the ``../test`` folder).

    Returns:
        None. The result of ``mosaic_latent_metrics`` is not returned.
    """
    # Single source of truth for the batch column and the batch labels; they
    # are used both for splitting the metadata and for the metrics call.
    paired = "s3d6"
    unpaired = "s2d1"
    batch = "batch"

    # 1. Load the Seurat v5 latent embeddings of the paired cells, the unpaired
    #    RNA and the unpaired ADT, as well as the metadata.
    metadata = "../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_s2d1_s3d6_metadata.csv.gz"
    myfiles = [
        "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_latent.csv.gz",
        "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_multi_lap_latent.csv.gz",
        "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_rna_reduc_latent.csv.gz",
    ]
    adt_path, paired_path, rna_path = myfiles
    meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')

    # 2. Match the paired and unpaired cells in the metadata, then pass the
    #    latents and metadatas (in the same order) to the metrics evaluation.
    # The unpaired subset is built twice on purpose so that the RNA and ADT
    # entries stay independent DataFrame objects, as in the original demo.
    metadatas = [
        meta[meta[batch] == paired],
        meta[meta[batch] == unpaired],
        meta[meta[batch] == unpaired],
    ]
    latent_pair = pd.read_csv(paired_path, index_col=0, header=0)
    latent_rna = pd.read_csv(rna_path, index_col=0, header=0)
    latent_mod2 = pd.read_csv(adt_path, index_col=0, header=0)
    latents = [latent_pair, latent_rna, latent_mod2]

    # Output the metrics to the latent path in the test folder.
    mosaic_latent_metrics(
        latents=latents,
        metadatas=metadatas,
        paired=paired,
        unpaired=unpaired,
        mod2="adt",
        batch=batch,
        label="cell_type",
        latent_path=adt_path,
        method='SeuratV5',
        writef=True,
    )
[8]:
# Run the mosaic demo with writef=True; the "writing to ..." lines below show the generated files.
test_mosaic_to_file()
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,
writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_paired-metrics.csv
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_unpaired-metrics.csv
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,
writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_noADT_paired-metrics.csv
[ ]: