SCMMIB package tutorials

[1]:
from scmmib.metrics import paired_graph_metrics,  mosaic_latent_metrics,\
    unpaired_latent_metrics
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import os
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/datatable/__init__.py:181: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display
  init_styles()
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/datatable/__init__.py:181: DeprecationWarning: Importing update_display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display
  init_styles()

Example 1: A demo for evaluating the output of an unpaired (diagonal) scRNA and scATAC integration.

[2]:
def test_unpair():
    """Compute metrics for the bindSC unpaired scRNA + scATAC demo embeddings.

    Reads the demo metadata and the two per-modality latent files, aligns both
    embeddings to the metadata barcodes, stores them in a single AnnData object
    (``obsm['RNA']`` and ``obsm['ATAC']``) and passes it to
    ``unpaired_latent_metrics``.

    Returns:
        The value returned by ``unpaired_latent_metrics``.
    """
    # 1. Locate the inputs; all demo files are deposited in the GitHub folder.
    meta_path = "../manuscript_figure_script_and_data/stage2_res/metadata/SHARE_RNA+ATAC_raw_metadata.csv.gz"
    atac_path, rna_path = (
        "../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-ATAC-latent.csv.gz",
        "../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-RNA-latent.csv.gz",
    )
    cell_meta = pd.read_csv(meta_path, index_col='barcode', header=0, dtype='category')

    # Put each embedding into the barcode order of the metadata.
    atac_embed = pd.read_csv(atac_path, index_col=0, header=0).reindex(cell_meta.index)
    rna_embed = pd.read_csv(rna_path, index_col=0, header=0).reindex(cell_meta.index)

    # 2. Build the AnnData object (RNA embedding as X) and attach both embeddings.
    adata_unpaired = sc.AnnData(rna_embed, obs=cell_meta, dtype='float32')
    for modality, embedding in (('RNA', rna_embed), ('ATAC', atac_embed)):
        adata_unpaired.obsm[modality] = embedding

    # outf=None returns the metrics instead of writing them; pass a path
    # (for example outf="./test.txt") to write them to a file.
    # embed_acc determines whether the accuracy metrics are also calculated
    # for each embedding listed in "mods".
    return unpaired_latent_metrics(
        adata_unpaired,
        method="bindSC",
        cluster='louvain',
        batch=None,
        label='cell_type',
        mods=["RNA", "ATAC"],
        outf=None,
        embed_acc=True,
    )
[3]:
# Run the unpaired demo; the returned metrics are displayed as the cell output.
test_unpair()
[3]:
Output nCell ARI-RNA AMI-RNA graph_cLISI-RNA ARI-ATAC AMI-ATAC graph_cLISI-ATAC FOSCTTM nearest_cell_barcode nearest_cell_barcode-RNA nearest_cell_barcode-ATAC nearest_cell_celltype nearest_cell_celltype-RNA nearest_cell_celltype-ATAC
bindSC Embedding 34774.0 0.150792 0.311001 0.909428 0.175393 0.406811 0.948815 0.159328 0.000532 0.000661 0.000403 0.32507 0.281561 0.36858
[ ]:

Example 2: A demo for a method that outputs a graph rather than an embedding.

[4]:
def test_graph():
    """Compute metrics for the SeuratV4 CITE-seq demo, whose output is a graph.

    Reads the cell-by-cell graph and the metadata, puts the graph into the
    barcode order of the metadata on BOTH axes, stores it in
    ``adata.obsp['SeuratV4']`` and passes the AnnData object to
    ``paired_graph_metrics``.

    Returns:
        The value returned by ``paired_graph_metrics``.

    Raises:
        ValueError: if the graph is not square, or if a metadata barcode has
            no row in the graph file.
    """
    # 1. Load the graph and metadata files; all demo files are deposited in the GitHub folder.
    metadata = "../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_p10_metadata.csv.gz"
    graph = pd.read_csv("../test/BMMC-CITE_seq-p10-CITE_seq-SeuratV4-multi-graph.csv.gz", index_col=0, header=0)
    meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')

    if graph.shape[0] != graph.shape[1]:
        raise ValueError(
            f"expected a square cell-by-cell graph, got shape {graph.shape}"
        )

    # Position of every metadata barcode among the graph rows (-1 = not found).
    order = graph.index.get_indexer(meta.index)
    missing = meta.index[order < 0]
    if len(missing) > 0:
        raise ValueError(
            f"{len(missing)} metadata barcodes are missing from the graph file, "
            f"e.g. {list(missing[:5])}"
        )

    # A graph must be permuted on rows AND columns; reordering the rows alone
    # leaves the columns in file order and misaligns the matrix.
    # NOTE(review): assumes the CSV columns are stored in the same cell order as
    # its rows; the permutation is positional so it does not depend on how the
    # column labels were written - confirm for other inputs.
    graph_reordered = graph.iloc[order, order]
    graph_reordered.index = meta.index

    # 2. Generate the AnnData object and calculate the metrics.
    adata = sc.AnnData(graph_reordered, obs=meta, dtype='float32')
    method = "SeuratV4"
    adata.obsp[method] = graph_reordered
    out = paired_graph_metrics(adata, method=method, cluster='louvain', batch='batch', label='cell_type', outf=None)
    return out
[5]:
# Run the graph demo; the returned metrics are displayed as the cell output.
test_graph()
WARNING: You’re trying to run this on 9026 dimensions of `.X`, if you really want this, set `use_rep='X'`.
         Falling back to preprocessing with `sc.pp.pca` and default params.
[5]:
Output nCell graph_connectivity graph_connectivity.l1 graph_iLISI ARI ARI.l1 AMI AMI.l1 graph_cLISI graph_cLISI.l1
SeuratV4 Graph 9026.0 0.912629 0.959633 0.145544 0.406453 0.379547 0.680174 0.6496 0.987117 0.996
[ ]:

Example 3: A demo for Seurat v5 bridge mosaic integration of scRNA and ADT.

  • We provide two similar demos: one returns the metrics for display on screen, the other writes them to files.

[4]:
def test_mosaic():
    """Compute metrics for the Seurat v5 mosaic scRNA + ADT demo and return them.

    Loads the Seurat v5 latent embeddings of the paired cells, the unpaired RNA
    cells and the unpaired ADT cells together with the metadata, splits the
    metadata by batch into paired ("s3d6") and unpaired ("s2d1") cells, and
    passes everything to ``mosaic_latent_metrics`` with ``writef=False``.

    Returns:
        The value returned by ``mosaic_latent_metrics``.
    """
    # 1. Input files: metadata plus the ADT, multi-modal and RNA latent embeddings.
    meta_path = "../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_s2d1_s3d6_metadata.csv.gz"
    adt_path = "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_latent.csv.gz"
    multi_path = "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_multi_lap_latent.csv.gz"
    rna_path = "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_rna_reduc_latent.csv.gz"

    meta = pd.read_csv(meta_path, index_col='barcode', header=0, dtype='category')
    paired_batch, unpaired_batch, batch_key = "s3d6", "s2d1", "batch"

    # 2. Match the paired and unpaired cells in the metadata. The order is
    #    [paired, unpaired, unpaired], matching the latents list below.
    metadatas = [
        meta[meta[batch_key] == batch_name]
        for batch_name in (paired_batch, unpaired_batch, unpaired_batch)
    ]
    # Latents in the order [paired, RNA, second modality (ADT)].
    latents = [
        pd.read_csv(path, index_col=0, header=0)
        for path in (multi_path, rna_path, adt_path)
    ]

    # With writef=True the metrics files would instead be written to the same
    # path as the input embeddings.
    return mosaic_latent_metrics(
        latents=latents,
        metadatas=metadatas,
        paired=paired_batch,
        unpaired=unpaired_batch,
        mod2="adt",
        batch=batch_key,
        label="cell_type",
        latent_path=adt_path,
        method='SeuratV5',
        writef=False,
    )
[5]:
mosaic_metrics_all = test_mosaic()
# The output contains 3 metrics dataframes: 1. paired metrics for all cells,
# 2. unpaired metrics for the unpaired cells, and 3. paired metrics for CITE+RNA.
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
  utils.warn_names_duplicates("obs")
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
  utils.warn_names_duplicates("obs")
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,
[6]:
# Demo: show the paired-metrics output (first element of the returned results).
mosaic_metrics_all[0]
[6]:
Output nCell Batch_ASW_batch Batch_ASW_site graph_connectivity graph_connectivity.l1 graph_iLISI ARI ARI.l1 AMI AMI.l1 graph_cLISI graph_cLISI.l1 isolated_labels_ASW isolated_labels_ASW.l1
SeuratV5 Embedding 31965.0 0.697905 0.071962 0.630851 0.648688 0.007862 0.30419 0.259418 0.674201 0.626528 0.995078 0.999218 0.865326 0.485094
[7]:
def test_mosaic_to_file():
    """Compute metrics for the Seurat v5 mosaic scRNA + ADT demo and write them to files.

    Same inputs as the on-screen mosaic demo, but ``mosaic_latent_metrics`` is
    called with ``writef=True``, so the metrics are written next to the latent
    file given as ``latent_path`` (the test folder). Nothing is returned.
    """
    # 1. Seurat v5 latent embeddings (ADT, multi-modal, RNA) and the metadata.
    meta_file = "../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_s2d1_s3d6_metadata.csv.gz"
    latent_files = {
        "adt": "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_latent.csv.gz",
        "multi": "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_multi_lap_latent.csv.gz",
        "rna": "../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_rna_reduc_latent.csv.gz",
    }
    cell_meta = pd.read_csv(meta_file, index_col='barcode', header=0, dtype='category')
    paired_name = "s3d6"
    unpaired_name = "s2d1"
    batch_col = "batch"

    # 2. Split the metadata into paired / unpaired cells and load the latents
    #    in the matching order: paired, unpaired RNA, unpaired ADT.
    meta_parts = []
    for batch_name in (paired_name, unpaired_name, unpaired_name):
        meta_parts.append(cell_meta[cell_meta[batch_col] == batch_name])

    latent_parts = []
    for key in ("multi", "rna", "adt"):
        latent_parts.append(pd.read_csv(latent_files[key], index_col=0, header=0))

    # Output the metrics to the latent path in the test folder.
    mosaic_latent_metrics(
        latents=latent_parts,
        metadatas=meta_parts,
        paired=paired_name,
        unpaired=unpaired_name,
        mod2="adt",
        batch=batch_col,
        label="cell_type",
        latent_path=latent_files["adt"],
        method='SeuratV5',
        writef=True,
    )
[8]:
# Run the mosaic demo with writef=True; the metrics are written to files rather than returned.
test_mosaic_to_file()
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,
writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_paired-metrics.csv
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
  utils.warn_names_duplicates("obs")
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
  utils.warn_names_duplicates("obs")
writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_unpaired-metrics.csv
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,
writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_noADT_paired-metrics.csv
[ ]: