{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SCMMIB package tutorials"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/datatable/__init__.py:181: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n",
" init_styles()\n",
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/datatable/__init__.py:181: DeprecationWarning: Importing update_display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n",
" init_styles()\n"
]
},
{
"data": {
"text/html": [
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from scmmib.metrics import paired_graph_metrics, mosaic_latent_metrics,\\\n",
" unpaired_latent_metrics\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scanpy as sc\n",
"import anndata as ad\n",
"import os"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 1: A demo for evaluate unpair scRNA and scATAC diagonal integration output.\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def test_unpair():\n",
" # 1. load the latent files and metadata files, all demo files are deposited in the github folder\n",
" metadata = \"../manuscript_figure_script_and_data/stage2_res/metadata/SHARE_RNA+ATAC_raw_metadata.csv.gz\"\n",
" meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')\n",
" myfiles = [\"../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-ATAC-latent.csv.gz\",\n",
" \"../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-RNA-latent.csv.gz\"]\n",
" latent_atac = pd.read_csv(myfiles[0], index_col=0, header=0)\n",
" latent_rna = pd.read_csv(myfiles[1], index_col=0, header=0)\n",
" latent_atac_reindex=latent_atac.reindex(meta.index)\n",
" latent_rna_reindex= latent_rna.reindex(meta.index)\n",
" # 2. generate the ann data format and calculate the metrics\n",
" adata_unpaired = sc.AnnData(latent_rna_reindex, obs=meta, dtype='float32')\n",
" adata_unpaired.obsm['RNA'] = latent_rna_reindex\n",
" adata_unpaired.obsm['ATAC'] = latent_atac_reindex\n",
" out = unpaired_latent_metrics(adata_unpaired, method = \"bindSC\", cluster = 'louvain', batch = None, label = 'cell_type', mods = [\"RNA\",\"ATAC\"], outf=None, embed_acc=True) # outf=None,return stdout, or return the path or \"outf\" param, for example, outf=\"./test.txt\"\n",
" # embed_acc determine wheter calculate the accuracy metrics for each embed in \"mods\" params\n",
" return out"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Output | \n",
" nCell | \n",
" ARI-RNA | \n",
" AMI-RNA | \n",
" graph_cLISI-RNA | \n",
" ARI-ATAC | \n",
" AMI-ATAC | \n",
" graph_cLISI-ATAC | \n",
" FOSCTTM | \n",
" nearest_cell_barcode | \n",
" nearest_cell_barcode-RNA | \n",
" nearest_cell_barcode-ATAC | \n",
" nearest_cell_celltype | \n",
" nearest_cell_celltype-RNA | \n",
" nearest_cell_celltype-ATAC | \n",
"
\n",
" \n",
" \n",
" \n",
" | bindSC | \n",
" Embedding | \n",
" 34774.0 | \n",
" 0.150792 | \n",
" 0.311001 | \n",
" 0.909428 | \n",
" 0.175393 | \n",
" 0.406811 | \n",
" 0.948815 | \n",
" 0.159328 | \n",
" 0.000532 | \n",
" 0.000661 | \n",
" 0.000403 | \n",
" 0.32507 | \n",
" 0.281561 | \n",
" 0.36858 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Output nCell ARI-RNA AMI-RNA graph_cLISI-RNA ARI-ATAC \\\n",
"bindSC Embedding 34774.0 0.150792 0.311001 0.909428 0.175393 \n",
"\n",
" AMI-ATAC graph_cLISI-ATAC FOSCTTM nearest_cell_barcode \\\n",
"bindSC 0.406811 0.948815 0.159328 0.000532 \n",
"\n",
" nearest_cell_barcode-RNA nearest_cell_barcode-ATAC \\\n",
"bindSC 0.000661 0.000403 \n",
"\n",
" nearest_cell_celltype nearest_cell_celltype-RNA \\\n",
"bindSC 0.32507 0.281561 \n",
"\n",
" nearest_cell_celltype-ATAC \n",
"bindSC 0.36858 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_unpair()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 2: A demo for method with graph output rather than embedding.\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def test_graph():\n",
" # 1. load the graph files and metadata files, all demo files are deposited in the github folde\n",
" metadata = \"../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_p10_metadata.csv.gz\"\n",
" latent = pd.read_csv(\"../test/BMMC-CITE_seq-p10-CITE_seq-SeuratV4-multi-graph.csv.gz\", index_col=0, header=0)\n",
" meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')\n",
" latent_reordered = latent.reindex(meta.index)\n",
" \n",
" # 2. generate the ann data format and calculate the metrics\n",
" adata = sc.AnnData(latent_reordered, obs=meta, dtype='float32')\n",
" method = \"SeuratV4\"\n",
" adata.obsp[method] = latent_reordered\n",
" out = paired_graph_metrics(adata, method = \"SeuratV4\", cluster = 'louvain', batch = 'batch', label = 'cell_type', outf=None)\n",
" return out"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING: You’re trying to run this on 9026 dimensions of `.X`, if you really want this, set `use_rep='X'`.\n",
" Falling back to preprocessing with `sc.pp.pca` and default params.\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Output | \n",
" nCell | \n",
" graph_connectivity | \n",
" graph_connectivity.l1 | \n",
" graph_iLISI | \n",
" ARI | \n",
" ARI.l1 | \n",
" AMI | \n",
" AMI.l1 | \n",
" graph_cLISI | \n",
" graph_cLISI.l1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | SeuratV4 | \n",
" Graph | \n",
" 9026.0 | \n",
" 0.912629 | \n",
" 0.959633 | \n",
" 0.145544 | \n",
" 0.406453 | \n",
" 0.379547 | \n",
" 0.680174 | \n",
" 0.6496 | \n",
" 0.987117 | \n",
" 0.996 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Output nCell graph_connectivity graph_connectivity.l1 \\\n",
"SeuratV4 Graph 9026.0 0.912629 0.959633 \n",
"\n",
" graph_iLISI ARI ARI.l1 AMI AMI.l1 graph_cLISI \\\n",
"SeuratV4 0.145544 0.406453 0.379547 0.680174 0.6496 0.987117 \n",
"\n",
" graph_cLISI.l1 \n",
"SeuratV4 0.996 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_graph()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example3: A demo for Seurat v5 bridge mosaic scRNA and ADT integration.\n",
"- We provided two similar demos for output metrics to screen and files."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def test_mosaic():\n",
" # 1. load Seurat v5 generated latent embeddings of paired, unpaired RNA and unpaired ADT, as well as metadata\n",
" metadata = \"../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_s2d1_s3d6_metadata.csv.gz\"\n",
"\n",
" myfiles = [\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_latent.csv.gz\",\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_multi_lap_latent.csv.gz\",\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_rna_reduc_latent.csv.gz\"]\n",
" meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')\n",
" paired = \"s3d6\"\n",
" unpaired = \"s2d1\"\n",
" batch=\"batch\"\n",
" # 2. match paired and unpaired cell information in metadata, and input the latents and metadatas for metrics evaluation\n",
" # pair_cells = meta[meta[batch] == paired].index\n",
" # unpair_cells = meta[meta[batch] == unpaired].index\n",
" metadatas = [meta[meta[batch] == paired],meta[meta[batch] == unpaired],meta[meta[batch] == unpaired]]\n",
" latent_pair=pd.read_csv(myfiles[1], index_col=0, header=0)\n",
" latent_rna = pd.read_csv(myfiles[2], index_col=0, header=0)\n",
" latent_mod2 = pd.read_csv(myfiles[0], index_col=0, header=0)\n",
" \n",
" latents = [latent_pair, latent_rna, latent_mod2]\n",
" out = mosaic_latent_metrics(latents=latents,metadatas=metadatas,paired=\"s3d6\", unpaired=\"s2d1\", mod2=\"adt\", batch=\"batch\",label=\"cell_type\",latent_path=myfiles[0], method='SeuratV5', writef=False)\n",
" # if writef=True, then two metrics files will be generated in same path of input embeddings.\n",
" return out"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n",
" Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,\n",
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n",
" Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,\n",
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n",
" utils.warn_names_duplicates(\"obs\")\n",
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n",
" utils.warn_names_duplicates(\"obs\")\n",
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n",
" Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,\n",
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n",
" Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,\n"
]
}
],
"source": [
"mosaic_metrics_all = test_mosaic() \n",
"# the output contains 3 metrics dataframe, including 1.pair metrics for all cells, 2. unpair metrics for unpair cells, and 3. pair metrics for CITE+RNA."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Output | \n",
" nCell | \n",
" Batch_ASW_batch | \n",
" Batch_ASW_site | \n",
" graph_connectivity | \n",
" graph_connectivity.l1 | \n",
" graph_iLISI | \n",
" ARI | \n",
" ARI.l1 | \n",
" AMI | \n",
" AMI.l1 | \n",
" graph_cLISI | \n",
" graph_cLISI.l1 | \n",
" isolated_labels_ASW | \n",
" isolated_labels_ASW.l1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | SeuratV5 | \n",
" Embedding | \n",
" 31965.0 | \n",
" 0.697905 | \n",
" 0.071962 | \n",
" 0.630851 | \n",
" 0.648688 | \n",
" 0.007862 | \n",
" 0.30419 | \n",
" 0.259418 | \n",
" 0.674201 | \n",
" 0.626528 | \n",
" 0.995078 | \n",
" 0.999218 | \n",
" 0.865326 | \n",
" 0.485094 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Output nCell Batch_ASW_batch Batch_ASW_site \\\n",
"SeuratV5 Embedding 31965.0 0.697905 0.071962 \n",
"\n",
" graph_connectivity graph_connectivity.l1 graph_iLISI ARI \\\n",
"SeuratV5 0.630851 0.648688 0.007862 0.30419 \n",
"\n",
" ARI.l1 AMI AMI.l1 graph_cLISI graph_cLISI.l1 \\\n",
"SeuratV5 0.259418 0.674201 0.626528 0.995078 0.999218 \n",
"\n",
" isolated_labels_ASW isolated_labels_ASW.l1 \n",
"SeuratV5 0.865326 0.485094 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#demo paired metrics output.\n",
"mosaic_metrics_all[0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def test_mosaic_to_file():\n",
" # 1. load Seurat v5 generated latent embeddings of paired, unpaired RNA and unpaired ADT, as well as metadata\n",
" metadata = \"../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_s2d1_s3d6_metadata.csv.gz\"\n",
"\n",
" myfiles = [\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_latent.csv.gz\",\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_multi_lap_latent.csv.gz\",\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_rna_reduc_latent.csv.gz\"]\n",
" meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')\n",
" paired = \"s3d6\"\n",
" unpaired = \"s2d1\"\n",
" batch=\"batch\"\n",
" # 2. match paired and unpaired cell information in metadata, and input the latents and metadatas for metrics evaluation\n",
" metadatas = [meta[meta[batch] == paired],meta[meta[batch] == unpaired],meta[meta[batch] == unpaired]]\n",
" latent_pair=pd.read_csv(myfiles[1], index_col=0, header=0)\n",
" latent_rna = pd.read_csv(myfiles[2], index_col=0, header=0)\n",
" latent_mod2 = pd.read_csv(myfiles[0], index_col=0, header=0)\n",
" \n",
" latents = [latent_pair, latent_rna, latent_mod2]\n",
" mosaic_latent_metrics(latents=latents,metadatas=metadatas,paired=\"s3d6\", unpaired=\"s2d1\", mod2=\"adt\", batch=\"batch\",label=\"cell_type\",latent_path=myfiles[0], method='SeuratV5', writef=True)\n",
" # output metrics to latent path in test folder."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n",
" Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,\n",
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n",
" Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_paired-metrics.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n",
" utils.warn_names_duplicates(\"obs\")\n",
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n",
" utils.warn_names_duplicates(\"obs\")\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_unpaired-metrics.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n",
" Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,\n",
"/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n",
" Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_noADT_paired-metrics.csv\n"
]
}
],
"source": [
"test_mosaic_to_file()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 2
}