{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# SCMMIB package tutorials" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/datatable/__init__.py:181: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n", " init_styles()\n", "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/datatable/__init__.py:181: DeprecationWarning: Importing update_display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n", " init_styles()\n" ] }, { "data": { "text/html": [ "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scmmib.metrics import paired_graph_metrics, mosaic_latent_metrics,\\\n", " unpaired_latent_metrics\n", "import numpy as np\n", "import pandas as pd\n", "import scanpy as sc\n", "import anndata as ad\n", "import os" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example 1: A demo for evaluate unpair scRNA and scATAC diagonal integration output.\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def test_unpair():\n", " # 1. load the latent files and metadata files, all demo files are deposited in the github folder\n", " metadata = \"../manuscript_figure_script_and_data/stage2_res/metadata/SHARE_RNA+ATAC_raw_metadata.csv.gz\"\n", " meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')\n", " myfiles = [\"../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-ATAC-latent.csv.gz\",\n", " \"../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-RNA-latent.csv.gz\"]\n", " latent_atac = pd.read_csv(myfiles[0], index_col=0, header=0)\n", " latent_rna = pd.read_csv(myfiles[1], index_col=0, header=0)\n", " latent_atac_reindex=latent_atac.reindex(meta.index)\n", " latent_rna_reindex= latent_rna.reindex(meta.index)\n", " # 2. generate the ann data format and calculate the metrics\n", " adata_unpaired = sc.AnnData(latent_rna_reindex, obs=meta, dtype='float32')\n", " adata_unpaired.obsm['RNA'] = latent_rna_reindex\n", " adata_unpaired.obsm['ATAC'] = latent_atac_reindex\n", " out = unpaired_latent_metrics(adata_unpaired, method = \"bindSC\", cluster = 'louvain', batch = None, label = 'cell_type', mods = [\"RNA\",\"ATAC\"], outf=None, embed_acc=True) # outf=None,return stdout, or return the path or \"outf\" param, for example, outf=\"./test.txt\"\n", " # embed_acc determine wheter calculate the accuracy metrics for each embed in \"mods\" params\n", " return out" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OutputnCellARI-RNAAMI-RNAgraph_cLISI-RNAARI-ATACAMI-ATACgraph_cLISI-ATACFOSCTTMnearest_cell_barcodenearest_cell_barcode-RNAnearest_cell_barcode-ATACnearest_cell_celltypenearest_cell_celltype-RNAnearest_cell_celltype-ATAC
bindSCEmbedding34774.00.1507920.3110010.9094280.1753930.4068110.9488150.1593280.0005320.0006610.0004030.325070.2815610.36858
\n", "
" ], "text/plain": [ " Output nCell ARI-RNA AMI-RNA graph_cLISI-RNA ARI-ATAC \\\n", "bindSC Embedding 34774.0 0.150792 0.311001 0.909428 0.175393 \n", "\n", " AMI-ATAC graph_cLISI-ATAC FOSCTTM nearest_cell_barcode \\\n", "bindSC 0.406811 0.948815 0.159328 0.000532 \n", "\n", " nearest_cell_barcode-RNA nearest_cell_barcode-ATAC \\\n", "bindSC 0.000661 0.000403 \n", "\n", " nearest_cell_celltype nearest_cell_celltype-RNA \\\n", "bindSC 0.32507 0.281561 \n", "\n", " nearest_cell_celltype-ATAC \n", "bindSC 0.36858 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_unpair()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example 2: A demo for method with graph output rather than embedding.\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def test_graph():\n", " # 1. load the graph files and metadata files, all demo files are deposited in the github folde\n", " metadata = \"../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_p10_metadata.csv.gz\"\n", " latent = pd.read_csv(\"../test/BMMC-CITE_seq-p10-CITE_seq-SeuratV4-multi-graph.csv.gz\", index_col=0, header=0)\n", " meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')\n", " latent_reordered = latent.reindex(meta.index)\n", " \n", " # 2. generate the ann data format and calculate the metrics\n", " adata = sc.AnnData(latent_reordered, obs=meta, dtype='float32')\n", " method = \"SeuratV4\"\n", " adata.obsp[method] = latent_reordered\n", " out = paired_graph_metrics(adata, method = \"SeuratV4\", cluster = 'louvain', batch = 'batch', label = 'cell_type', outf=None)\n", " return out" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING: You’re trying to run this on 9026 dimensions of `.X`, if you really want this, set `use_rep='X'`.\n", " Falling back to preprocessing with `sc.pp.pca` and default params.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OutputnCellgraph_connectivitygraph_connectivity.l1graph_iLISIARIARI.l1AMIAMI.l1graph_cLISIgraph_cLISI.l1
SeuratV4Graph9026.00.9126290.9596330.1455440.4064530.3795470.6801740.64960.9871170.996
\n", "
" ], "text/plain": [ " Output nCell graph_connectivity graph_connectivity.l1 \\\n", "SeuratV4 Graph 9026.0 0.912629 0.959633 \n", "\n", " graph_iLISI ARI ARI.l1 AMI AMI.l1 graph_cLISI \\\n", "SeuratV4 0.145544 0.406453 0.379547 0.680174 0.6496 0.987117 \n", "\n", " graph_cLISI.l1 \n", "SeuratV4 0.996 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_graph()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example3: A demo for Seurat v5 bridge mosaic scRNA and ADT integration.\n", "- We provided two similar demos for output metrics to screen and files." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def test_mosaic():\n", " # 1. load Seurat v5 generated latent embeddings of paired, unpaired RNA and unpaired ADT, as well as metadata\n", " metadata = \"../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_s2d1_s3d6_metadata.csv.gz\"\n", "\n", " myfiles = [\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_latent.csv.gz\",\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_multi_lap_latent.csv.gz\",\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_rna_reduc_latent.csv.gz\"]\n", " meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')\n", " paired = \"s3d6\"\n", " unpaired = \"s2d1\"\n", " batch=\"batch\"\n", " # 2. match paired and unpaired cell information in metadata, and input the latents and metadatas for metrics evaluation\n", " # pair_cells = meta[meta[batch] == paired].index\n", " # unpair_cells = meta[meta[batch] == unpaired].index\n", " metadatas = [meta[meta[batch] == paired],meta[meta[batch] == unpaired],meta[meta[batch] == unpaired]]\n", " latent_pair=pd.read_csv(myfiles[1], index_col=0, header=0)\n", " latent_rna = pd.read_csv(myfiles[2], index_col=0, header=0)\n", " latent_mod2 = pd.read_csv(myfiles[0], index_col=0, header=0)\n", " \n", " latents = [latent_pair, latent_rna, latent_mod2]\n", " out = mosaic_latent_metrics(latents=latents,metadatas=metadatas,paired=\"s3d6\", unpaired=\"s2d1\", mod2=\"adt\", batch=\"batch\",label=\"cell_type\",latent_path=myfiles[0], method='SeuratV5', writef=False)\n", " # if writef=True, then two metrics files will be generated in same path of input embeddings.\n", " return out" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n", " Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,\n", "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n", " Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,\n", "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n", " utils.warn_names_duplicates(\"obs\")\n", "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n", " utils.warn_names_duplicates(\"obs\")\n", "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n", " Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,\n", "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n", " Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,\n" ] } ], "source": [ "mosaic_metrics_all = test_mosaic() \n", "# the output contains 3 metrics dataframe, including 1.pair metrics for all cells, 2. unpair metrics for unpair cells, and 3. pair metrics for CITE+RNA." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OutputnCellBatch_ASW_batchBatch_ASW_sitegraph_connectivitygraph_connectivity.l1graph_iLISIARIARI.l1AMIAMI.l1graph_cLISIgraph_cLISI.l1isolated_labels_ASWisolated_labels_ASW.l1
SeuratV5Embedding31965.00.6979050.0719620.6308510.6486880.0078620.304190.2594180.6742010.6265280.9950780.9992180.8653260.485094
\n", "
" ], "text/plain": [ " Output nCell Batch_ASW_batch Batch_ASW_site \\\n", "SeuratV5 Embedding 31965.0 0.697905 0.071962 \n", "\n", " graph_connectivity graph_connectivity.l1 graph_iLISI ARI \\\n", "SeuratV5 0.630851 0.648688 0.007862 0.30419 \n", "\n", " ARI.l1 AMI AMI.l1 graph_cLISI graph_cLISI.l1 \\\n", "SeuratV5 0.259418 0.674201 0.626528 0.995078 0.999218 \n", "\n", " isolated_labels_ASW isolated_labels_ASW.l1 \n", "SeuratV5 0.865326 0.485094 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#demo paired metrics output.\n", "mosaic_metrics_all[0]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def test_mosaic_to_file():\n", " # 1. load Seurat v5 generated latent embeddings of paired, unpaired RNA and unpaired ADT, as well as metadata\n", " metadata = \"../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_s2d1_s3d6_metadata.csv.gz\"\n", "\n", " myfiles = [\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_latent.csv.gz\",\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_multi_lap_latent.csv.gz\",\"../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_rna_reduc_latent.csv.gz\"]\n", " meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')\n", " paired = \"s3d6\"\n", " unpaired = \"s2d1\"\n", " batch=\"batch\"\n", " # 2. match paired and unpaired cell information in metadata, and input the latents and metadatas for metrics evaluation\n", " metadatas = [meta[meta[batch] == paired],meta[meta[batch] == unpaired],meta[meta[batch] == unpaired]]\n", " latent_pair=pd.read_csv(myfiles[1], index_col=0, header=0)\n", " latent_rna = pd.read_csv(myfiles[2], index_col=0, header=0)\n", " latent_mod2 = pd.read_csv(myfiles[0], index_col=0, header=0)\n", " \n", " latents = [latent_pair, latent_rna, latent_mod2]\n", " mosaic_latent_metrics(latents=latents,metadatas=metadatas,paired=\"s3d6\", unpaired=\"s2d1\", mod2=\"adt\", batch=\"batch\",label=\"cell_type\",latent_path=myfiles[0], method='SeuratV5', writef=True)\n", " # output metrics to latent path in test folder." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n", " Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,\n", "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n", " Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_paired-metrics.csv\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n", " utils.warn_names_duplicates(\"obs\")\n", "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n", " utils.warn_names_duplicates(\"obs\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_unpaired-metrics.csv\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:271: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n", " Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,\n", "/home/fusl/miniconda2/envs/scmmib/lib/python3.9/site-packages/scmmib/metrics.py:277: DeprecationWarning: Keyword argument 'group_key' has been deprecated in favour of 'label_key'. 'group_key' will be removed in a future version.\n", " Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "writing to ../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_noADT_paired-metrics.csv\n" ] } ], "source": [ "test_mosaic_to_file()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 2 }