Entering edit mode
2.6 years ago
akilabioinfo
▴
10
Hello All,
I have generated a count matrix using kallisto | bustools, and I would like to remove empty droplets based on the knee plot. I would like to know how to identify (i) the inflection point and (ii) the knee-point threshold cutoff using Python code.
I know we can use "DropletUtils" in R, but I would like to know whether any Python module is available to identify the knee cutoff for a sparse expression matrix. Please help!
enter code here
import os
import statistics

import anndata
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scrublet as scr
from scipy import sparse, io
from sklearn.decomposition import TruncatedSVD
# Sample identifiers; each one names an input file "<sample>.h5ad".
# NOTE(review): f1..f5 are not defined in the visible snippet -- they are
# assumed to be sample names defined earlier; confirm before running.
list1 = [f1, f2, f3, f4, f5]

# "~" is a shell shorthand; expand it explicitly so the paths do not depend
# on the libraries doing it for us.
input_dir = os.path.expanduser("~/Akila/unsplice_counts")
output_dir = os.path.expanduser("~/Akila/unsplice_knee")
# savefig fails if the target directory is missing, so create it up front.
os.makedirs(output_dir, exist_ok=True)

for sample in list1:
    # Read the AnnData object for this sample.
    adata = anndata.read_h5ad(os.path.join(input_dir, str(sample) + ".h5ad"))

    # Sum the matrix along axis 1 (one total per row) and sort the totals
    # in descending order. np.asarray + flatten turns the (n, 1) result of
    # the sum into a flat 1-D array.
    knee = np.sort(np.asarray(adata.X.sum(axis=1)).flatten())[::-1]

    if knee.size == 0:
        # Nothing to plot and min/max would raise on an empty array.
        print(sample, "contains no barcodes; skipping")
        continue

    # Ranks start at 1: a rank of 0 has no position on a logarithmic axis,
    # so a 0-based range cannot place the top-ranked barcode.
    ranks = np.arange(1, knee.size + 1)

    # Create the knee plot (rank on x, total count on y, both log-scaled).
    fig, ax = plt.subplots(figsize=(10, 7))
    ax.loglog(ranks, knee, linewidth=5, color="g")
    ax.set_xlabel("Set of Barcodes")
    ax.set_ylabel("UMI Counts")
    ax.grid(True, which="both")

    # Summary statistics of the per-barcode totals.
    print(sample, knee.min(), np.median(knee), knee.max(), np.mean(knee))

    # Save the plot, then close the figure so figures do not accumulate in
    # memory across loop iterations.
    fig.savefig(os.path.join(output_dir, str(sample) + ".png"))
    plt.close(fig)
Thanks, Akila