Entering edit mode
4.1 years ago
yzhao140
•
0
Hi all! I am working on a project using this dataset: https://support.10xgenomics.com/single-cell-multiome-atac-gex/datasets/1.0.0/human_brain_3k. You can download the data using this link: https://cf.10xgenomics.com/samples/cell-arc/1.0.0/human_brain_3k/human_brain_3k_filtered_feature_bc_matrix.tar.gz. However, when I analyzed the single-cell ATAC-seq data, I found that the maximum count in the matrix can be as high as 400. Why is that? The code I used is listed below.
import time
import numpy as np
import csv
import gzip
import os
import scipy.io
import codecs
import torch
from torch.utils.data import Dataset
from scipy.sparse import coo_matrix
# Load the filtered feature-barcode matrix and its annotations, then split the
# matrix rows into the 'Gene Expression' block (X) and the remaining rows (Y).
path = r"F:\zym\filtered_feature_bc_matrix"

# mmread returns a sparse matrix; it is densified here so that X and Y keep the
# same type as before.
# NOTE(review): todense() stores every zero explicitly and can need several GB
# for a matrix of this kind -- consider keeping it sparse (e.g. .tocsr()) if
# downstream code allows.
mat = scipy.io.mmread(os.path.join(path, "matrix.mtx.gz"))
mat = mat.todense()

# Read the features table once (instead of once per column) and close the
# gzip handle deterministically.
features_path = os.path.join(path, "features.tsv.gz")
with gzip.open(features_path, "rt", encoding="utf-8", newline="") as handle:
    feature_rows = list(csv.reader(handle, delimiter="\t"))
feature_ids = [row[0] for row in feature_rows]
gene_names = [row[1] for row in feature_rows]
feature_types = [row[2] for row in feature_rows]

barcodes_path = os.path.join(path, "barcodes.tsv.gz")
with gzip.open(barcodes_path, "rt", encoding="utf-8", newline="") as handle:
    barcodes = [row[0] for row in csv.reader(handle, delimiter="\t")]

# Derive the split point from the annotations rather than hard-coding it
# (the original script noted 36601 for this dataset).
# NOTE(review): slicing by count assumes all 'Gene Expression' rows come first
# and are contiguous in features.tsv.gz -- confirm for other datasets.
n_gene_expression = feature_types.count('Gene Expression')
X = mat[:n_gene_expression,]
Y = mat[n_gene_expression:,]

# Largest single entry among the non-gene-expression rows.
Y.max()