Hi there,
I am attempting to follow the 10xGenomics tutorial for running trajectory analysis on scRNAseq data through velocyto and scvelo. I first ran velocyto on each sample I have through velocyto run10x ${10x_path} ${gtf_path}
. I've exported my Seurat object as a loom file and exported the UMAP embeddings and cluster info as .csvs as follows:
# Load in seurat object
load(seurat_object)
# Run standard normalization
DefaultAssay(object = s) <- "RNA"
s <- NormalizeData(s, verbose = FALSE)
# Generate loom object
s.loom <- SeuratDisk::as.loom(s, filename = paste0(output_dir, "s.loom"), verbose = TRUE)
s.loom$close_all()
# Isolate UMAP coordinates
umap <- Embeddings(s[["umap"]])
colnames(umap) <- c("X Coordinate", "Y Coordinate")
write.csv(umap, file = paste0(output_dir, "umap.csv"))
# Isolate cell types
clusters <- s[["cluster_ident"]]
write.csv(clusters, file = paste0(output_dir, "clusters.csv"))
I then loaded in the Seurat object, embeddings, and clusters in python and merged them with the velocyto loom files into one:
# Define celltypes of interest
celltypes = ["T Regulatory Cells"]
# Read in Seurat data
s = scv.read_loom(input_dir + "s.loom")
# Add UMAP information to s
umap = pd.read_csv(input_dir + "umap.csv", index_col = 0)
s.obsm["X_umap"] = umap
# Add cluster information to s
clusters = pd.read_csv(input_dir + "clusters.csv", index_col = 0)
s.obs['Clusters'] = clusters
# Subset for clusters of interest
barcodes = clusters[clusters["cluster_ident"].isin(celltypes)].index
s = s[barcodes]
# Merge loom files if necessary
if not os.path.exists(output_dir + "transcript.loom"):
# Specify samples to exclude
excluded_samples = ["A3", "C7", "D7", "E6", "F5"]
# Find paths to Velocyto loom files
loom_files = []
for path, folders, files in os.walk(transcript_dir):
for file in files:
if ".loom" in file:
if os.path.split(os.path.split(path)[0])[1] not in excluded_samples:
loom_files.append(os.path.join(path, file))
# Merge Velocyto loom files
lmp.combine(loom_files, output_dir + "transcript.loom", key = "Accession")
# Read velocyto output
loom = scv.read(output_dir + 'transcript.loom')
# Modify looom index to match seurat
loom.obs.index = [index.replace(":", "_") for index in loom.obs.index]
loom.obs.index = [index.replace("x", "-1") for index in loom.obs.index]
# Merge velocyto with Seurat data
s = scv.utils.merge(s, loom)
I then ran standard processing and velocity calculations as so:
# Standard scvelo processing to run Dynamical Mode
scv.pp.filter_and_normalize(s, min_shared_counts = 30, n_top_genes = 2000)
scv.pp.moments(s, n_pcs = 30, n_neighbors = 30)
# Estimate RNA velocity and latent time
scv.tl.recover_dynamics(s, n_jobs = 16)
scv.tl.velocity(s, mode = 'dynamical')
scv.tl.velocity_graph(s, approx=True)
scv.tl.recover_latent_time(s)
print(s)
The resulting object has the following summary:
AnnData object with n_obs × n_vars = 1751 × 2000
obs: 'DF.classifications', 'Diagnosis', 'Genderclonal', 'ID', 'RNA_snn_res.0.3', 'SCT_snn_res.0.3', 'age', 'age_bin', 'clonal', 'clonotype_id', 'cluster_ident', 'disease.clonal', 'frequency', 'frequency_filtered', 'integrated_snn_res.0.3', 'manual_dublet', 'nCount_RNA', 'nCount_SCT', 'nFeature_RNA', 'nFeature_SCT', 'normalized_frequency', 'orig.ident', 'percent.mt', 'seurat_clusters', 'seurat_clusters_allcells', 'seurat_clusters_monocytes', 'seurat_clusters_tcells', 'sex', 'sort_day', 'sort_round', 'tra_cdr3s', 'trb_cdr3s', 'Clusters', '_X', '_Y', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'
var: 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
uns: 'pca', 'neighbors', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg'
obsm: 'X_umap', 'X_pca'
varm: 'PCs', 'loss'
layers: 'counts', 'matrix', 'ambiguous', 'spliced', 'unspliced', 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'velocity', 'velocity_u'
obsp: 'distances', 'connectivities'
I am able to create a violin plot of latent_time, but any plot I attempt to make including velocity measurements (i.e. with scv.pl.velocity_embedding_stream
) fails with the following error:
computing velocity embedding
Traceback (most recent call last):
File "code/trajectory_analysis/scvelo_CD4/trajectory_analysis-scvelo_CD4.py", line 116, in <module>
scv.pl.velocity_embedding_stream(s, basis = "umap", color_map="plasma", show = False,
File "/home/ncp2306/.local/lib/python3.8/site-packages/scvelo/plotting/velocity_embedding_stream.py", line 130, in velocity_embedding_stream
velocity_embedding(adata, basis=basis, vkey=key)
File "/home/ncp2306/.local/lib/python3.8/site-packages/scvelo/tools/velocity_embedding.py", line 164, in velocity_embedding
dX = X_emb[indices] - X_emb[i, None] # shape (n_neighbors, 2)
File "/software/python/3.8.4/lib/python3.8/site-packages/pandas/core/frame.py", line 2806, in __getitem__
indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
File "/software/python/3.8.4/lib/python3.8/site-packages/pandas/core/indexing.py", line 1552, in _get_listlike_indexer
self._validate_read_indexer(
File "/software/python/3.8.4/lib/python3.8/site-packages/pandas/core/indexing.py", line 1640, in _validate_read_indexer
raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyError: "None of [Int64Index([1749, 1726, 1720, 1718, 1712, 1703, 1696, 1693, 1688, 1685,\n ...\n 60, 53, 52, 41, 38, 22, 16, 12, 9, 6],\n dtype='int64', length=300)] are in the [columns]"
I really have no idea what to do about this error and would appreciate any advice 🙏