Entering edit mode
7 months ago
sooni
▴
20
Hello.
I want to integrate the 7 Seurat objects and then convert them to cds objects to perform batch correction. First of all, this is the code I ran before changing to cds.
donor1 <- Read10X_h5("GSM5723843_Donor1_raw_feature_bc_matrix.h5")
donor2 <- Read10X_h5("GSM5723844_Donor2_raw_feature_bc_matrix.h5")
donor3 <- Read10X_h5("GSM5723845_Donor3_raw_feature_bc_matrix.h5")
donor4 <- Read10X_h5("GSM5723846_Donor4_raw_feature_bc_matrix.h5")
donor5 <- Read10X_h5("GSM5723847_Donor5_raw_feature_bc_matrix.h5")
donor6 <- Read10X_h5("GSM5723848_Donor6_raw_feature_bc_matrix.h5")
donor7 <- Read10X_h5("GSM5723849_Donor7_raw_feature_bc_matrix.h5")
donor1 <- CreateSeuratObject(donor1, project = "Donor1", min.cells = 3,
min.features = 200)
donor2 <- CreateSeuratObject(donor2, project = "Donor2", min.cells = 3,
min.features = 200)
donor3 <- CreateSeuratObject(donor3, project = "Donor3", min.cells = 3,
min.features = 200)
donor4 <- CreateSeuratObject(donor4, project = "Donor4", min.cells = 3,
min.features = 200)
donor5 <- CreateSeuratObject(donor5, project = "Donor5", min.cells = 3,
min.features = 200)
donor6 <- CreateSeuratObject(donor6, project = "Donor6", min.cells = 3,
min.features = 200)
donor7 <- CreateSeuratObject(donor7, project = "Donor7", min.cells = 3,
min.features = 200)
donor1 <- PercentageFeatureSet(donor1, pattern = "^MT-", col.name = "percent.mt")
donor2 <- PercentageFeatureSet(donor2, pattern = "^MT-", col.name = "percent.mt")
donor3 <- PercentageFeatureSet(donor3, pattern = "^MT-", col.name = "percent.mt")
donor4 <- PercentageFeatureSet(donor4, pattern = "^MT-", col.name = "percent.mt")
donor5 <- PercentageFeatureSet(donor5, pattern = "^MT-", col.name = "percent.mt")
donor6 <- PercentageFeatureSet(donor6, pattern = "^MT-", col.name = "percent.mt")
donor7 <- PercentageFeatureSet(donor7, pattern = "^MT-", col.name = "percent.mt")
donors <- merge(donor1, y = c(donor2, donor3, donor4,
donor5, donor6, donor7))
VlnPlot(donors, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3, pt.size = 0)
donors <- subset(donors, nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
donors.list <- SplitObject(donors, split.by = "orig.ident")
donors.list <- lapply(X = donors.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
features <- SelectIntegrationFeatures(object.list = donors.list)
donors.anchors <- FindIntegrationAnchors(object.list = donors.list,
anchor.features = features)
saydonors.combined <- IntegrateData(anchorset = donors.anchors)
DefaultAssay(donors.combined) <- "integrated"
donors.combined <- ScaleData(donors.combined, verbose = FALSE)
donors.combined <- RunPCA(donors.combined, npcs = 30, verbose = FALSE)
donors.combined <- RunUMAP(donors.combined, reduction = "pca", dims = 1:30)
donors.combined <- FindNeighbors(donors.combined, reduction = "pca", dims = 1:30)
#### another way to convert seurat to cds ####
my.so <- donors.combined
# Project PC dimensions to whole data set
my.so <- ProjectDim(my.so, reduction = "pca")
expression_matrix <- my.so@assays$RNA@counts
cell_metadata <- my.so@meta.data
if (all.equal(colnames(expression_matrix), rownames(cell_metadata))) {
print(sprintf("Cell identifiers match"))
} else {
print(sprintf("Cell identifier mismatch - %i cells in expression matrix, %i cells in metadata",
ncol(expression_matrix), nrow(cell_metadata)))
print("If the counts are equal, sort differences will throw this error")
}
gene_annotation <- data.frame(gene_short_name = rownames(my.so@assays$RNA),
row.names = rownames(my.so@assays$RNA))
if (all.equal(rownames(expression_matrix), rownames(gene_annotation))) {
print(sprintf("Gene identifiers all match"))
} else {
print(sprintf("Gene identifier mismatch - %i genes in expression matrix, %i gene in gene annotation",
nrow(expression_matrix), nrow(gene_annotation)))
print("If the counts are equal, sort differences will throw this error")
}
cell_metadata$Samples <- rep(NA)
cell_metadata <- cell_metadata %>%
mutate(Samples = case_when(
grepl("Donor1", orig.ident) ~ "Donor1",
grepl("Donor2", orig.ident) ~ "Donor2",
grepl("Donor3", orig.ident) ~ "Donor3",
grepl("Donor4", orig.ident) ~ "Donor4",
grepl("Donor5", orig.ident) ~ "Donor5",
grepl("Donor6", orig.ident) ~ "Donor6",
grepl("Donor7", orig.ident) ~ "Donor7",
TRUE ~ NA_character_
))
my.cds <- new_cell_data_set(expression_matrix,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
I would like to perform batch correction on the cds object created this way. The code that I batch corrected in the cds object is as follows.
my.cds <- preprocess_cds(my.cds, num_dim = 100)
reduce_my.cds <- reduce_dimension(my.cds)
plot_cells(reduce_my.cds, color_cells_by = "Samples", label_cell_groups = F,
group_label_size = 5)
batch_my.cds <- align_cds(reduce_my.cds, num_dim = 100, alignment_group = "Samples")
batch_my.cds <- reduce_dimension(batch_my.cds)
plot_cells(batch_my.cds, color_cells_by = "Samples", label_cell_groups = F)
When UMAP is drawn after performing batch correction with the code above, the UMAP is drawn poorly. Each cluster is held together like a single lump.
Is my code wrong? If there is a site with useful code, please let me know.
Thank you for help!