Hi everyone, I'm trying to understand in which of phase of cell cycle the cells are using Seurat. I have two datasets (one for condition). I'm doing this on each dataset indipendently and after merging/integrating, obtaining different results (same results after merging or integrating). Why? I was expecting some differences between the merged or integrated datasets but not between the merged or analyzed a single one.
These are my codes
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "SD", min.cells = 3, min.features = 50)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 10)
pbmc1 <- NormalizeData(pbmc)
pbmc2 <- FindVariableFeatures(pbmc1, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(pbmc2), 10)
all.genes <- rownames(pbmc2)
pbmc2 <- ScaleData(pbmc2, features = all.genes)
pbmc2<-RunPCA(pbmc2, features = VariableFeatures(object = pbmc2))
pbmc3 <- FindNeighbors(pbmc2, dims = 1:15)
pbmc3 <- FindClusters(pbmc3, resolution = 0.5)
pbmc3 <- RunUMAP(pbmc3, dims = 1:15)
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
cellcyclec_clust664<- CellCycleScoring(object = pbmc3, s.features = s.genes, g2m.features = g2m.genes, set.ident = T)
Merged
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "01.Standard diet", min.cells = 3, min.features = 50)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 10)
pbmc<-NormalizeData(pbmc)
pbmc665<-CreateSeuratObject(counts = pbmc.data665, project = "02.Caloric restriction", min.cells = 3, min.features = 50)
pbmc665[["percent.mt"]] <- PercentageFeatureSet(pbmc665, pattern = "^MT-")
pbmc665 <- subset(pbmc665, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 10)
pbmc665<-NormalizeData(pbmc665)
pbmc$sample <- "Standard Diet"
pbmc665$sample <- "Caloric restriction"
#cmerged
library(scmap)
pbmc1c<- merge(pbmc, y = pbmc665, add.cell.ids = c("Standard Diet", "Caloric restriction"), project = "Glioblastoma")
pbmc2c <- FindVariableFeatures(pbmc1c, selection.method = "vst", nfeatures = 2000)
top10c <- head(VariableFeatures(pbmc2c), 10)
all.genesc <- rownames(pbmc2c)
pbmc2c <- ScaleData(pbmc2c, features = all.genesc)
pbmc2c<-RunPCA(pbmc2c, features = VariableFeatures(object = pbmc2c))
pbmc3c <- FindNeighbors(pbmc2c, dims = 1:15)
pbmc3c <- FindClusters(pbmc3c, resolution = 0.5)
pbmc3c <- RunUMAP(pbmc3c, dims = 1:15)
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
cellcyclec_clust<- CellCycleScoring(object = pbmc3c, s.features = s.genes, g2m.features = g2m.genes, set.ident = T)
Integrated
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "01.Standard diet", min.cells = 3, min.features = 50)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 10)
pbmc<-NormalizeData(pbmc)
pbmc665<-CreateSeuratObject(counts = pbmc.data665, project = "02.Caloric restriction", min.cells = 3, min.features = 50)
pbmc665[["percent.mt"]] <- PercentageFeatureSet(pbmc665, pattern = "^MT-")
pbmc665 <- subset(pbmc665, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 10)
pbmc665<-NormalizeData(pbmc665)
pbmc$sample <- "Standard Diet"
pbmc665$sample <- "Caloric restriction"
pbmcCNT<-FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
pbmcCR<-FindVariableFeatures(pbmc665, selection.method = "vst", nfeatures = 2000)
anchors <- FindIntegrationAnchors(object.list = list(pbmcCNT, pbmcCR), dims = 1:15, anchor.features=2000)
combined <- IntegrateData(anchorset = anchors, dims = 1:15)
combined <- ScaleData(combined, verbose = FALSE)
combined <- RunPCA(combined, verbose = FALSE)
combined1 <- RunUMAP(combined, dims = 1:15)
combined1 <- FindNeighbors(combined1, dims = 1:15)
combined1 <- FindClusters(combined1, resolution = 0.5)
DefaultAssay(combined1) <- "RNA"
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
cellcyclec_clust<- CellCycleScoring(object = combined1, s.features = s.genes, g2m.features = g2m.genes, set.ident = T)
What am I missing? Can you help me to understand why. Francesca
Can you describe what you mean by different results?
I have a different total Number (or percentage if you prefer) of cells in each phase