Entering edit mode
2.2 years ago
wanaga3166
▴
10
Hi all,
I have these data.frame:
module_df
structure(list(gene_id = c("A1BG", "A1CF", "A2LD1", "A2M", "A2ML1",
"A4GALT", "A4GNT", "AAA1", "AAAS", "AACS", "AADAC", "AADACL2",
"AADACL3", "AADACL4", "AADAT"), colors = c("turquoise", "blue",
"turquoise", "grey60", "turquoise", "blue", "turquoise", "turquoise",
"blue", "yellow", "magenta", "turquoise", "grey", "turquoise",
"red")), row.names = c("A1BG", "A1CF", "A2LD1", "A2M", "A2ML1",
"A4GALT", "A4GNT", "AAA1", "AAAS", "AACS", "AADAC", "AADACL2",
"AADACL3", "AADACL4", "AADAT"), class = "data.frame")
submod
structure(list(gene_id = c("A2M", "AASS", "ABCD2", "ABCG1", "ABHD14A",
"ABHD15", "ABHD5", "ACAA2", "ACACA", "ACACB", "ACAD11", "ACAD8",
"ACADM", "ACADS", "ACADSB"), colors = c("grey60", "brown", "brown",
"grey60", "brown", "brown", "brown", "brown", "brown", "brown",
"brown", "brown", "brown", "brown", "brown")), row.names = c(4L,
25L, 65L, 72L, 83L, 85L, 89L, 109L, 110L, 111L, 113L, 114L, 117L,
118L, 119L), class = "data.frame")
subexpr
structure(list(GeneSymbol = c("A2M", "AASS", "ABCD2", "ABCG1",
"ABHD14A", "ABHD15", "ABHD5", "ACAA2", "ACACA", "ACACB"), R012.EGM2 = c(50.2764951996428,
101.665447606096, 5.14281915226989, 12.1175440994071, 127.400290446813,
50.6721806650688, 187.444869718312, 125.028697858068, 124.640594250484,
14.559615204624), R016.EGM2 = c(46.6399755559676, 73.1954332331196,
5.99306742010033, 11.9097840149146, 123.360595739224, 55.5747760958692,
195.751905602931, 130.587362354726, 162.118704445471, 15.098902880636
), V06.EGM2 = c(51.8486405122139, 70.9836654034483, 5.36299847633948,
11.8503634313783, 112.439818697326, 51.4168701523525, 280.399638597663,
120.828626173478, 129.803136030716, 12.9402241789224), V07.EGM2 = c(117.955683199945,
86.7501578979337, 5.0894935767762, 12.1221347970042, 114.720275503257,
50.1008498711308, 194.799263873044, 116.159360477615, 149.444200673643,
14.6041181366271), V08.EGM2 = c(94.9274274162506, 70.5808603502644,
5.49133134976474, 12.2734830659801, 105.231609450716, 44.7581009650209,
204.585401390909, 104.92616313392, 100.125365909659, 13.4964153291074
), N179 = c(192.745422746882, 74.3515057108856, 29.365366693371,
10.4388992737693, 104.897752588442, 57.5620930848904, 206.669342475002,
137.362576209483, 70.0328556923676, 128.151538222808), N181 = c(215.662725276862,
58.1513722826225, 25.2175610232832, 14.9148176681094, 98.5597745615199,
50.3461555048319, 382.111329968073, 138.065978007562, 52.5064295370523,
130.508317256036), N184 = c(101.949872053494, 98.4591638219055,
28.2773164342233, 11.5115144425951, 109.929603150963, 39.3030585235286,
176.159896274476, 167.023033236092, 120.143842776412, 132.777016208134
), V181 = c(174.216915831257, 108.294596352281, 25.9726969260387,
13.1836455007548, 121.667169254219, 61.5397177077925, 206.185640237808,
116.408716711222, 185.906463762304, 129.926726255828)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
I would like to obtain a data.frame like that:
GeneSymbol | name | value | module
AASS | ASC2.jo | 54.23 | grey60
AAS2 | MSC | 24.12 | brown
However, when I execute my code, the column module is empty (full of NA). Also, I don't want gene_id column. How to correct that? Thank you for your help.
#-------------------------------------------------------------------
# Examine Expression Profiles
#-------------------------------------------------------------------
# pick out a few modules of interest here
modules_of_interest = c("grey60", "brown")
# Pull out list of genes in that module
submod = module_df %>%
subset(colors %in% modules_of_interest)
row.names(module_df) = module_df$gene_id
subexpr = data_BE[which(data_BE$GeneSymbol%in%submod$gene_id),]
subexpr = subset(subexpr, select = -c(V179))
submod_df = data.frame(subexpr) %>%
mutate(gene_id = row.names(.)) %>%
pivot_longer(c(-GeneSymbol, -gene_id)) %>%
mutate(module = module_df[gene_id,]$colors)
My quick guess is that your error is coming from these lines:
Have you run your code line-by-line and confirmed that each step is doing what you think?