I have a dataframe with genes and samples (cancer vs. normal) and I already did LASSO and cross validation to choose best lambda, as well as finding the genes with non-zero coefficients (x in code below is my dataframe containing these). What I want to do next is add another column to x which contains the gene symbols (symbol from original dataframe daf) corresponding to those gene with non-zero coefficients in x such that my final dataframe with have probe_id as first column, followed by symbol and finally, the last column contains the non-zero coefficients. I realise this is probably super basic but I have been trying for over an hour to figure out a way to make this work but haven't had success. Any suggestions on what is the best way to do that? Below is my code:
probeID<-c("213456_at", "217428_s_at", "219230_at", "226228_at","230030_at")
symbol<-c("SOSTDC1","COL10A1", "TMEM100", "AQP4", "HS6ST2")
BCR1<-c(28.005966, 30.806433, 17.341375, 17.40666, 30.039436)
BCR2<-c(30.973469, 29.236025, 30.41161, 20.914383, 20.904331)
BCR3<-c(26.322796, 25.542833, 22.460772, 19.972183, 30.409641)
BCR4<-c(26.441898, 25.837685, 23.158352, 20.379173, 33.81327)
BCR5<-c(39.750206, 19.901133, 28.180124, 22.668673, 25.748884)
CTL6<-c(23.004385, 28.472675, 23.81621, 26.433413, 28.851719)
CTL7<-c(22.239546, 28.741674, 23.754929, 26.015385, 28.16368)
CTL8<-c(29.590443, 30.041988, 21.323061, 24.272501, 18.099016)
CTL9<-c(15.856442, 22.64224, 29.629637, 25.374926, 22.356894)
CTL10<-c(38.137985, 24.753338, 26.986668, 24.578161, 19.223558)
daf<-data.frame(probeID, symbol, BCR1, BCR2, BCR3, BCR4, BCR5, CTL6, CTL7, CTL8, CTL9, CTL10)
daf1<-t(daf[,3:12])
colnames(daf1)<-daf$probeID
Type<-c("cancer", "cancer", "cancer", "cancer", "cancer", "normal", "normal", "normal", "normal", "normal")
Sample<-c("BCR1", "BCR2", "BCR3", "BCR4", "BCR5", "CTL6", "CTL7", "CTL8", "CTL9", "CTL10")
type.df<-data.frame(Sample, Type)
daf2<-data.frame(daf1, type.df$Type)
names(daf2)[names(daf2) == "type.df.Type"] <- "Type"
daf2$Type<-as.factor(daf2$Type)
lassoModel <- glmnet(
x=data.matrix(daf2[,-6]),
y=daf2$Type,
alpha=1,
family="binomial")
cv.lassoModel<- cv.glmnet(
x=data.matrix(daf2[,-6]),
y=daf2$Type,
alpha=1, family="binomial")
idealLambda <- cv.lassoModel$lambda.min
co <- coef(cv.lassoModel, s=idealLambda, exact=TRUE)
cv.glm.probe<-coef(cv.lassoModel, s="lambda.min")
x<-data.frame(cv.glm.probe[cv.glm.probe[,1]!=0,])