Entering edit mode
20 days ago
shakyaram079
•
0
Bladder cancer has the COSMIC signatures "SBS1", "SBS2", "SBS5", "SBS8", "SBS13", "SBS29", and "SBS40". I want to reconstruct the mutation matrix data of bladder cancer using only these signatures. I used the standard refit technique with only these 7 mutational signatures, but I am getting low cosine similarity values between the original and reconstructed profiles.
Please check this code and fix it.
# Refit the bladder-cancer mutation matrix to a fixed set of COSMIC SBS
# signatures, then export the absolute and relative contributions per sample.

# Read the cleaned mutation matrix. check.names = FALSE keeps the sample IDs
# in the header exactly as written; by default read.csv() rewrites
# non-syntactic names (e.g. IDs containing "-" or starting with a digit).
mut_mat2 <- read.csv(
  "Cleaned_BLCA_PCAWG_MutationMatrix.csv",
  check.names = FALSE
)
# Convert to a matrix and drop the first column (assumed to be the
# mutation-type identifier -- TODO confirm against the CSV).
# drop = FALSE keeps a one-sample input as a named one-column matrix.
mut_mat4 <- as.matrix(mut_mat2[, -1, drop = FALSE])
# Small pseudocount to avoid zeros
mut_mat4 <- mut_mat4 + 0.0001

# Load the known COSMIC signatures
# (get_known_signatures() is presumably MutationalPatterns' -- it must already
# be attached for this script to run).
signatures <- get_known_signatures()
# Select the signatures found in bladder cancer
bladder_sbs <- c("SBS1", "SBS2", "SBS5", "SBS8", "SBS13", "SBS29", "SBS40")
missing_sbs <- setdiff(bladder_sbs, colnames(signatures))
if (length(missing_sbs) > 0) {
  stop(
    "Signatures not found in the reference set: ",
    paste(missing_sbs, collapse = ", "),
    call. = FALSE
  )
}
selected_signatures <- signatures[, bladder_sbs]

# Fail early, with a readable message, when the matrix is in the wrong
# orientation: sample names are later taken from the columns of mut_mat4, so
# mutation types must be in rows.
if (nrow(mut_mat4) != nrow(selected_signatures)) {
  stop(
    "Mutation matrix has ", nrow(mut_mat4), " rows but the signatures have ",
    nrow(selected_signatures), "; expected mutation types in rows and ",
    "samples in columns.",
    call. = FALSE
  )
}
# NOTE(review): the refit presumably pairs matrix rows and signature rows by
# position, not by name (the identifier column was dropped above). Verify that
# the CSV rows are in the same mutation-type order as the signature matrix; a
# mismatch would lower the cosine similarity of every sample.

# Standard refit for selected signatures found in bladder cancer
selected_fit_res <- fit_to_signatures(mut_mat4, selected_signatures)

# Transpose so that rows are samples and columns are signatures, then save
selected_fit_SBS_transposed_data <- t(selected_fit_res$contribution)
write.csv(selected_fit_SBS_transposed_data, "best_fit_SBS_transposed_data.csv")

# Relative contribution of each signature within each sample (rows sum to 1).
# Dividing a matrix by a vector of its row sums recycles down the columns, so
# every row is divided by its own total.
sample_totals <- rowSums(selected_fit_SBS_transposed_data)
SBS_relative_contribution <- selected_fit_SBS_transposed_data / sample_totals
# If there are NaN values because of division by zero, replace them with 0
SBS_relative_contribution[is.nan(SBS_relative_contribution)] <- 0
# Save relative contribution of transposed data to CSV
write.csv(
  SBS_relative_contribution,
  file = "SBS_relative_BLCA_PCAWG_contribution.csv"
)

# Re-read the file so the row names become an ordinary first column, name that
# column 'Sample_ID', and write the result without row names.
data <- read.csv("SBS_relative_BLCA_PCAWG_contribution.csv", header = TRUE)
colnames(data)[1] <- "Sample_ID"
write.csv(
  data,
  "Bladder_SBS_relative_BLCA_PCAWG_contribution.csv",
  row.names = FALSE
)
#Cosine-similarity between original and reconstructed
library(Matrix)
#fit_res <- fit_to_signatures(mut_mat4, selected_signatures)
# Column-wise cosine similarity between two matrices.
#
# mut_mat4:      matrix whose columns are the original profiles.
# reconstructed: matrix whose columns are the matching reconstructed profiles;
#                column j is compared with column j of mut_mat4.
#
# Returns a numeric vector with one value per column of mut_mat4. A column
# with zero norm on either side yields NaN (0 / 0).
cosine_similarity_function <- function(mut_mat4, reconstructed) {
  column_index <- seq_len(ncol(mut_mat4))
  vapply(
    column_index,
    function(j) {
      original_col <- mut_mat4[, j]
      recon_col <- reconstructed[, j]
      # Dot product divided by the product of the two Euclidean norms.
      dot_product <- sum(original_col * recon_col)
      norm_product <- sqrt(sum(original_col^2)) * sqrt(sum(recon_col^2))
      dot_product / norm_product
    },
    numeric(1)
  )
}
# Score every sample by the cosine similarity between its original profile and
# the profile reconstructed from the selected-signature refit.
cosine_similarity <- cosine_similarity_function(
  mut_mat4,
  selected_fit_res$reconstructed
)
# Put the scores next to the sample names (the column names of the matrix).
df <- data.frame(
  sample_names = colnames(mut_mat4),
  cosine_similarity = cosine_similarity
)
# Keep the path in one place so the write and the read-back cannot drift apart.
selected_refit_cosine_csv <-
  "BLCA_PCAWG_General_Selected_refit_cosine_similarity.csv"
write.csv(df, selected_refit_cosine_csv, row.names = FALSE)
# Load the CSV file that was just written. The previous version read
# "BLCA_PCAWG_General_refit_cosine_similarity.csv" (without "Selected"), which
# the visible script never writes, so it post-processed a stale or missing
# file instead of these results.
data <- read.csv(selected_refit_cosine_csv, header = TRUE)
# Replace 'sample_names' with 'Sample_ID'
colnames(data)[colnames(data) == "sample_names"] <- "Sample_ID"
# Save the modified data back to a new CSV file
write.csv(
  data,
  "Bladder_BLCA_PCAWG_General_Selected_refit_cosine_similarity.csv",
  row.names = FALSE
)