Question

Mutational Signatures, General Standard Refit, Cosine similarity

0

Entering edit mode

7 months ago

shakyaram079 • 0

Bladder cancer has the COSMIC signatures "SBS1", "SBS2", "SBS5", "SBS8", "SBS13", "SBS29", and "SBS40". I want to refit only these signatures to the mutation matrix data of bladder cancer. I used the standard refit technique with only these 7 mutational signatures, but I am getting low cosine similarity values between the original and reconstructed profiles.

Please check this code and fix it.


# ---- Load the mutation count matrix ----
# The first CSV column is discarded (assumed to be an identifier column); the
# remaining columns are coerced to a matrix.
# NOTE(review): the discarded column is never used as row names, so the CSV
# row order presumably has to match the row order of the signature matrix
# already -- TODO confirm against the input file.
mut_mat2 <- read.csv("Cleaned_BLCA_PCAWG_MutationMatrix.csv")
mut_mat4 <- as.matrix(mut_mat2[, -1])

# Shift every cell by a tiny pseudocount so that no entry is exactly zero
mut_mat4 <- mut_mat4 + 1e-4

# ---- Reference signatures ----
# NOTE(review): get_known_signatures() and fit_to_signatures() are not base R;
# presumably the MutationalPatterns package is attached before this point.
signatures <- get_known_signatures()

# Keep only the signature columns reported for bladder cancer
selected_signatures <- signatures[, paste0("SBS", c(1, 2, 5, 8, 13, 29, 40))]

# ---- Refit ----
# Fit the mutation matrix against the selected signatures only; the result is
# used below via its `contribution` element (and later via `reconstructed`).
selected_fit_res <- fit_to_signatures(mut_mat4, selected_signatures)

# ---- Export ----
# Flip the contribution matrix so that its former columns become rows,
# then write it to disk.
selected_fit_SBS_transposed_data <- t(selected_fit_res[["contribution"]])

write.csv(
  selected_fit_SBS_transposed_data,
  file = "best_fit_SBS_transposed_data.csv"
)


# ---- Relative signature contribution per sample (108 samples) ----

# Divide every row of the transposed contribution matrix by that row's total.
# The divisor vector has one entry per row and is recycled down the columns,
# so each cell ends up divided by the sum of its own row.
SBS_relative_contribution <- selected_fit_SBS_transposed_data /
  apply(selected_fit_SBS_transposed_data, MARGIN = 1, FUN = sum)

# A row that sums to zero yields 0 / 0 = NaN; report those cells as 0 instead
SBS_relative_contribution <- replace(
  SBS_relative_contribution,
  is.nan(SBS_relative_contribution),
  0
)

# Persist the relative contributions
write.csv(SBS_relative_contribution, file = "SBS_relative_BLCA_PCAWG_contribution.csv")

# Read the file straight back so that the written row names come in as an
# ordinary first column of a data frame
data <- read.csv("SBS_relative_BLCA_PCAWG_contribution.csv", header = TRUE)

# Give that first column the header 'Sample_ID'
names(data)[1] <- "Sample_ID"

# Store the relabelled table under a new file name, without row names
write.csv(
  data,
  file = "Bladder_SBS_relative_BLCA_PCAWG_contribution.csv",
  row.names = FALSE
)



#Cosine-similarity between original and reconstructed

library(Matrix)


#fit_res <- fit_to_signatures(mut_mat4, selected_signatures)

#' Column-wise cosine similarity between two matrices
#'
#' Compares column `i` of `mut_mat4` with column `i` of `reconstructed` and
#' returns one cosine similarity per column.
#'
#' @param mut_mat4 Numeric matrix (or an object `as.matrix()` can coerce).
#' @param reconstructed Numeric matrix with exactly the same dimensions as
#'   `mut_mat4`.
#' @return An unnamed numeric vector of length `ncol(mut_mat4)`. An entry is
#'   `NaN` when either of the two compared columns consists only of zeros.
cosine_similarity_function <- function(mut_mat4, reconstructed) {
  mut_mat4 <- as.matrix(mut_mat4)
  reconstructed <- as.matrix(reconstructed)

  # Columns of different length would be recycled against each other and
  # surplus columns would be ignored, producing plausible-looking but wrong
  # similarities -- refuse mismatched shapes up front.
  if (!identical(dim(mut_mat4), dim(reconstructed))) {
    stop(
      "`mut_mat4` and `reconstructed` must have identical dimensions (got ",
      paste(dim(mut_mat4), collapse = " x "), " and ",
      paste(dim(reconstructed), collapse = " x "), ").",
      call. = FALSE
    )
  }

  # Vectorised over all columns: dot product divided by the product of the
  # two Euclidean norms.
  dot_products <- colSums(mut_mat4 * reconstructed)
  norm_products <- sqrt(colSums(mut_mat4^2)) * sqrt(colSums(reconstructed^2))

  # colSums() carries column names along; strip them so the result stays a
  # plain numeric vector.
  unname(dot_products / norm_products)
}

# Per-sample cosine similarity between the input profiles (mut_mat4) and the
# profiles reconstructed by the selected-signature refit
cosine_similarity <- cosine_similarity_function(mut_mat4, selected_fit_res$reconstructed)

# Pair each similarity with its sample name (the column names of mut_mat4)
# and write the table to disk
df <- data.frame(sample_names = colnames(mut_mat4), cosine_similarity = cosine_similarity)
write.csv(df, "BLCA_PCAWG_General_Selected_refit_cosine_similarity.csv", row.names = FALSE)


# Load the CSV file that was just written. The path has to be identical to the
# one used in write.csv() above; reading a differently named file would fail
# or silently pick up results left over from another run.
data <- read.csv("BLCA_PCAWG_General_Selected_refit_cosine_similarity.csv", header = TRUE)

# Replace 'sample_names' with 'Sample_ID'
colnames(data)[colnames(data) == "sample_names"] <- "Sample_ID"

# Save the modified data back to a new CSV file
write.csv(data, "Bladder_BLCA_PCAWG_General_Selected_refit_cosine_similarity.csv", row.names = FALSE)

Standard Refit selected in signatures • 336 views

ADD COMMENT • link 7 months ago by shakyaram079 • 0