Mutational Signatures, General Standard Refit, Cosine similarity
0
0
Entering edit mode
20 days ago

Bladder cancer has the COSMIC signatures "SBS1", "SBS2", "SBS5", "SBS8", "SBS13", "SBS29", and "SBS40". I want to reconstruct the bladder cancer mutation matrix using only these signatures. I used the standard refit technique with only these 7 mutational signatures, but I am getting low cosine similarity values between the original and reconstructed profiles.

Please check this code and fix it.


# MutationalPatterns provides get_known_signatures() and fit_to_signatures(),
# both of which are called below.
library(MutationalPatterns)

# Read the cleaned mutation matrix.
# check.names = FALSE keeps the sample IDs in the header exactly as they are
# written in the file; the default would rewrite any ID that is not a
# syntactic R name (e.g. one containing "-" or starting with a digit).
mut_mat2 <- read.csv("Cleaned_BLCA_PCAWG_MutationMatrix.csv",
                     check.names = FALSE)

# Convert to a matrix and remove the first column (assuming it's an
# identifier). drop = FALSE keeps the column name when only one sample
# column remains.
mut_mat4 <- as.matrix(mut_mat2[, -1, drop = FALSE])
if (!is.numeric(mut_mat4)) {
  stop("Mutation matrix contains non-numeric columns after dropping the ",
       "identifier column.", call. = FALSE)
}

# NOTE(review): presumably fit_to_signatures() pairs the rows of the mutation
# matrix with the rows of the signature matrix by position, not by name --
# TODO confirm that the identifier column dropped above is in the same
# mutation-type order as the COSMIC signatures. A mismatch here would lower
# the cosine similarity between original and reconstructed profiles.

# Small pseudocount to avoid zeros
mut_mat4 <- mut_mat4 + 0.0001

# Load the known COSMIC signatures
signatures <- get_known_signatures()

# Select the signatures found in bladder cancer, failing with a clear message
# if any of them is absent from the loaded signature set.
bladder_sbs <- c("SBS1", "SBS2", "SBS5", "SBS8", "SBS13", "SBS29", "SBS40")
missing_sbs <- setdiff(bladder_sbs, colnames(signatures))
if (length(missing_sbs) > 0) {
  stop("Signatures not found in the known signature set: ",
       paste(missing_sbs, collapse = ", "), call. = FALSE)
}
selected_signatures <- signatures[, bladder_sbs, drop = FALSE]

# Standard refit for selected signatures found in bladder cancer
selected_fit_res <- fit_to_signatures(mut_mat4, selected_signatures)


# Transpose the fitted contributions and save them as CSV
selected_fit_SBS_transposed_data <- t(selected_fit_res$contribution)
write.csv(selected_fit_SBS_transposed_data, "best_fit_SBS_transposed_data.csv")


# Relative contribution of each signature: every row of the transposed
# matrix is divided by its own row sum.
SBS_relative_contribution <- prop.table(
  selected_fit_SBS_transposed_data,
  margin = 1
)

# A row summing to zero yields NaN (0 / 0); report those entries as 0
SBS_relative_contribution[is.nan(SBS_relative_contribution)] <- 0

# Save the relative contributions (row names go into the first CSV column)
write.csv(SBS_relative_contribution, file = "SBS_relative_BLCA_PCAWG_contribution.csv")

# Read the file back so the row names become an ordinary first column,
# label that column 'Sample_ID', and write the final table without row names
data <- read.csv("SBS_relative_BLCA_PCAWG_contribution.csv", header = TRUE)
names(data)[1] <- "Sample_ID"
write.csv(data, "Bladder_SBS_relative_BLCA_PCAWG_contribution.csv", row.names = FALSE)



#Cosine-similarity between original and reconstructed

library(Matrix)


#fit_res <- fit_to_signatures(mut_mat4, selected_signatures)

#' Column-wise cosine similarity between two profile matrices
#'
#' Compares column i of `mut_mat4` with column i of `reconstructed` and
#' returns one cosine similarity per column.
#'
#' @param mut_mat4 Numeric matrix (or data frame) of original profiles, one
#'   column per profile.
#' @param reconstructed Numeric matrix (or data frame) of reconstructed
#'   profiles; must have the same dimensions as `mut_mat4`.
#' @return An unnamed numeric vector of length `ncol(mut_mat4)`. An entry is
#'   `NaN` when either of the two compared columns has zero norm.
cosine_similarity_function <- function(mut_mat4, reconstructed) {
  mut_mat4 <- as.matrix(mut_mat4)
  reconstructed <- as.matrix(reconstructed)

  # Fail loudly on a shape mismatch: otherwise extra columns would be ignored
  # and columns of different length could be recycled without notice.
  if (!identical(dim(mut_mat4), dim(reconstructed))) {
    stop(
      "`mut_mat4` and `reconstructed` must have the same dimensions (",
      paste(dim(mut_mat4), collapse = " x "), " vs ",
      paste(dim(reconstructed), collapse = " x "), ").",
      call. = FALSE
    )
  }

  # Vectorised over columns: dot product divided by the product of the norms
  dot_products <- colSums(mut_mat4 * reconstructed)
  norm_products <- sqrt(colSums(mut_mat4^2)) * sqrt(colSums(reconstructed^2))

  # Drop column names so the result stays a plain numeric vector
  unname(dot_products / norm_products)
}

# Cosine similarity between each original profile and its reconstruction
cosine_similarity <- cosine_similarity_function(mut_mat4, selected_fit_res$reconstructed)

# One path for the intermediate file, so the write and the read-back below
# cannot drift apart.
cosine_csv <- "BLCA_PCAWG_General_Selected_refit_cosine_similarity.csv"

# Store the per-sample similarities in a data frame and save them as CSV
df <- data.frame(sample_names = colnames(mut_mat4), cosine_similarity = cosine_similarity)
write.csv(df, cosine_csv, row.names = FALSE)


# Load the CSV file that was just written
data <- read.csv(cosine_csv, header = TRUE)

# Replace 'sample_names' with 'Sample_ID'
colnames(data)[colnames(data) == "sample_names"] <- "Sample_ID"

# Save the modified data back to a new CSV file
write.csv(data, "Bladder_BLCA_PCAWG_General_Selected_refit_cosine_similarity.csv", row.names = FALSE)
Standard Refit selected in signatures • 197 views
ADD COMMENT

Login before adding your answer.

Traffic: 1636 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6