Entering edit mode
29 days ago
rls_08
▴
40
Is there a way to sum or average two or more .cool
files corresponding to replicates in a Hi-C experiment?
Is there a way to sum or average two or more .cool
files corresponding to replicates in a Hi-C experiment?
This can be done with cooler
and numpy
, for example:
import cooler
import numpy as np
from cooler.create import create_cooler
# Sum and average the raw contact matrices of several replicate .cool files
# and write the results to "summed.cool" and "average.cool".
#
# NOTE: each replicate is loaded as a dense (n_bins x n_bins) array, so this
# is only practical for small genomes or coarse resolutions. If only the sum
# is needed, the `cooler merge` command does it without densifying.

# List of replicate .cool files
reps = ["rep_1.cool", "rep_2.cool", "rep_3.cool"]

# Open the first cooler to get the reference bins, shape and bin size
c = cooler.Cooler(reps[0])
# Keep only the genomic coordinates: any extra bin columns (e.g. balancing
# weights) describe replicate 1 alone and would be wrong for the merged data.
bins = c.bins()[["chrom", "start", "end"]][:]
shape = c.shape
binsize = c.binsize

# Initialize sum matrix
sum_mat = np.zeros(shape, dtype=np.float64)

# Loop through .cool files and sum matrices
for cf in reps:
    c = cooler.Cooler(cf)
    # Ensure resolution consistency. Raise instead of assert so the check
    # still runs when Python is started with -O.
    if c.shape != shape or c.binsize != binsize:
        raise ValueError(f"Resolution mismatch in {cf}")
    # Convert NaNs to 0 before summing
    mat = c.matrix(balance=False)[:]
    sum_mat += np.nan_to_num(mat, nan=0)

# Convert summed matrix to cooler-compatible pixel format.
# The dense matrix is symmetric, but cooler's default (symmetric-upper)
# storage expects each contact once with bin1_id <= bin2_id, so only the
# upper triangle (diagonal included) is kept. np.nonzero walks the array in
# row-major order, so pixels come out sorted by (bin1_id, bin2_id).
bin1_id, bin2_id = np.nonzero(np.triu(sum_mat) > 0)
pixels = np.column_stack((bin1_id, bin2_id))
counts = sum_mat[pixels[:, 0], pixels[:, 1]]
# Raw counts are normally whole numbers; store them as integers when that
# loses nothing, otherwise keep them as floats.
if np.array_equal(counts, np.rint(counts)):
    counts = counts.astype(np.int64)

# Save summed matrix
create_cooler(
    "summed.cool",
    bins,
    {"bin1_id": pixels[:, 0], "bin2_id": pixels[:, 1], "count": counts},
    dtypes={"count": counts.dtype},
)

# Compute and save averaged matrix
avg_mat = sum_mat / len(reps)
counts = avg_mat[pixels[:, 0], pixels[:, 1]]
create_cooler(
    "average.cool",
    bins,
    {"bin1_id": pixels[:, 0], "bin2_id": pixels[:, 1], "count": counts},
    # Averages are fractional; declare the dtype explicitly so the values
    # are not stored as integers.
    dtypes={"count": np.float64},
)

print("Summed and averaged .cool files saved.")
Note: I converted NaN
values to zeros before summing, which may not be ideal: a bin that is missing in one replicate is then treated as having zero contacts.
Use of this site constitutes acceptance of our User Agreement and Privacy Policy.