Yes, there's a cooltools
(link) utility, random-sample
(link), to do so.
You can also use, for example, NumPy to downsample .cool files:
#!/usr/bin/env python
"""
Randomly sample (without replacement) a .cool file.
"""
import argparse
import cooler
import h5py
import numpy as np
import sys
def _sample_size_arg(text):
    """Parse a ``--sample_size`` value: a finite, non-negative float."""
    try:
        value = float(text)
    except ValueError:
        raise argparse.ArgumentTypeError('invalid float value: %r' % (text,))
    if not np.isfinite(value) or value < 0:
        raise argparse.ArgumentTypeError(
            'must be a finite, non-negative number, got %r' % (text,)
        )
    return value


def main(argv=None):
    """Randomly sample reads (without replacement) from a .cool file.

    Reads ``pixels/count`` from the input file, draws ``sample_size`` reads
    uniformly without replacement, and writes the pixels that received at
    least one sampled read to a new cooler file.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments to parse. ``None`` (the default) makes
        argparse read ``sys.argv[1:]``.

    Raises
    ------
    SystemExit
        Raised by argparse when the arguments are missing or invalid.
    ValueError
        If more reads are requested than the input file contains.
    """
    # NOTE(review): the per-read index array built below uses NumPy's default
    # integer type (typically 8 bytes per read), so the memory figure quoted
    # in the description looks optimistic -- confirm before relying on it.
    ap = argparse.ArgumentParser(
        description='Fast random sampling of a .cool file. '
        'Required memory: 4 * total_reads bytes.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    ap.add_argument(
        "-i",
        "--infile",
        dest="infile",
        required=True,
        type=str,
        help="Input *.cool to downsample"
    )
    ap.add_argument(
        "-o",
        "--outfile",
        dest="outfile",
        required=True,
        type=str,
        help="Output downsampled *.cool"
    )
    ap.add_argument(
        "-s",
        "--sample_size",
        dest="sample_size",
        required=True,
        type=_sample_size_arg,
        help="If integer, number of reads to sample; "
        "if float, fraction of reads to sample"
    )
    args = ap.parse_args(argv)
    infile = args.infile
    outfile = args.outfile
    sample_size = args.sample_size

    # The context manager closes the HDF5 handle even if sampling or
    # writing fails part-way through.
    with h5py.File(infile, 'r') as fh:
        w = fh['pixels/count'][:].astype('int64')
        wlen = len(w)
        wsum = np.sum(w)

        # Values <= 1 are interpreted as a fraction of the total read count.
        if sample_size <= 1:
            sample_size = sample_size * wsum
        sample_size = int(sample_size)

        # Fail early with a clear message instead of after building the
        # (large) per-read index array.
        if sample_size > wsum:
            raise ValueError(
                'Cannot sample %d reads without replacement: '
                'input contains only %d reads.' % (sample_size, wsum)
            )

        print(
            infile, '; sampling', sample_size, 'reads from', wsum, '...',
            file=sys.stderr
        )

        # Expand every pixel index once per read it holds, draw reads
        # without replacement, then collapse back to (pixel, count) pairs.
        ws_bins, ws_counts = np.unique(
            np.random.choice(
                np.repeat(np.arange(wlen), w), size=sample_size, replace=False
            ),
            return_counts=True
        )

        print('Writing...', file=sys.stderr)
        c = cooler.Cooler(fh)
        cooler.create_cooler(
            outfile,
            bins=c.bins()[:],
            pixels={
                'bin1_id': fh['pixels/bin1_id'][:][ws_bins],
                'bin2_id': fh['pixels/bin2_id'][:][ws_bins],
                'count': ws_counts
            },
            # Tolerate input files that lack the assembly attribute.
            assembly=fh.attrs.get('genome-assembly'),
            ordered=True
        )

    print('Done.', file=sys.stderr)


if __name__ == '__main__':
    main()
The program FAN-C also has a utility for random subsampling of a matrix:
fanc downsample
/fanc hic --downsample
For example, via the CLI:
Or, for example, via the Python API: