In python you read the files as binary files ....
for the tile metrics:
bit_len = os.path.getsize(tile_file) * 8
fh = open(tile_file, 'rb')
file_ver = int.from_bytes(fh.read(1), ENDIAN) # version number == "2"
recordlen = int.from_bytes(fh.read(1), ENDIAN) # length of each record == 10 (for TileMetrics)
if file_ver == 2:
fh.pos = 16 # skip the above bytes which are invariant for this
elif file_ver == 3:
fh.pos = 48
else:
print("- error: unhandled file version: %s" % file_ver)
sys.exit(2)
lane_density = 0 # 100
lane_density_pf = 0 # 101
cluster_cnt = 0 # 102
cluster_pf_cnt = 0 # 103
align_cnt = 0 # 300+
align = 0 # 300+
#read records bytewise per specs in technote_rta_theory_operations.pdf from ILMN
r = int((bit_len - 16) / (recordlen * 8))
for i in range(0, r):
if file_ver == 2:
#2 bytes: lane number (uint16)
#2 bytes: tile number (uint16)
#2 bytes: code (uint16)
#4 bytes: value (float32)
lane = int.from_bytes(fh.read(2), ENDIAN)
tile = int.from_bytes(fh.read(2), ENDIAN)
metric = int.from_bytes(fh.read(2), ENDIAN)
val = struct.unpack("f", fh.read(4))[0] # 4
# 100 = cluster denisty
# 101 = cluster denisty passing filters
# 102 = number of clusters
# 103 = clusters passing filters
#if metric == 102 or metric == 103:
#if metric == 1001:
#if metric >= 300 and metric < 399:
if target_lane == 0 or lane == target_lane:
#if metric == 1003:
# pass
# #print "Lane: %s, tile: %s, metric: %s, val: %s" % (lane, tile, metric, val)
if metric == 100:
lane_density += val
tile_cnt += 1
elif metric == 101:
lane_density_pf += val
elif metric == 102:
cluster_cnt += val
elif metric == 103:
cluster_pf_cnt += val
elif metric >= 300 and metric < 399:
align_cnt += 1
align += val
elif file_ver == 3:
#2 bytes: lane number (uint16)
#4 bytes: tile number (uint32)
#1 byte: code (char)
#if code == 't' 74 in hex, 116 ascii
# 4 bytes: cluster count (float32)
# 4 bytes: pf cluster count (float32)
#if code == 'r' 72 in hex, 114 ascii
# 4 bytes: read number (uint32)
# 4 bytes: percent aligned (float32)
#0400 (lane/16) 8e060000 (tile/32) 72 (code/8)
lane = int.from_bytes(fh.read(2), ENDIAN)
tile = int.from_bytes(fh.read(4), ENDIAN) # 1678 = 068e
code = int.from_bytes(fh.read(1), ENDIAN)
if code == 116: # t, hex = 74
if report_lane:
cluster_cnt += struct.unpack("f", fh.read(4))[0] # 4
cluster_pf_cnt += struct.unpack("f", fh.read(4))[0] # 4
tile_cnt += 1
else:
_ = struct.unpack("f", fh.read(4))[0] # 4
_ = struct.unpack("f", fh.read(4))[0] # 4
elif code == 114: # r, hex = 72
# 04000000 0000c07f
# read 1673, lane 4 = 0x0689 - 8906 0000
#0000 0029 58b5 3e = 0.354188233614
read_num = int.from_bytes(fh.read(4), ENDIAN) # 1 or 4? - not used
a = struct.unpack("f", fh.read(4))[0] # 4 # nan for most values
#print "- a = %s" % a
if a > 0:
align_cnt += 1
align += a
fh.close()
In fact those commands work, though they are shell commands. Or do they have anything to do with the python module that I installed (interop)? Also, the synthax for calling
interop_summary
, for instance, is:interop_summary run_folder > path/to/my/output
(sourceinterop_summary -help
)Just fixed my typo in example - thanks. Yes those are shell command as alternative to parsing SAV data.
great, thanks very much, that does exactly what I wanted.
Glad to help you (I was working on the same task last week :-))!!!
At work we have another server in which anaconda is not installed, so that we installed the interop library with
pip install interop
. Although we managed to import the interop module within python, the same shell commands that you showed (and did the job for me) could not be called from the shell. Any idea of how/ if I can get the shell commands (e.g.interop_summary
) withoutconda install -c bioconda illumina-interop
, or only withpip install
? Thanks again