tfhahn ▴ 50 • 6.7 years ago
I need help cutting down on computation time. I have been trying in vain for the past 10 hours to use only 7 instead of 63 decimals, and I keep getting error messages that there is nothing to plot. From my limited understanding of Python 2.7, I believe the plotting function expects float64 instead of float8. I have copy-pasted my Python 2.7 code below. Can somebody please change it so that it will plot float8 time series trajectories? Thanks a lot in advance.
from __future__ import print_function
import os
import timeit
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import gpl
import conf.settings
from util import count_samples
from correlation import CorrelationMatrix
class ExpressionMatrix(object):
    def __init__(self, platform=None, series=None, invert=False, limit=0,
                 top=10, **kwargs):
        data_path = conf.settings.DATA_PATH
        self.sample_number = 0
        self.invert = invert
        self.top = top
        if series:
            # Load a single series and downcast its expression columns.
            file_path = os.path.join(data_path, series + '.csv')
            self.df = pd.read_csv(file_path, index_col=0)
            sample_number = count_samples(self.df)
            #print(self.df)
            print(self.df.dtypes)
            print(self.df.shape)
            # Note: assigning back through iloc may silently keep the original
            # float64 column dtype in some pandas versions.
            self.df.iloc[:, :sample_number] = self.df.iloc[:, :sample_number].astype('float32')
            self.df.info()  # .info() prints its summary itself
        elif platform:
            # Build one matrix from every series available for the platform.
            count = 0
            # Use the platform id passed to __init__, not the global args.
            platform = gpl.Platform(platform, parse=False, meta_only=True)
            series = platform.get_series(download=False)
            for index, dataset in enumerate(series):
                file_path = os.path.join(data_path, dataset + '.csv')
                if not os.path.exists(file_path):
                    file_path = os.path.join(data_path, dataset + '.tar.csv')
                if not os.path.exists(file_path):
                    continue
                df = pd.read_csv(file_path, index_col=0)
                count += 1
                sample_number = count_samples(df)
                expression_matrix = df.iloc[:, :sample_number]
                if count == 1:
                    matrix = expression_matrix
                else:
                    matrix = pd.concat([matrix, expression_matrix], axis=1)
                print('Concated matrix: %s' % dataset, matrix.shape)
                if limit:
                    if count > limit:
                        break
            annotations = df.iloc[:, sample_number:]
            self.df = pd.concat([matrix, annotations], axis=1)
            self.sample_number = count_samples(self.df)
        for key, value in kwargs.items():
            setattr(self, key, value)
        if self.unlog:
            self.df.iloc[:, :sample_number] = 2**self.df.iloc[:, :sample_number]

    def correlations(self):
        return CorrelationMatrix(self)
def main(args):
    expressions = ExpressionMatrix(**vars(args))
    if args.load:
        correlations = CorrelationMatrix(expressions, calc=False)
        correlations.load()
    else:
        correlations = expressions.correlations()
        if args.save:
            correlations.save()
    print(correlations.df.shape)
    print(args.similarity)
    times = []
    if args.choices:
        for i in range(args.trials):
            start_time = timeit.default_timer()
            correlations.correlate(args.choices)
            stop_time = timeit.default_timer()
            difference = stop_time - start_time
            times.append(difference)
        print('Average duration: ', sum(times)/len(times))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--series', '-s', type=str)
    parser.add_argument('--platform', '-p', type=str)
    parser.add_argument('--invert', '-i', action='store_true')
    parser.add_argument('--choices', '-c', type=str, nargs='+', default='')
    parser.add_argument('--limit', '-l', type=int, default=0)
    parser.add_argument('--top', '-t', type=int, default=10)
    parser.add_argument('--similarity', '-sim', type=str, default='pearson',
                        help='''Method of similarity measure, which can be
                        pearson, kendall or spearman (default: pearson).''')
    parser.add_argument('--trials', '-tr', type=int, default=1)
    parser.add_argument('--plot', '-plt', action='store_true')
    parser.add_argument('--unlog', '-ul', action='store_true')
    parser.add_argument('--save', '-sa', action='store_true')
    parser.add_argument('--load', '-lo', action='store_true')
    args = parser.parse_args()
    main(args)
First, I'm not aware of a float8 datatype in numpy. Second, in a minimal example I wasn't able to recreate the issue with float16. It might be worth adding some example input data for others to test with.
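For what it's worth, a minimal sketch along these lines (synthetic data rather than the GEO series used above) plots a float16 series without error for me:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# A small synthetic time series, downcast to half precision.
index = pd.date_range('2017-01-01', periods=200, freq='D')
values = np.random.randn(200).astype('float16')
ts = pd.Series(values, index=index)

print(ts.dtype)  # float16

# Plotting the float16 series works in this minimal test.
ts.plot()
plt.show()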
Why would you want to reduce the float size? As far as I'm aware, there is no speed difference when using 64-bit floating point numbers, only a difference in the amount of memory required to store them. All reducing your float size will do is increase your rounding errors.
If your computation is too slow, the problem is more likely the algorithm or the size of the dataset.
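To make the memory point concrete, a rough sketch (the array size here is arbitrary) comparing the footprint and the rounding error after a float32 downcast:

import numpy as np
import pandas as pd

# 10,000 rows x 100 columns of expression-like values in float64.
df64 = pd.DataFrame(np.random.rand(10000, 100))
df32 = df64.astype('float32')

# The storage footprint roughly halves (about 8 MB vs about 4 MB here)...
print(df64.memory_usage().sum())
print(df32.memory_usage().sum())

# ...and the main other effect is the rounding error introduced.
print((df64 - df32).abs().max().max())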