Create Excel file for experiment with random data
Input is an Excel file with the following columns:
genome name, c1, c2, c3, c4, c5, c6
- c1-c3 are numbers of cond1
- c4-c6 are numbers of cond2
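We would like to keep only the lines (rows) that fulfill both of the following inequalities:
$\log_2\!\left(\dfrac{\operatorname{avg}(c_1, c_2, c_3)}{\operatorname{avg}(c_4, c_5, c_6)}\right) > \text{limit}$
$\text{other\_limit} > \text{p.value}(\,)$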
examples/pandas/genome_create_excel.py
import datetime
import sys

import numpy as np
import pandas as pd

# Number of value columns generated for every genome row.
# NOTE(review): the accompanying description names the value columns c1..c6,
# while this script emits m0..m5 -- confirm which names the downstream
# filtering step expects.
DEFAULT_COLS_NUM = 6


def generate_frame(rows_num: int, cols_num: int = DEFAULT_COLS_NUM) -> pd.DataFrame:
    """Build a DataFrame of uniform random values in [0, 1).

    Args:
        rows_num: Number of rows; the index labels are ``g0 .. g{rows_num-1}``.
        cols_num: Number of value columns, labelled ``m0 .. m{cols_num-1}``.

    Returns:
        A ``rows_num x cols_num`` DataFrame whose index is named
        ``'genome name'``.

    Raises:
        ValueError: If ``rows_num`` or ``cols_num`` is negative.
    """
    if rows_num < 0 or cols_num < 0:
        raise ValueError("rows_num and cols_num must be non-negative")

    matrix = np.random.rand(rows_num, cols_num)
    genome_names = [f'g{i}' for i in range(rows_num)]
    column_names = [f'm{i}' for i in range(cols_num)]

    df = pd.DataFrame(matrix, index=genome_names, columns=column_names)
    df.index.name = 'genome name'
    return df


def parse_rows_num(argv) -> int:
    """Return the row count given as the first command-line argument.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first).

    Returns:
        The requested number of rows as a non-negative int.

    Raises:
        SystemExit: If the argument is missing, not an integer, or negative.
    """
    if len(argv) < 2:
        # sys.exit rather than the bare exit(): the latter is injected by the
        # ``site`` module and is absent under ``python -S`` or frozen builds.
        sys.exit("Need number of rows")
    try:
        rows_num = int(argv[1])
    except ValueError:
        sys.exit(f"Number of rows must be an integer, got {argv[1]!r}")
    if rows_num < 0:
        sys.exit(f"Number of rows must not be negative, got {rows_num}")
    return rows_num


def main(argv=None) -> None:
    """Generate the random table, print timings and save it to raw_data.xlsx.

    Args:
        argv: Argument list in ``sys.argv`` layout; defaults to ``sys.argv``.
    """
    rows_num = parse_rows_num(sys.argv if argv is None else argv)

    start_time = datetime.datetime.now()
    df = generate_frame(rows_num)
    print(df.head())

    end_generate_time = datetime.datetime.now()
    print(end_generate_time - start_time)  # time spent generating the data

    df.to_excel('raw_data.xlsx')

    end_save_time = datetime.datetime.now()
    print(end_save_time - end_generate_time)  # time spent writing the file


if __name__ == "__main__":
    main()
                   m0        m1        m2        m3        m4        m5
genome name
g0           0.775167  0.120102  0.813921  0.284670  0.074309  0.978062
g1           0.449572  0.556647  0.851609  0.711773  0.052198  0.543029
g2           0.592324  0.350038  0.273521  0.248995  0.773113  0.998779
0:00:00.007476
0:00:00.140074