Entering edit mode
2.4 years ago
melissachua90
▴
70
I want to isolate VEGF-related and HSP-related genes for microarray time-series analysis. I'm not sure if my code is correct or whether I'm missing some genes.
# Assume that genes must have Gene Symbols; remove rows without gene symbols.
# The row label "---" is the placeholder for "no symbol" (presumably the index
# holds the gene symbol -- confirm against how df was loaded).
# regex=False matches the placeholder literally; na=True treats a missing (NaN)
# label as "no symbol" so such rows are dropped instead of poisoning the mask.
has_gene_symbol = ~df.index.str.contains("---", regex=False, na=True)
coding_gene_df = df[has_gene_symbol]

# Search the annotation text once for both gene families.
# case=False: a case-sensitive search for "Hsp" skips entries written "HSP..."
#   (and "VEGF" skips "Vegf..."), which silently drops genes.
# na=False: rows with no mrna_assignment count as "no match"; without it the
#   mask contains NaN and boolean indexing raises.
# NOTE(review): substring matching can also pick up unrelated symbols that
#   merely contain the letters (verify the hits before analysis).
mrna_annotation = coding_gene_df["mrna_assignment"]
hsp_mask = mrna_annotation.str.contains("Hsp", case=False, na=False, regex=False)
vegf_mask = mrna_annotation.str.contains("VEGF", case=False, na=False, regex=False)
hsp_df = coding_gene_df[hsp_mask]
vegf_df = coding_gene_df[vegf_mask]
Original df:
# Dump the first rows of df as a nested {column: {row_label: value}} dict so
# the column names and the row-label layout are visible.
df.head().to_dict()
{'0_P1_T1_TimeC1_PIDC4_Non-Survivor': {'---': '4.43'},
'0_P2_T1_TimeC2_PIDC2_Survivor': {'---': '4.17'},
'0_P3_T1_TimeC1_PIDC1_Survivor': {'---': '3.88'},
'0_P4_T1_TimeC1_PIDC1_Survivor': {'---': '4.4'},
'0_P5_T1_TimeC4_PIDC3_Survivor': {'---': '3.94'},
'12_P1_T4_TimeC2_PIDC4_Non-Survivor': {'---': '4.56'},
'12_P2_T4_TimeC3_PIDC2_Survivor': {'---': '3.98'},
'12_P3_T4_TimeC2_PIDC1_Survivor': {'---': '4.17'},
'12_P4_T4_TimeC2_PIDC2_Survivor': {'---': '4.63'},
'12_P5_T4_TimeC2_PIDC3_Survivor': {'---': '4.03'},
'24_P1_T5_TimeC4_PIDC4_Non-Survivor': {'---': '4.78'},
'24_P2_T5_TimeC3_PIDC2_Survivor': {'---': '3.26'},
'24_P3_T5_TimeC3_PIDC1_Survivor': {'---': '3.53'},
'24_P4_T5_TimeC3_PIDC2_Survivor': {'---': '4.54'},
'24_P5_T5_TimeC3_PIDC3_Survivor': {'---': '4.31'},
'48_P1_T6_TimeC3_PIDC1_Non-Survivor': {'---': '3.69'},
'48_P2_T6_TimeC3_PIDC3_Survivor': {'---': '3.78'},
'48_P3_T6_TimeC3_PIDC1_Survivor': {'---': '5.14'},
'48_P4_T6_TimeC3_PIDC2_Survivor': {'---': '4.35'},
'48_P5_T6_TimeC3_PIDC3_Survivor': {'---': '4.33'},
'4_P1_T2_TimeC1_PIDC4_Non-Survivor': {'---': '3.47'},
'4_P2_T2_TimeC2_PIDC1_Survivor': {'---': '4.1'},
'4_P3_T2_TimeC2_PIDC1_Survivor': {'---': '4.8'},
'4_P5_T2_TimeC2_PIDC3_Survivor': {'---': '3.63'},
'8_P1_T3_TimeC4_PIDC4_Non-Survivor': {'---': '3.69'},
'8_P2_T3_TimeC2_PIDC2_Survivor': {'---': '3.78'},
'8_P3_T3_TimeC4_PIDC1_Survivor': {'---': '3.99'},
'8_P4_T3_TimeC4_PIDC1_Survivor': {'---': '3.38'},
'8_P5_T3_TimeC4_PIDC3_Survivor': {'---': '4.53'},
'Cytoband': {'---': '---'},
'Entrez Gene': {'---': '---'},
'GO Biological Process': {'---': nan},
'GO Cellular Component': {'---': nan},
'GO Molecular Function': {'---': nan},
'GO_biological_process': {'---': '---'},
'GO_cellular_component': {'---': '---'},
'GO_molecular_function': {'---': '---'},
'Gene Title': {'---': '---'},
'Pathway': {'---': nan},
'Probe ID': {'---': '7892505'},
'Protein Domains': {'---': nan},
'Swiss-Prot': {'---': '---'},
'UniGene': {'---': nan},
'category': {'---': 'normgene->intron'},
'crosshyb_type': {'---': '---'},
'gene_assignment': {'---': '---'},
'mrna_assignment': {'---': '--- // --- // neg_control // --- // --- // --- // --- // --- // ---'},
nan: {'---': nan},
'pathway': {'---': '---'},
'protein_domains': {'---': '---'},
'seqname': {'---': '---'},
'start': {'---': '---'},
'stop': {'---': '---'},
'strand': {'---': '---'},
'swissprot': {'---': '---'},
'total_probes': {'---': '4'},
'unigene': {'---': '---'}}