Hi,
This is my first post here, and I am a newbie to the world of genome sequencing and using the relevant databases, so sorry in advance for this!
Problem statement: I need SNP variants for a specific list of bacterial species and genes. For each variant I need its location within the gene, plus the wild-type (wt) and mutant alleles. It would be a bonus if I could also get mutation-effect information.
I don't know whether this is impossible or trivial, but this is the code I was using to try to get the data from PATRIC:
def fetch_feature_with_variants(feature_id, timeout=30.0):
    """
    Fetch the record for a single genome feature from the PATRIC data API.

    NOTE(review): the function name suggests the response carries a
    'variants' entry, but nothing here checks for one; the body is returned
    exactly as the server sent it. Confirm against the API schema.

    Parameters:
    feature_id (str): The feature ID to retrieve.
    timeout (float): Seconds to wait for the server before giving up.

    Returns:
    The decoded JSON body on success; an empty dict when the server answers
    with an error status or with a body that is not valid JSON.
    """
    feature_url = f"https://www.patricbrc.org/api/genome_feature/{feature_id}/"
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server, which would hang a caller that fetches many features in a loop.
    response = requests.get(
        feature_url,
        headers={"Accept": "application/json"},
        timeout=timeout,
    )
    if not response.ok:
        print(f"Failed to retrieve feature data for feature_id: {feature_id}")
        return {}
    try:
        return response.json()
    except ValueError:
        # A non-JSON body is treated like any other failed retrieval so the
        # caller always gets something it can test with `in`.
        print(f"Failed to retrieve feature data for feature_id: {feature_id}")
        return {}
def fetch_patric_snps_with_effects(genome_name, timeout=30.0):
    """
    Query the PATRIC genome_feature endpoint and build one record per result.

    Each returned item is reduced to a fixed set of keys, then the feature is
    fetched again individually via fetch_feature_with_variants() so that any
    'variants' entry in that response can be attached as 'variant_details'.

    NOTE(review): in the example run that accompanies this code, patric_id,
    alt_nt, ref_nt and snp_type were None on every record and variant_details
    was always empty. Presumably genome_feature records do not carry SNP
    fields at all; confirm against the API schema before relying on them.

    Parameters:
    genome_name (str): Value sent as the "genome_name" query parameter.
    timeout (float): Seconds to wait for the listing request before giving up.

    Returns:
    list: One dict per item in the response, each with the keys patric_id,
    genome_name, position, feature_id, alt_nt, ref_nt, snp_type and
    variant_details. An empty list when the listing request fails.
    """
    snp_base_url = "https://www.patricbrc.org/api/genome_feature/"
    headers = {"Accept": "application/json"}
    snp_params = {"genome_name": genome_name}
    # Explicit timeout: requests otherwise blocks forever on a stalled server.
    response = requests.get(
        snp_base_url,
        params=snp_params,
        headers=headers,
        timeout=timeout,
    )
    if not response.ok:
        print(f"Error retrieving SNP data: {response.status_code} - {response.text}")
        return []
    all_snp_data = response.json()

    enriched_snp_data = []
    for snp in all_snp_data:
        feature_id = snp.get("feature_id")
        snp_record = {
            "patric_id": snp.get("patric_id"),
            "genome_name": snp.get("genome_name"),
            "position": snp.get("location"),
            "feature_id": feature_id,
            "alt_nt": snp.get("alt_nt"),
            "ref_nt": snp.get("ref_nt"),
            "snp_type": snp.get("snp_type"),
            # Defaulted up front so every record has the same keys, even when
            # the item has no feature_id and no follow-up request is made.
            "variant_details": {},
        }
        if feature_id:
            # One extra HTTP request per feature.
            feature_data = fetch_feature_with_variants(feature_id)
            if "variants" in feature_data:
                snp_record["variant_details"] = feature_data["variants"]
        enriched_snp_data.append(snp_record)
    return enriched_snp_data
# Value sent to the API as the "genome_name" query parameter.
genome_name = "Mycobacterium Tuberculosis"
# Runs the whole fetch at module level; the result is a list of per-feature
# record dicts, or an empty list if the listing request fails.
snp_with_effects = fetch_patric_snps_with_effects(genome_name)
Example output:
[{'patric_id': None,
'genome_name': 'Mycobacterium tuberculosis 3067CJ',
'position': 'complement(154769..155632)',
'feature_id': 'RefSeq.1438872.3.JKBJ01000023.CDS.154769.155632.rev',
'alt_nt': None,
'ref_nt': None,
'snp_type': None,
'variant_details': {}},
{'patric_id': None,
'genome_name': 'Mycobacterium tuberculosis 2995AB',
'position': 'complement(34785..35084)',
'feature_id': 'RefSeq.1438871.3.JKBE01000037.CDS.34785.35084.rev',
'alt_nt': None,
'ref_nt': None,
'snp_type': None,
'variant_details': {}},
{'patric_id': None,
'genome_name': 'Mycobacterium tuberculosis 1010SM',
'position': 'complement(87712..88197)',
'feature_id': 'RefSeq.1438833.3.JKAO01000022.CDS.87712.88197.rev',
'alt_nt': None,
'ref_nt': None,
'snp_type': None,
'variant_details': {}},
I have tried a few different bacteria, but the variant details are always empty. I would appreciate any advice on this approach, or on a better alternative approach for getting this information.
Thank you,
Jen
It may be easier to use the command-line utility instead: https://www.bv-brc.org/docs/cli_tutorial/index.html