Solution: Count Amino Acids
Generate random DNA sequence
examples/dictionary/generate_dna.py
"""Print a random DNA sequence whose length is given on the command line."""
import random
import sys

# The four DNA bases, in the same order the original script sampled from.
BASES = ('A', 'C', 'T', 'G')


def generate_dna(count):
    """Return a random DNA string made of ``count`` bases.

    Each base is drawn independently with ``random.choice``. A ``count``
    of zero or less yields an empty string, because ``range`` is empty.
    """
    return ''.join(random.choice(BASES) for _ in range(count))


def main(argv):
    """Validate the command line and print one random sequence.

    ``argv`` is expected to look like ``sys.argv``: the program name
    followed by exactly one argument, the requested length. Exits with
    the message "Need a number" when the argument is missing, when there
    are extra arguments, or when the argument is not an integer.
    """
    if len(argv) != 2:
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit("Need a number")
    try:
        count = int(argv[1])
    except ValueError:
        # Report bad input with the script's own message, not a traceback.
        sys.exit("Need a number")
    print(generate_dna(count))


if __name__ == '__main__':
    main(sys.argv)
examples/dictionary/count_amino_acids.py
"""Count the amino acids encoded by a DNA sequence, codon by codon."""

dna = 'CACCCATGAGATGTCTTAACGCTGCTTTCATTATAGCCG'

# Placeholder name used for codons whose amino acid is not filled in yet.
UNKNOWN = '?'

# Partial DNA codon -> amino acid table. Keys use DNA letters (T, not U):
# the histidine entry is 'CAT', because the RNA spelling 'CAU' can never
# occur in a DNA string.
aa_by_codon = {
    'ACG': '?',
    'CAC': 'Histidin',
    'CAT': 'Histidin',
    'CCA': 'Proline',
    'CCG': 'Proline',
    'GAT': '?',
    'GTC': '?',
    'TGA': '?',
    'TTA': '?',
    'CTG': '?',
    'CTT': '?',
    'TCA': '?',
    'TAG': '?',
    # ...
}


def count_amino_acids(sequence, table=None):
    """Return a dict mapping amino acid name to its number of occurrences.

    ``sequence`` is read in consecutive, non-overlapping groups of three
    bases starting at index 0; one or two leftover bases at the end are
    ignored. ``table`` maps codons to amino acid names and defaults to the
    module-level ``aa_by_codon``. Codons missing from the table are counted
    under ``UNKNOWN`` rather than raising ``KeyError``, since the table is
    incomplete.
    """
    if table is None:
        table = aa_by_codon
    count = {}
    # Stopping at len - 2 guarantees every slice is a full three-base codon.
    for start in range(0, len(sequence) - 2, 3):
        aa = table.get(sequence[start:start + 3], UNKNOWN)
        count[aa] = count.get(aa, 0) + 1
    return count


def main():
    """Print the amino acid counts for ``dna``, sorted by name."""
    count = count_amino_acids(dna)
    for aa in sorted(count):
        print("{} {}".format(aa, count[aa]))


if __name__ == '__main__':
    main()
examples/dictionary/amino_acid_counter.py
seq = input('Type your DNA sequence here: ').upper() codon_table = { 'Phe' : ['TTT', 'TTC'], 'Leu' : ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'], 'Ile' : ['ATT', 'ATC', 'ATA'], 'Met' : ['ATG'], 'Val' : ['GTT', 'GTC', 'GTA', 'GTG'], 'Ser' : ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'], 'Pro' : ['CCT', 'CCC', 'CCA', 'CCG'], 'Thr' : ['ACT', 'ACC', 'ACA', 'ACG'], 'Ala' : ['GCT', 'GCC', 'GCA', 'GCG'], 'Tyr' : ['TAT', 'TAC'], 'His' : ['CAT', 'CAC'], 'Gln' : ['CAA', 'CAG'], 'Asn' : ['AAT', 'AAC'], 'Lys' : ['AAA', 'AAG'], 'Asp' : ['GAT', 'GAC'], 'Glu' : ['GAA', 'GAG'], 'Cys' : ['TGT', 'TGC'], 'Trp' : ['TGG'], 'Arg' : ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'], 'Gly' : ['GGT', 'GGC', 'GGA', 'GGG'], 'STOP' : ['TAA', 'TAG', 'TGA'] } amino_acids = [] counter = {} protein_sequence = [] while seq: amino_acids.append(seq[:3]) seq = seq[3:] for codon in amino_acids: if len(codon) < 3: print('The remaining bases: {} are not coding for an amino acid'.format(codon)) for aa in codon_table: if codon in codon_table[aa]: if aa in counter: counter[aa] += 1 else: counter[aa] = 1 protein_sequence.append(aa) break print(''.join(protein_sequence)) ordered = sorted(counter.keys()) for aa in ordered: print('{} {} - {:>5.2f} %'.format(aa, counter[aa], counter[aa]/len(protein_sequence)*100))