πŸ’» Π‘8: Π’Ρ€Π°Π½ΡΠ»Π°Ρ†ΠΈΡ˜Π° Π½Π° сСквСнца¢

Опис Π½Π° Π±Π°Ρ€Π°ΡšΠ΅

Од ΠΏΡ€ΠΎΠΈΠ·Π²ΠΎΠ»Π½ΠΈΠΎΡ‚ CDS ΡˆΡ‚ΠΎ Π³ΠΎ ΠΈΠ·Π±Ρ€Π°Π²Ρ‚Π΅ Π΄ΠΎΠ±ΠΈΠ²Ρ‚Π΅ стоп/старт ΠΊΠΎΠ΄ΠΎΠ½ΠΈ. НаправСтС Ρ‚Ρ€Π°Π½ΡΠ»Π°Ρ†ΠΈΡ˜Π° Π·Π° Π΄Π° Π³ΠΈ Π΄ΠΎΠ±ΠΈΠ΅Ρ‚Π΅ ситС ΠΌΠΎΠΆΠ½ΠΈ протСински сСквСнции.

from Bio import SeqIO
from Bio.Data import CodonTable

gene_record = SeqIO.read("yersinia-pestis-fasta/NC_005816.gb", "genbank")
mito_table = CodonTable.unambiguous_rna_by_id[11]

# Π§ΠΈΡ‚Π°ΡšΠ΅ Π½Π° 'pim'
sub_record = gene_record[4342:4780]
# Π’Ρ€Π°Π½ΡΠΊΡ€ΠΈΠΏΡ†ΠΈΡ˜Π° Π½Π° 'pim'
sub_rna = sub_record.seq.transcribe()
# Листи Π·Π° ΠΏΠΎΠ·ΠΈΡ†ΠΈΠΈ Π½Π° старт ΠΈ стоп ΠΊΠΎΠ΄ΠΎΠ½ΠΈ Π·Π° 'pim'
start_codon_positions = []
stop_codon_positions = []
# Листи Π·Π° ΠΈΠΌΠΈΡšΠ°Ρ‚Π° Π½Π° Π½Π°Ρ˜Π΄Π΅Π½ΠΈΡ‚Π΅ ΠΊΠΎΠ΄ΠΎΠ½ΠΈ Π·Π° 'pim'
start_found = []
stop_found = []
# ΠšΠΎΠ½Π²Π΅Ρ€Π·ΠΈΡ˜Π° Π²ΠΎ стринг, Π·Π° полСсна Ρ€Π°Π±ΠΎΡ‚Π°
rna_string = str(sub_rna)

"""
ИдСја Π½Π° ΠΏΡ€Π΅Π±Π°Ρ€ΡƒΠ²Π°ΡšΠ΅: 
- Π—Π°ΠΏΠΎΡ‡Π½ΠΈ ΠΎΠ΄ 0 ΠΈ Π΄Π²ΠΈΠΆΠΈ сС Π΄ΠΎ Ρ†Π΅Π»Π°Ρ‚Π° сСквСнца -2
- Π—Π΅ΠΌΠΈ Π³ΠΎ ΠΌΠΎΠΌΠ΅Π½Ρ‚Π°Π»Π½ΠΈΠΎΡ‚ Ρ‚Ρ€ΠΈΠΏΠ»Π΅Ρ‚ 
- ΠŸΡ€ΠΎΠ²Π΅Ρ€ΠΈ Π΄Π°Π»ΠΈ Π΅ Π²ΠΎ старт ΠΊΠΎΠ΄ΠΎΠ½ΠΈΡ‚Π΅ -> Ако Π΅ Π·Π°Ρ‡ΡƒΠ²Π°Ρ˜ Ρ‚ΠΎΡ€ΠΊΠ° (ΠΏΠΎΠ·ΠΈΡ†ΠΈΡ˜Π°, Ρ‚Ρ€ΠΈΠΏΠ»Π΅Ρ‚)
- Ако Π½Π΅, ΠΏΡ€ΠΎΠ²Π΅Ρ€ΠΈ Π΄Π°Π»ΠΈ Π΅ Π²ΠΎ стоп ΠΊΠΎΠ΄ΠΎΠ½ΠΈΡ‚Π΅ -> Ако Π΅ Π·Π°Ρ‡ΡƒΠ²Π°Ρ˜ Ρ‚ΠΎΡ€ΠΊΠ° (ΠΏΠΎΠ·ΠΈΡ†ΠΈΡ˜Π°, Ρ‚Ρ€ΠΈΠΏΠ»Π΅Ρ‚)
- Π—Π³ΠΎΠ»Π΅ΠΌΠΈ Π³ΠΎ Π±Ρ€ΠΎΡ˜Π°Ρ‡ΠΎΡ‚ 
"""

k = 0
while k < len(rna_string)-2:
    # Π΅ΠΊΡΡ‚Ρ€Π°Ρ…ΠΈΡ€Π°Ρ˜ ΠΌΠΎΠΌΠ΅Π½Ρ‚Π°Π»Π½Π° Ρ‚Ρ€ΠΈ-Π½ΡƒΠΊΠ»Π΅ΠΎΡ‚ΠΈΠ΄Π½Π° суб-сСквСнца
    current_codon = rna_string[k:k+3]
    if current_codon in mito_table.start_codons:
        start_found.append((k, current_codon))
    elif current_codon in mito_table.stop_codons:
        stop_found.append((k, current_codon))
    k += 1

Π’ΠΎ ΠΏΡ€Π΅Ρ‚Ρ…ΠΎΠ΄Π½ΠΎΡ‚ΠΎ Π±Π°Ρ€Π°ΡšΠ΅ (πŸ’» Π‘7) Π³ΠΈ Π΄ΠΎΠ·Π·Π½Π°Π²ΠΌΠ΅ ситС 64 старт ΠΊΠΎΠ΄ΠΎΠ½ΠΈ ΠΈ 21 стоп ΠΊΠΎΠ΄ΠΎΠ½ΠΈ. ОвиС Π΄Π²Π΅ Π±Ρ€ΠΎΡ˜ΠΊΠΈ Π½ΠΈ ΡƒΠΊΠ°ΠΆΡƒΠ²Π°Π°Ρ‚ Π½Π° Ρ‚ΠΎΠ°Π΄Π΅ΠΊΠ° ΠΌΠΎΠΆΠ΅ΠΌΠ΅ Π΄Π° ΠΊΠΎΠΌΠ±ΠΈΠ½ΠΈΡ€Π°ΠΌΠ΅ Π½Π° Ρ€Π°Π·Π»ΠΈΡ‡Π½ΠΈ Π½Π°Ρ‡ΠΈΠ½ΠΈ Π·Π° Π΄Π° СкстрахирамС протСински сСквСнци Ρ‚.Π΅. Π΄Π° Π³ΠΈ Π΄ΠΎΠ±ΠΈΠ΅ΠΌΠ΅ соодвСтнитС Π°ΠΌΠΈΠ½ΠΎ-кисСлСни Π·Π° протСинската сСквСнца ΠΎΠ΄ Ρ‚ΠΎΠ° ΠΏΠ°Ρ€Ρ‡Π΅ Π½Π° РНА.

print(f'Π’ΠΊΡƒΠΏΠ΅Π½ Π±Ρ€ΠΎΡ˜ Π½Π° старт ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: {len(start_found)}')
print(f'Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½ со ΠΏΠΎΠ·ΠΈΡ†ΠΈΡ˜Π°: {start_found}')
print()
print(f'Π’ΠΊΡƒΠΏΠ΅Π½ Π±Ρ€ΠΎΡ˜ Π½Π° стоп ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: {len(stop_found)}')
print(f'Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½ со ΠΏΠΎΠ·ΠΈΡ†ΠΈΡ˜Π°: {stop_found}')
Π’ΠΊΡƒΠΏΠ΅Π½ Π±Ρ€ΠΎΡ˜ Π½Π° старт ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: 64
Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½ со ΠΏΠΎΠ·ΠΈΡ†ΠΈΡ˜Π°: [(0, 'AUG'), (12, 'AUG'), (15, 'AUC'), (26, 'AUU'), (29, 'UUG'), (33, 'UUG'), (42, 'AUA'), (44, 'AUU'), (50, 'AUC'), (53, 'AUC'), (58, 'GUG'), (64, 'UUG'), (80, 'AUA'), (82, 'AUA'), (96, 'AUC'), (99, 'UUG'), (113, 'AUU'), (118, 'AUA'), (124, 'UUG'), (130, 'AUU'), (134, 'AUU'), (135, 'UUG'), (142, 'AUG'), (147, 'AUC'), (151, 'AUG'), (190, 'AUA'), (192, 'AUU'), (216, 'AUU'), (217, 'UUG'), (225, 'AUC'), (229, 'AUA'), (242, 'AUG'), (246, 'AUG'), (249, 'AUA'), (251, 'AUC'), (254, 'AUU'), (261, 'AUA'), (270, 'AUU'), (280, 'AUG'), (286, 'AUA'), (289, 'CUG'), (293, 'CUG'), (298, 'UUG'), (304, 'AUU'), (315, 'AUA'), (329, 'AUA'), (331, 'AUG'), (339, 'AUA'), (343, 'AUC'), (354, 'AUA'), (368, 'AUC'), (370, 'CUG'), (381, 'AUA'), (387, 'GUG'), (396, 'AUA'), (404, 'AUC'), (408, 'AUC'), (412, 'CUG'), (416, 'UUG'), (421, 'CUG'), (425, 'AUA'), (427, 'AUU'), (430, 'AUC'), (433, 'AUU')]

Π’ΠΊΡƒΠΏΠ΅Π½ Π±Ρ€ΠΎΡ˜ Π½Π° стоп ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: 21
Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½ со ΠΏΠΎΠ·ΠΈΡ†ΠΈΡ˜Π°: [(13, 'UGA'), (109, 'UAG'), (115, 'UAA'), (194, 'UAA'), (230, 'UAA'), (247, 'UGA'), (256, 'UAA'), (265, 'UAG'), (272, 'UAA'), (281, 'UGA'), (299, 'UGA'), (313, 'UAA'), (316, 'UAA'), (334, 'UAG'), (355, 'UAA'), (364, 'UAA'), (382, 'UAG'), (385, 'UAG'), (388, 'UGA'), (397, 'UAA'), (435, 'UAA')]

Π‘Π΅Π³Π° бидСјќи ΠΈΠΌΠ° Π³ΠΎΠ»Π΅ΠΌ Π±Ρ€ΠΎΡ˜ Π½Π° ΠΊΠΎΠΌΠ±ΠΈΠ½Π°Ρ†ΠΈΠΈ ќС ΠΈΠ·Π±Π΅Ρ€Π°ΠΌ Π½Π΅ΠΊΠΎΠ»ΠΊΡƒ (3-4) Π·Π° Π΄Π° дСмонстрирам ΠΊΠ°ΠΊΠΎ ΠΈΠ·Π³Π»Π΅Π΄Π° процСсот Π½Π° Ρ‚Ρ€Π°Π½ΡΠ»Π°Ρ†ΠΈΡ˜Π° ΠΎΠ΄ Ρ‚Π°Π° РНА-субсСквСнца, која:

  • ΠΏΠΎΡ‡Π½ΡƒΠ²Π° Π½Π° ΠΏΠΎΠ·ΠΈΡ†ΠΈΡ˜Π°Ρ‚Π° Π½Π° ΠΏΡ€Π²Π°Ρ‚Π° Π±Π°Π·Π° ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΠΎΡ‚ старт ΠΊΠΎΠ΄ΠΎΠ½

  • Π·Π°Π²Ρ€ΡˆΡƒΠ²Π° Π½Π° ΠΏΠΎΠ·ΠΈΡ†ΠΈΡ˜Π°Ρ‚Π° Π½Π° послСдната Π±Π°Π·Π° ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΠΎΡ‚ стоп ΠΊΠΎΠ΄ΠΎΠ½ (ΠΏΠΎΠ·ΠΈΡ†ΠΈΡ˜Π° + 2; Π·Π° послСднитС 2 Π±Π°Π·ΠΈ ΠΎΠ΄ стоп ΠΊΠΎΠ΄ΠΎΠ½ΠΎΡ‚)

Π’Ρ€Π°ΡΠ½Π»Π°Ρ†ΠΈΡ˜Π° 1ΒΆ

print(f"Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½: {start_found[0]}")
print(f"Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½: {stop_found[0]}")
read_seq1 = sub_rna[start_found[0][0]:stop_found[0][0]+2]
print(f'Екстрахирана сСквСнца ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΡ‚Π΅ ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: {read_seq1}')
print(f'Π”ΠΎΠ»ΠΆΠΈΠ½Π°: {len(read_seq1)}')
Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½: (0, 'AUG')
Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½: (13, 'UGA')
Екстрахирана сСквСнца ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΡ‚Π΅ ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: AUGGGAGGGGGAAUG
Π”ΠΎΠ»ΠΆΠΈΠ½Π°: 15

ИзлСз ΠΏΠΎ ΠΏΡ€Π΅Π²Π΅Π΄ΡƒΠ²Π°ΡšΠ΅:

protein_seq1 = read_seq1.translate(table='Bacterial')
print(f'ΠŸΡ€ΠΎΡ‚Π΅ΠΈΠ½ΡΠΊΠ° сСквСнца 1:\n{protein_seq1}')
print(f'Π”ΠΎΠ»ΠΆΠΈΠ½Π°: {len(protein_seq1)}')
ΠŸΡ€ΠΎΡ‚Π΅ΠΈΠ½ΡΠΊΠ° сСквСнца 1:
MGGGM
Π”ΠΎΠ»ΠΆΠΈΠ½Π°: 5

Π’Ρ€Π°ΡΠ½Π»Π°Ρ†ΠΈΡ˜Π° 2ΒΆ

print(f"Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½: {start_found[2]}")
print(f"Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½: {stop_found[13]}")
read_seq2 = sub_rna[start_found[2][0]:stop_found[12][0]+2]
print(f'Екстрахирана сСквСнца ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΡ‚Π΅ ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: {read_seq2}')
print(f'Π”ΠΎΠ»ΠΆΠΈΠ½Π°: {len(read_seq2)}')
Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½: (15, 'AUC')
Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½: (334, 'UAG')
Екстрахирана сСквСнца ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΡ‚Π΅ ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: AUCUCAAAGUUAUUUUGCUUGGCUCUCAUAUUUUUAUCAUCAAGUGGCCUUGCAGAAAAAAACACAUAUACAGCAAAAGACAUCUUGCAAAACCUAGAAUUAAAUACCUUUGGCAAUUCAUUGUCUCAUGGCAUCUAUGGGAAACAGACAACCUUCAAGCAAACCGAGUUUACAAAUAUUAAAAGCAACACCAAAAAACACAUUGCACUUAUCAAUAAAGACAACUCAUGGAUGAUAUCAUUAAAAAUACUAGGAAUUAAGAGAGAUGAGUAUACUGUCUGUUUUGAAGAUUUCUCUCUAAUA
Π”ΠΎΠ»ΠΆΠΈΠ½Π°: 303

ИзлСз ΠΏΠΎ ΠΏΡ€Π΅Π²Π΅Π΄ΡƒΠ²Π°ΡšΠ΅:

protein_seq2 = read_seq2.translate(table='Bacterial')
print(f'ΠŸΡ€ΠΎΡ‚Π΅ΠΈΠ½ΡΠΊΠ° сСквСнца 2:\n{protein_seq2}')
print(f'Π”ΠΎΠ»ΠΆΠΈΠ½Π°: {len(protein_seq2)}')
ΠŸΡ€ΠΎΡ‚Π΅ΠΈΠ½ΡΠΊΠ° сСквСнца 2:
ISKLFCLALIFLSSSGLAEKNTYTAKDILQNLELNTFGNSLSHGIYGKQTTFKQTEFTNIKSNTKKHIALINKDNSWMISLKILGIKRDEYTVCFEDFSLI
Π”ΠΎΠ»ΠΆΠΈΠ½Π°: 101

Π’Ρ€Π°ΡΠ½Π»Π°Ρ†ΠΈΡ˜Π° 3ΒΆ

print(f"Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½: {start_found[34]}")
print(f"Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½: {stop_found[20]}")
read_seq3 = sub_rna[start_found[34][0]:stop_found[20][0]+2]
print(f'Екстрахирана сСквСнца ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΡ‚Π΅ ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: {read_seq3}')
print(f'Π”ΠΎΠ»ΠΆΠΈΠ½Π°: {len(read_seq3)}')
Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½: (251, 'AUC')
Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½: (435, 'UAA')
Екстрахирана сСквСнца ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΡ‚Π΅ ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: AUCAUUAAAAAUACUAGGAAUUAAGAGAGAUGAGUAUACUGUCUGUUUUGAAGAUUUCUCUCUAAUAAGACCGCCAACAUAUGUAGCCAUACAUCCUCUACUUAUAAAAAAAGUAAAAUCUGGAAACUUUAUAGUAGUGAAAGAAAUAAAGAAAUCUAUCCCUGGUUGCACUGUAUAUUAUCAUUA
Π”ΠΎΠ»ΠΆΠΈΠ½Π°: 186

ИзлСз ΠΏΠΎ ΠΏΡ€Π΅Π²Π΅Π΄ΡƒΠ²Π°ΡšΠ΅:

protein_seq3 = read_seq3.translate(table='Bacterial')
print(f'ΠŸΡ€ΠΎΡ‚Π΅ΠΈΠ½ΡΠΊΠ° сСквСнца 3:\n{protein_seq3}')
print(f'Π”ΠΎΠ»ΠΆΠΈΠ½Π°: {len(protein_seq3)}')
ΠŸΡ€ΠΎΡ‚Π΅ΠΈΠ½ΡΠΊΠ° сСквСнца 3:
IIKNTRN*ER*VYCLF*RFLSNKTANICSHTSSTYKKSKIWKLYSSERNKEIYPWLHCILSL
Π”ΠΎΠ»ΠΆΠΈΠ½Π°: 62

Π’Ρ€Π°Π½ΡΠ»Π°Ρ†ΠΈΡ˜Π° 4ΒΆ

print(f"Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½: {start_found[0]}")
print(f"Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½: {stop_found[-1]}")
read_seq4 = sub_rna[start_found[0][0]:stop_found[20][0]+2]
print(f'Екстрахирана сСквСнца ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΡ‚Π΅ ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: {read_seq4}')
print(f'Π”ΠΎΠ»ΠΆΠΈΠ½Π°: {len(read_seq4)}')
Π‘Ρ‚Π°Ρ€Ρ‚ ΠΊΠΎΠ΄ΠΎΠ½: (0, 'AUG')
Π‘Ρ‚ΠΎΠΏ ΠΊΠΎΠ΄ΠΎΠ½: (435, 'UAA')
Екстрахирана сСквСнца ΠΎΠ΄ ΠΈΠ·Π±Ρ€Π°Π½ΠΈΡ‚Π΅ ΠΊΠΎΠ΄ΠΎΠ½ΠΈ: AUGGGAGGGGGAAUGAUCUCAAAGUUAUUUUGCUUGGCUCUCAUAUUUUUAUCAUCAAGUGGCCUUGCAGAAAAAAACACAUAUACAGCAAAAGACAUCUUGCAAAACCUAGAAUUAAAUACCUUUGGCAAUUCAUUGUCUCAUGGCAUCUAUGGGAAACAGACAACCUUCAAGCAAACCGAGUUUACAAAUAUUAAAAGCAACACCAAAAAACACAUUGCACUUAUCAAUAAAGACAACUCAUGGAUGAUAUCAUUAAAAAUACUAGGAAUUAAGAGAGAUGAGUAUACUGUCUGUUUUGAAGAUUUCUCUCUAAUAAGACCGCCAACAUAUGUAGCCAUACAUCCUCUACUUAUAAAAAAAGUAAAAUCUGGAAACUUUAUAGUAGUGAAAGAAAUAAAGAAAUCUAUCCCUGGUUGCACUGUAUAUUAUCAUUA
Π”ΠΎΠ»ΠΆΠΈΠ½Π°: 437

ИзлСз ΠΏΠΎ ΠΏΡ€Π΅Π²Π΅Π΄ΡƒΠ²Π°ΡšΠ΅:

protein_seq4 = read_seq4.translate(table='Bacterial')
print(f'ΠŸΡ€ΠΎΡ‚Π΅ΠΈΠ½ΡΠΊΠ° сСквСнца 4:\n{protein_seq4}')
print(f'Π”ΠΎΠ»ΠΆΠΈΠ½Π°: {len(protein_seq4)}')
ΠŸΡ€ΠΎΡ‚Π΅ΠΈΠ½ΡΠΊΠ° сСквСнца 4:
MGGGMISKLFCLALIFLSSSGLAEKNTYTAKDILQNLELNTFGNSLSHGIYGKQTTFKQTEFTNIKSNTKKHIALINKDNSWMISLKILGIKRDEYTVCFEDFSLIRPPTYVAIHPLLIKKVKSGNFIVVKEIKKSIPGCTVYYH
Π”ΠΎΠ»ΠΆΠΈΠ½Π°: 145