![]() |
Pythonバイオ/ツール/配列の切出し・加工 https://pepper.is.sci.toho-u.ac.jp:443/pepper/index.php?Python%A5%D0%A5%A4%A5%AA%2F%A5%C4%A1%BC%A5%EB%2F%C7%DB%CE%F3%A4%CE%C0%DA%BD%D0%A4%B7%A1%A6%B2%C3%B9%A9 |
![]() |
Pythonバイオ?　Pythonバイオ/ツール?
4180　　　2019-02-21 (木) 16:56:44
Seq型はDNA、RNA、タンパクなどの配列を扱うのに便利な型です。まずは例を見てみましょう。
from Bio.Alphabet import IUPAC from Bio.Seq import Seq dna_seq = Seq('ATGAAACGCATTAGCACCACC', IUPAC.ambiguous_dna) dna_seq ## Seq('ATGAAACGCATTAGCACCACC', IUPACAmbiguousDNA()) rna_seq = Seq('AUGAAACGCAUUAGCACCACC', IUPAC.ambiguous_rna) rna_seq ## Seq('AUGAAACGCAUUAGCACCACC', IUPACAmbiguousRNA()) aa_seq = Seq("MKRISTT", IUPAC.protein) aa_seq ## Seq('MKRISTT', IUPACProtein())
Seq型のデータを作るには、第１引数に配列を表す文字列、第２引数に配列のタイプを指定します。配列のタイプはモジュールIUPACの説明にあるように、基本的にDNA、RNA、タンパクの３種類で、Ambiguous（アルファベットなら何でも可）かUnambiguous（GATCのみ）かが指定できます。但しSeq型変数を作るときにアルファベットをチェックしているのではなく、別途プログラムでチェックする仕組みが用意されています。
from Bio.Alphabet import _verify_alphabet wrong_seq = Seq('PQR', IUPAC.unambiguous_dna) _verify_alphabet(wrong_seq) ## False
手元システムに配列ファイルがある場合はファイル読み出しによって取り込んだのち、切り出しの操作をする。
Fasta形式参照: のファイルを読み込むには、下記のようにSeqIO.parseが便利である。
from Bio import SeqIO for seq_record in SeqIO.parse("ls_orchid.fasta", "fasta"): print(seq_record.id) print(repr(seq_record.seq)) print(len(seq_record))
この例ではFastaファイル中に複数の配列データが含まれているので、forループによって１つずつ見て、そのid部分、配列自体、配列の長さを表示している。
複数の配列を含むFastaファイルからk番目の配列を取出すときは、次のようにlist関数を使って一旦リストにしたのち、k番目（ここでは2番目）の配列をseq.seqとして取出す。seq.seqとして取出したものは、Pythonのリスト（文字列）と同様に、インデックスやインデックス範囲を使って配列の一部を切り出すことができる。
from Bio import SeqIO seq_records = list(SeqIO.parse("ls_orchid.fasta", "fasta")) seq = seq_records[2] print(seq.seq) print(seq.seq[3:6])
Pythonの文字列の扱いが使える。たとえばさまざまなスライシング、２つの配列の結合
seq[::-1]
文字列と見なして、大文字・小文字を変換するupperやlowerが使える。
公共データベースから配列を取り込む場合は、取込みの手順によって取り込んだのち、切り出しの操作をする。
Seq·¿¤ÏÊѹ¹¤¬¤Ç¤¤Ê¤¤·¿¡ÊPython¤Çimmutable¤È¸Æ¤Ð¤ì¤ë·¿¡Ë¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£¤¿¤È¤¨¤Ð¡¢
from Bio.Seq import Seq from Bio.Alphabet import IUPAC my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC", IUPAC.unambiguous_dna) my_seq[5] = 'G'
とすると、
TypeError: 'Seq' object does not support item assignment
のようなエラーとなる。
配列を書き変えたい場合、変更可能な型（mutable sequence, MutableSeqオブジェクト）に変更するか、さもなければ一旦内容を文字列に読み出してそれを文字列として加工した上で再度Seq型に作る必要がある。
変更可能なMutableSeq型に変換するには、Seq型のオブジェクトをtomutable()でMutableSeqオブジェクトに変換するか、または直接にMutableSeqオブジェクトとして生成する。
mut_seq = my_seq.tomutable() mut_seq ### MutableSeq('GATCGATGGGCCTATATAGGATCGAAAATCGC', IUPACUnambiguousDNA())
または直接にMutableSeqオブジェクトとして生成する。
from Bio.Seq import MutableSeq from Bio.Alphabet import IUPAC mut_seq = MutableSeq('GATCGATGGGCCTATATAGGATCGAAAATCGC', IUPAC.unambiguous_dna) mut_seq ### MutableSeq('GATCGATGGGCCTATATAGGATCGAAAATCGC', IUPACUnambiguousDNA())
このようにして得られたMutableSeqオブジェクトは、要素の変更をすることができる。
mut_seq[5] = 'G' mut_seq ### MutableSeq('GATCGGTGGGCCTATATAGGATCGAAAATCGC', IUPACUnambiguousDNA())
¤Ç¤ÏÂ裶±ö´ð¤¬A¤«¤éG¤Ë½ñ¤´¹¤ï¤Ã¤Æ¤¤¤ë¡£¤Þ¤¿
mut_seq.remove('T') mut_seq ### MutableSeq('GACGGTGGGCCTATATAGGATCGAAAATCGC', IUPACUnambiguousDNA())
のようにすると最初に出現するTが取り除かれる。
なお、MutableSeqから逆に変更できないSeq型に戻すにはtoseq()を用いて
new_seq = mut_seq.toseq() new_seq ### Seq('GACGGTGGGCCTATATAGGATCGAAAATCGC', IUPACUnambiguousDNA())
とすればよい。
なお、先述したようにstrを用いて文字列に書き換えてから、文字列の関数を使って変更を加え、Seqを使ってSeq型に戻すということも可能である。
my_string = str(my_seq)    # my_seqを文字列に変換する
my_string
### 'GATCGATGGGCCTATATAGGATCGAAAATCGC'
new_string = my_string.replace('T', '', 1)    # 'T'を''に１回だけ置換する（最後の引数1は回数指定）
new_string
### 'GACGATGGGCCTATATAGGATCGAAAATCGC'
# 回数指定しないとすべてのTを置換するので注意
new_seq = Seq(new_string, IUPAC.unambiguous_dna)    # 再度Seq型に戻す
new_seq
### Seq('GACGATGGGCCTATATAGGATCGAAAATCGC', IUPACUnambiguousDNA())
acc_name = 'Z78444.1' for seq_record in SeqIO.parse("ls_orchid.fasta", "fasta"): if acc_name in seq_record.id: print(seq_record.id) print(repr(seq_record.seq)) print(len(seq_record))
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC", IUPAC.unambiguous_dna)
my_seq
### Seq('GATCGATGGGCCTATATAGGATCGAAAATCGC', IUPACUnambiguousDNA())
my_seq.complement()    # 相補鎖
### Seq('CTAGCTACCCGGATATATCCTAGCTTTTAGCG', IUPACUnambiguousDNA())
my_seq[::-1]    # 逆鎖
## Seq('CGCTAAAAGCTAGGATATATCCGGGTAGCTAG', IUPACUnambiguousDNA())
my_seq.reverse_complement()    # 逆相補鎖
### Seq('GCGATTTTCGATCCTATATAGGCCCATCGATC', IUPACUnambiguousDNA())
from Bio.Seq import Seq from Bio.Alphabet import IUPAC my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC", IUPAC.unambiguous_dna) my_seq ### Seq('GATCGATGGGCCTATATAGGATCGAAAATCGC', IUPACUnambiguousDNA()) messenger_rna = my_seq.transcribe() messenger_rna ### Seq('GAUCGAUGGGCCUAUAUAGGAUCGAAAAUCGC', IUPACUnambiguousRNA())
from Bio.Seq import Seq from Bio.Alphabet import IUPAC my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATC", IUPAC.unambiguous_dna) my_seq ### Seq('GATCGATGGGCCTATATAGGATCGAAAATC', IUPACUnambiguousDNA()) translated_protein = my_seq.translate() translated_protein ### Seq('DRWAYIGSKI', IUPACProtein())
翻訳表（Translation Table）はNCBIの定義する表が用意されている。標準の表（デフォルト）はStandard Codeであるが、たとえばVertebrate Mitochondrial Codeを使いたい場合には、translateの引数にtable='Vertebrate Mitochondrial'を指定する。
translated_protein = my_seq.translate(table='Vertebrate Mitochondrial') translated_protein ### Seq('DRWAYMGSKI', IUPACProtein())
また、終止コドン（stop codon）が現れたときに翻訳を停止するようにするには、引数にto_stop=Trueを指定する。
my_seq = Seq('GATCGATGGGCCTAAATAGGATCGAAAATC', IUPAC.unambiguous_dna) translated_protein = my_seq.translate(to_stop=True) translated_protein Seq('DRWA', IUPACProtein())