Bioinformatics/Global alignment: Difference between revisions
m
→{{header|Phix}}: added syntax colouring the hard way
No edit summary |
m (→{{header|Phix}}: added syntax colouring the hard way) |
||
Line 499:
=={{header|Phix}}==
<!--<lang Phix>-->
<span style="color: #008080;">procedure</span> <span style="color: #000000;">printcounts<span style="color: #0000FF;">(<span style="color: #004080;">sequence</span> <span style="color: #000000;">ss<span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">-- Given DNA sequence(s), report the sequence, length and base counts</span>
<span style="color: #000080;font-style:italic;">-- ss is a sequence of strings; each one is printed, followed by a line of counts.</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">ss<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">dna</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ss<span style="color: #0000FF;">[<span style="color: #000000;">i<span style="color: #0000FF;">]</span>
<span style="color: #000080;font-style:italic;">-- acgt[1] counts any other character, acgt[2..5] count A, C, G and T, acgt[6] receives the total</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">acgt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat<span style="color: #0000FF;">(<span style="color: #000000;">0<span style="color: #0000FF;">,<span style="color: #000000;">6<span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">j<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">dna<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #000080;font-style:italic;">-- find() gives 0 for a character that is not in "ACGT", so the +1 sends it to slot 1</span>
<span style="color: #000000;">acgt<span style="color: #0000FF;">[<span style="color: #7060A8;">find<span style="color: #0000FF;">(<span style="color: #000000;">dna<span style="color: #0000FF;">[<span style="color: #000000;">j<span style="color: #0000FF;">]<span style="color: #0000FF;">,<span style="color: #008000;">"ACGT"<span style="color: #0000FF;">)<span style="color: #0000FF;">+<span style="color: #000000;">1<span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #000080;font-style:italic;">-- the last slot is still 0 here, so summing every slot yields the length of dna</span>
<span style="color: #000000;">acgt<span style="color: #0000FF;">[<span style="color: #0000FF;">$<span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sum<span style="color: #0000FF;">(<span style="color: #000000;">acgt<span style="color: #0000FF;">)</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">ncf</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"Nucleotide counts for :"</span>
<span style="color: #000080;font-style:italic;">-- show dna in pieces of 50 characters, continuation lines being indented by the width of ncf</span>
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"%s%s\n"<span style="color: #0000FF;">,<span style="color: #0000FF;">{<span style="color: #000000;">ncf<span style="color: #0000FF;">,<span style="color: #7060A8;">join<span style="color: #0000FF;">(<span style="color: #7060A8;">split_by<span style="color: #0000FF;">(<span style="color: #000000;">dna<span style="color: #0000FF;">,<span style="color: #000000;">50<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #008000;">"\n"<span style="color: #0000FF;">&<span style="color: #7060A8;">repeat<span style="color: #0000FF;">(<span style="color: #008000;">' '<span style="color: #0000FF;">,<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">ncf<span style="color: #0000FF;">)<span style="color: #0000FF;">)<span style="color: #0000FF;">)<span style="color: #0000FF;">}<span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"Base counts: Other:%d, A:%d, C:%d, G:%d, T:%d, total:%d\n\n"<span style="color: #0000FF;">,<span style="color: #000000;">acgt<span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">deduplicate<span style="color: #0000FF;">(<span style="color: #004080;">sequence</span> <span style="color: #000000;">ss<span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">-- Remove any strings contained within a larger string from a set of strings.</span>
<span style="color: #000080;font-style:italic;">-- ss is a sequence of strings. Returns the survivors in their original order;</span>
<span style="color: #000080;font-style:italic;">-- when the same string occurs more than once, only its first occurrence is kept.</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">filtered</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{<span style="color: #0000FF;">}</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">ss<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">si</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ss<span style="color: #0000FF;">[<span style="color: #000000;">i<span style="color: #0000FF;">]</span>
<span style="color: #004080;">bool</span> <span style="color: #000000;">found</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">j<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">ss<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #000080;font-style:italic;">-- a match in a string of a different length means a strictly longer one; equal strings</span>
<span style="color: #000080;font-style:italic;">-- must not count here, otherwise two identical entries would eliminate each other</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">j</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ss</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span><span style="color: #0000FF;">)</span><span style="color: #0000FF;">!=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">si</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">si</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ss</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">found</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #000080;font-style:italic;">-- one containing string is enough, no need to scan the rest</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #000080;font-style:italic;">-- the find() test keeps a repeated string from being added a second time</span>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">found</span> <span style="color: #008080;">and</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">si</span><span style="color: #0000FF;">,</span><span style="color: #000000;">filtered</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">filtered</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append<span style="color: #0000FF;">(<span style="color: #000000;">filtered<span style="color: #0000FF;">,</span> <span style="color: #000000;">si<span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">filtered</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">shortest_common_superstring<span style="color: #0000FF;">(<span style="color: #004080;">sequence</span> <span style="color: #000000;">ss<span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">-- Prints, via printcounts, every shortest common superstring found for a set of strings (nothing is returned).</span>
<span style="color: #000080;font-style:italic;">-- Each ordering of the deduplicated strings is merged left to right, using the longest overlap at every join;</span>
<span style="color: #000080;font-style:italic;">-- all distinct results of minimal length are kept.</span>
<span style="color: #000000;">ss</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">deduplicate<span style="color: #0000FF;">(<span style="color: #7060A8;">unique<span style="color: #0000FF;">(<span style="color: #000000;">ss<span style="color: #0000FF;">)<span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">-- baseline candidate: plain concatenation in the given order</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">shortestsuper</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{<span style="color: #7060A8;">join<span style="color: #0000FF;">(<span style="color: #000000;">ss<span style="color: #0000FF;">,<span style="color: #008000;">""<span style="color: #0000FF;">)<span style="color: #0000FF;">}</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">shortest</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">shortestsuper<span style="color: #0000FF;">[<span style="color: #000000;">1<span style="color: #0000FF;">]<span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">p<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">factorial<span style="color: #0000FF;">(<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">ss<span style="color: #0000FF;">)<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">perm</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">permute<span style="color: #0000FF;">(<span style="color: #000000;">p<span style="color: #0000FF;">,<span style="color: #000000;">ss<span style="color: #0000FF;">)</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">sup</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">perm<span style="color: #0000FF;">[<span style="color: #000000;">1<span style="color: #0000FF;">]</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i<span style="color: #0000FF;">=<span style="color: #000000;">2</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">perm<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">pi</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">perm<span style="color: #0000FF;">[<span style="color: #000000;">i<span style="color: #0000FF;">]</span>
<span style="color: #000080;font-style:italic;">-- try overlaps from longest to shortest: a negative j makes sup[j..$] the last -j characters of sup</span>
<span style="color: #000080;font-style:italic;">-- (the final j=0 pass is relied on to act as an empty overlap, ie plain concatenation)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">j<span style="color: #0000FF;">=<span style="color: #0000FF;">-<span style="color: #7060A8;">min<span style="color: #0000FF;">(<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">pi<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">sup<span style="color: #0000FF;">)<span style="color: #0000FF;">)</span> <span style="color: #008080;">to</span> <span style="color: #000000;">0</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">overlap</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sup<span style="color: #0000FF;">[<span style="color: #000000;">j<span style="color: #0000FF;">..<span style="color: #0000FF;">$<span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">overlap</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pi<span style="color: #0000FF;">[<span style="color: #000000;">1<span style="color: #0000FF;">..<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">overlap<span style="color: #0000FF;">)<span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">sup</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">pi<span style="color: #0000FF;">[<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">overlap<span style="color: #0000FF;">)<span style="color: #0000FF;">+<span style="color: #000000;">1<span style="color: #0000FF;">..<span style="color: #0000FF;">$<span style="color: #0000FF;">]</span>
<span style="color: #000000;">pi</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
<span style="color: #000080;font-style:italic;">-- keep only this longest overlap: pi is now empty, so carrying on would slice it out of bounds</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #000080;font-style:italic;">-- pi is only emptied on a match, so a non-empty pi means nothing was joined: crash deliberately</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">pi<span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?<span style="color: #000000;">9<span style="color: #0000FF;">/<span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #000080;font-style:italic;">-- (sanity chk)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #000080;font-style:italic;">-- a strictly shorter result replaces the list; an equally short, not yet seen one is added to it</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">sup<span style="color: #0000FF;">)</span> <span style="color: #0000FF;"><</span> <span style="color: #000000;">shortest</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">shortest</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">sup<span style="color: #0000FF;">)</span>
<span style="color: #000000;">shortestsuper</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{<span style="color: #000000;">sup<span style="color: #0000FF;">}</span>
<span style="color: #008080;">elsif</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">sup<span style="color: #0000FF;">)</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">shortest</span>
<span style="color: #008080;">and</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find<span style="color: #0000FF;">(<span style="color: #000000;">sup<span style="color: #0000FF;">,<span style="color: #000000;">shortestsuper<span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">shortestsuper</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append<span style="color: #0000FF;">(<span style="color: #000000;">shortestsuper<span style="color: #0000FF;">,<span style="color: #000000;">sup<span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #000000;">printcounts<span style="color: #0000FF;">(<span style="color: #000000;">shortestsuper<span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #000080;font-style:italic;">-- Test sets of fragments; each set is handed to shortest_common_superstring below.</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">tests</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span>
<span style="color: #000080;font-style:italic;">-- "TA" is listed three times</span>
<span style="color: #0000FF;">{<span style="color: #008000;">"TA"<span style="color: #0000FF;">,</span> <span style="color: #008000;">"AAG"<span style="color: #0000FF;">,</span> <span style="color: #008000;">"TA"<span style="color: #0000FF;">,</span> <span style="color: #008000;">"GAA"<span style="color: #0000FF;">,</span> <span style="color: #008000;">"TA"<span style="color: #0000FF;">}<span style="color: #0000FF;">,</span>
<span style="color: #0000FF;">{<span style="color: #008000;">"CATTAGGG"<span style="color: #0000FF;">,</span> <span style="color: #008000;">"ATTAG"<span style="color: #0000FF;">,</span> <span style="color: #008000;">"GGG"<span style="color: #0000FF;">,</span> <span style="color: #008000;">"TA"<span style="color: #0000FF;">}<span style="color: #0000FF;">,</span>
<span style="color: #000080;font-style:italic;">-- these strings contain U, which printcounts tallies under Other</span>
<span style="color: #0000FF;">{<span style="color: #008000;">"AAGAUGGA"<span style="color: #0000FF;">,</span> <span style="color: #008000;">"GGAGCGCAUC"<span style="color: #0000FF;">,</span> <span style="color: #008000;">"AUCGCAAUAAGGA"<span style="color: #0000FF;">}<span style="color: #0000FF;">,</span>
<span style="color: #000080;font-style:italic;">-- a larger set of long fragments, some of which are listed twice</span>
<span style="color: #0000FF;">{<span style="color: #008000;">"ATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTAT"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"GGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGT"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"AACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"GCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTC"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"CGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCT"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGC"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"GATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATT"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"TTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA"<span style="color: #0000FF;">,</span>
<span style="color: #008000;">"TCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGA"<span style="color: #0000FF;">}</span>
<span style="color: #0000FF;">}</span>
<span style="color: #000080;font-style:italic;">-- call shortest_common_superstring once for each test set</span>
<span style="color: #7060A8;">papply<span style="color: #0000FF;">(<span style="color: #000000;">tests<span style="color: #0000FF;">,</span> <span style="color: #000000;">shortest_common_superstring<span style="color: #0000FF;">)
<!--</lang>-->
{{out}}
(Shows three length-6 results for the first test)
|