Bioinformatics/Global alignment: Difference between revisions

Line 192:

string ncf = "Nucleotide counts for :"

printf(1,"%s%s\n",{ncf,join(split_by(dna,50),"\n"&repeat(' ',length(ncf)))})

printf(1,"~~\nBase~~ counts: Other:%d, A:%d, C:%d, G:%d, T:%d, total:%d\n\n",acgt)

printf(1,"Base counts: Other:%d, A:%d, C:%d, G:%d, T:%d, total:%d\n\n",acgt)

end for

end procedure

function deduplicate(sequence ss)

-- Remove ~~duplicates and~~ strings contained within a larger string from ~~vector~~ of strings

-- Remove any strings contained within a larger string from a set of strings

sequence filtered = {}

for i=1 to length(ss) do

Line 216:

procedure shortest_common_superstring(sequence ss)

-- Returns shortest common superstring of a ~~vector~~ of strings

-- Returns shortest common superstring of a set of strings

ss = deduplicate(unique(ss))

sequence shortestsuper = {join(ss,"")}

Line 266:

papply(tests, shortest_common_superstring)</lang>

(For the first test, three shortest common superstrings are shown, each of length 6.)

<pre>

Nucleotide counts for :TAAGAA

Base counts: Other:0, A:4, C:0, G:1, T:1, total:6

Nucleotide counts for :GAAGTA

Base counts: Other:0, A:3, C:0, G:2, T:1, total:6

Nucleotide counts for :TAGAAG

Base counts: Other:0, A:3, C:0, G:2, T:1, total:6

Nucleotide counts for :CATTAGGG

Base counts: Other:0, A:2, C:1, G:3, T:2, total:8

Nucleotide counts for :AAGAUGGAGCGCAUCGCAAUAAGGA

Base counts: Other:3, A:10, C:4, G:8, T:0, total:25

Line 293:

Line 289:

CGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGG

TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA

Base counts: Other:0, A:74, C:57, G:75, T:94, total:300

</pre>