Bioinformatics/Global alignment: Difference between revisions
Content added Content deleted
(Added Go) |
|||
Line 54: | Line 54: | ||
<br><br> |
<br><br> |
||
=={{header|Go}}== |
|||
{{trans|Julia}} |
|||
<lang go>package main |
|||
import ( |
|||
"fmt" |
|||
"strings" |
|||
) |
|||
/* Gets n! for small n. */ |
|||
func factorial(n int) int { |
|||
fact := 1 |
|||
for i := 2; i <= n; i++ { |
|||
fact *= i |
|||
} |
|||
return fact |
|||
} |
|||
/* Gets all permutations of a list of strings. */ |
|||
func getPerms(input []string) [][]string { |
|||
perms := [][]string{input} |
|||
le := len(input) |
|||
a := make([]string, le) |
|||
copy(a, input) |
|||
n := le - 1 |
|||
fact := factorial(n + 1) |
|||
for c := 1; c < fact; c++ { |
|||
i := n - 1 |
|||
j := n |
|||
for i >= 0 && a[i] > a[i+1] { |
|||
i-- |
|||
} |
|||
if i == -1 { |
|||
i = n |
|||
} |
|||
for a[j] < a[i] { |
|||
j-- |
|||
} |
|||
a[i], a[j] = a[j], a[i] |
|||
j = n |
|||
i++ |
|||
if i == n+1 { |
|||
i = 0 |
|||
} |
|||
for i < j { |
|||
a[i], a[j] = a[j], a[i] |
|||
i++ |
|||
j-- |
|||
} |
|||
b := make([]string, le) |
|||
copy(b, a) |
|||
perms = append(perms, b) |
|||
} |
|||
return perms |
|||
} |
|||
/* Returns all distinct elements from a list of strings. */ |
|||
func distinct(slist []string) []string { |
|||
distinctSet := make(map[string]int, len(slist)) |
|||
i := 0 |
|||
for _, s := range slist { |
|||
if _, ok := distinctSet[s]; !ok { |
|||
distinctSet[s] = i |
|||
i++ |
|||
} |
|||
} |
|||
result := make([]string, len(distinctSet)) |
|||
for s, i := range distinctSet { |
|||
result[i] = s |
|||
} |
|||
return result |
|||
} |
|||
/* Given a DNA sequence, report the sequence, length and base counts. */ |
|||
func printCounts(seq string) { |
|||
bases := [][]rune{{'A', 0}, {'C', 0}, {'G', 0}, {'T', 0}} |
|||
for _, c := range seq { |
|||
for _, base := range bases { |
|||
if c == base[0] { |
|||
base[1]++ |
|||
} |
|||
} |
|||
} |
|||
sum := 0 |
|||
fmt.Println("\nNucleotide counts for", seq, "\b:\n") |
|||
for _, base := range bases { |
|||
fmt.Printf("%10c%12d\n", base[0], base[1]) |
|||
sum += int(base[1]) |
|||
} |
|||
le := len(seq) |
|||
fmt.Printf("%10s%12d\n", "Other", le-sum) |
|||
fmt.Printf(" ____________________\n%14s%8d\n", "Total length", le) |
|||
} |
|||
/* Return the position in s1 of the start of overlap of tail of string s1 with head of string s2. */ |
|||
func headTailOverlap(s1, s2 string) int { |
|||
for start := 0; ; start++ { |
|||
ix := strings.IndexByte(s1[start:], s2[0]) |
|||
if ix == -1 { |
|||
return 0 |
|||
} else { |
|||
start += ix |
|||
} |
|||
if strings.HasPrefix(s2, s1[start:]) { |
|||
return len(s1) - start |
|||
} |
|||
} |
|||
} |
|||
/* Remove duplicates and strings contained within a larger string from a list of strings. */ |
|||
func deduplicate(slist []string) []string { |
|||
var filtered []string |
|||
arr := distinct(slist) |
|||
for i, s1 := range arr { |
|||
withinLarger := false |
|||
for j, s2 := range arr { |
|||
if j != i && strings.Contains(s2, s1) { |
|||
withinLarger = true |
|||
break |
|||
} |
|||
} |
|||
if !withinLarger { |
|||
filtered = append(filtered, s1) |
|||
} |
|||
} |
|||
return filtered |
|||
} |
|||
/* Returns shortest common superstring of a list of strings. */ |
|||
func shortestCommonSuperstring(slist []string) string { |
|||
ss := deduplicate(slist) |
|||
shortestSuper := strings.Join(ss, "") |
|||
for _, perm := range getPerms(ss) { |
|||
sup := perm[0] |
|||
for i := 0; i < len(ss)-1; i++ { |
|||
overlapPos := headTailOverlap(perm[i], perm[i+1]) |
|||
sup += perm[i+1][overlapPos:] |
|||
} |
|||
if len(sup) < len(shortestSuper) { |
|||
shortestSuper = sup |
|||
} |
|||
} |
|||
return shortestSuper |
|||
} |
|||
func main() { |
|||
testSequences := [][]string{ |
|||
{"TA", "AAG", "TA", "GAA", "TA"}, |
|||
{"CATTAGGG", "ATTAG", "GGG", "TA"}, |
|||
{"AAGAUGGA", "GGAGCGCAUC", "AUCGCAAUAAGGA"}, |
|||
{ |
|||
"ATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTAT", |
|||
"GGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGT", |
|||
"CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA", |
|||
"TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", |
|||
"AACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT", |
|||
"GCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTC", |
|||
"CGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCT", |
|||
"TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", |
|||
"CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGC", |
|||
"GATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATT", |
|||
"TTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", |
|||
"CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA", |
|||
"TCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGA", |
|||
}, |
|||
} |
|||
for _, test := range testSequences { |
|||
scs := shortestCommonSuperstring(test) |
|||
printCounts(scs) |
|||
} |
|||
}</lang> |
|||
{{out}} |
|||
<pre> |
|||
Nucleotide counts for TAAGAA: |
|||
A 4 |
|||
C 0 |
|||
G 1 |
|||
T 1 |
|||
Other 0 |
|||
____________________ |
|||
Total length 6 |
|||
Nucleotide counts for CATTAGGG: |
|||
A 2 |
|||
C 1 |
|||
G 3 |
|||
T 2 |
|||
Other 0 |
|||
____________________ |
|||
Total length 8 |
|||
Nucleotide counts for AAGAUGGAGCGCAUCGCAAUAAGGA: |
|||
A 10 |
|||
C 4 |
|||
G 8 |
|||
T 0 |
|||
Other 3 |
|||
____________________ |
|||
Total length 25 |
|||
Nucleotide counts for CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA: |
|||
A 74 |
|||
C 57 |
|||
G 75 |
|||
T 94 |
|||
Other 0 |
|||
____________________ |
|||
Total length 300 |
|||
</pre> |
|||
=={{header|Julia}}== |
=={{header|Julia}}== |