Bioinformatics/base count: Difference between revisions
Content added Content deleted
Catskill549 (talk | contribs) (added AWK) |
|||
Line 444: | Line 444: | ||
450 : GACCGGGGAC TTGCATGATG GGAGCAGCTT TGTTAAACTA CGAACGTAAT |
450 : GACCGGGGAC TTGCATGATG GGAGCAGCTT TGTTAAACTA CGAACGTAAT |
||
Total count => A: 129 T: 155 G: 119 C: 97</pre> |
Total count => A: 129 T: 155 G: 119 C: 97</pre> |
||
=={{header|AWK}}== |
|||
<lang AWK> |
|||
# syntax: GAWK -f BIOINFORMATICS_BASE_COUNT.AWK
# converted from FreeBASIC
#
# Pretty-prints a DNA sequence in rows of 50 bases (five groups of 10,
# each group followed by one space), every row prefixed with its 1-based
# "first-last" positions, then prints the number of occurrences of each
# distinct base and the grand total.
#
# sorting:
#   PROCINFO["sorted_in"] is used by GAWK
#   SORTTYPE is used by Thompson Automation's TAWK
#
BEGIN {
    dna = "CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATG" \
          "CTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTG" \
          "AGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGAT" \
          "GGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT" \
          "CGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGG" \
          "TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA" \
          "TTTAATTTTTCTATATAGCGATCTGTATTTAAGCAATTCATTTAGGTTAT" \
          "CGCCGCGATGCTCGGTTCGGACCGCCAAGCATCTGGCTCCACTGCTAGTG" \
          "TCCTAAATTTGAATGGCAAACACAAATAAGATTTAGCAATTCGTGTAGAC" \
          "GACCGGGGACTTGCATGATGGGAGCAGCTTTGTTAAACTACGAACGTAAT"

    dna_len = length(dna)   # hoisted: the sequence never changes inside the loop
    first = 1               # 1-based position of the first base in the pending row
    rec = ""                # text of the row being assembled

    for (curr = 1; curr <= dna_len; curr++) {
        curr_base = substr(dna,curr,1)
        base_arr[curr_base]++          # tally every distinct character seen
        rec = rec curr_base
        if (curr % 10 == 0) {          # a group of 10 is complete: add the separator
            rec = rec " "
        }
        if (curr % 50 == 0) {          # a row of 50 is complete: emit it, start the next
            printf("%3d-%3d: %s\n",first,curr,rec)
            rec = ""
            first = curr + 1
        }
    }

    # Flush a trailing partial row. Without this, any bases beyond the last
    # multiple of 50 would be counted but never printed.
    if (rec != "") {
        printf("%3d-%3d: %s\n",first,dna_len,rec)
    }

    # Request index-ordered traversal of base_arr (see "sorting" note above).
    PROCINFO["sorted_in"] = "@ind_str_asc" ; SORTTYPE = 1

    printf("\nBase count\n")
    total = 0
    for (i in base_arr) {
        printf("%s %8d\n",i,base_arr[i])
        total += base_arr[i]
    }
    printf("%10d total\n",total)
    exit(0)
}
|||
</lang> |
|||
{{out}} |
|||
<pre> |
|||
1- 50: CGTAAAAAAT TACAACGTCC TTTGGCTATC TCTTAAACTC CTGCTAAATG |
|||
51-100: CTCGTGCTTT CCAATTATGT AAGCGTTCCG AGACGGGGTG GTCGATTCTG |
|||
101-150: AGGACAAAGG TCAAGATGGA GCGCATCGAA CGCAATAAGG ATCATTTGAT |
|||
151-200: GGGACGTTTC GTCGACAAAG TCTTGTTTCG AGAGTAACGG CTACCGTCTT |
|||
201-250: CGATTCTGCT TATAACACTA TGTTCTTATG AAATGGATGT TCTGAGTTGG |
|||
251-300: TCAGTCCCAA TGTGCGGGGT TTCTTTTAGT ACGTCGGGAG TGGTATTATA |
|||
301-350: TTTAATTTTT CTATATAGCG ATCTGTATTT AAGCAATTCA TTTAGGTTAT |
|||
351-400: CGCCGCGATG CTCGGTTCGG ACCGCCAAGC ATCTGGCTCC ACTGCTAGTG |
|||
401-450: TCCTAAATTT GAATGGCAAA CACAAATAAG ATTTAGCAAT TCGTGTAGAC |
|||
451-500: GACCGGGGAC TTGCATGATG GGAGCAGCTT TGTTAAACTA CGAACGTAAT |
|||
Base count |
|||
A 129 |
|||
C 97 |
|||
G 119 |
|||
T 155 |
|||
500 total |
|||
</pre> |
|||
=={{header|C}}== |
=={{header|C}}== |