Bioinformatics/Sequence mutation: Difference between revisions

← Older edit

Bioinformatics/Sequence mutation (view source)

Revision as of 01:34, 14 March 2024

38,296 bytes added , 2 months ago

m

→‎{{header|jq}}: 0 .. 300

Peak

2,451

edits

Revision as of 17:34, 24 August 2021 (view source) GordonCharlton (talk \| contribs) (Added Quackery.) ← Older edit		Latest revision as of 01:34, 14 March 2024 (view source) Peak (talk \| contribs) m (→‎{{header\|jq}}: 0 .. 300)
(19 intermediate revisions by 15 users not shown)
Line 15: * Give more information on the individual mutations applied. * Allow mutations to be weighted and/or chosen. =={{header\|11l}}== {{trans\|Python}} <syntaxhighlight lang="11l">UInt32 seed = 0 F nonrandom(n) :seed = 1664525 * :seed + 1013904223 R Int(:seed >> 16) % n F nonrandom_choice(lst) R lst[nonrandom(lst.len)] F basecount(dna) DefaultDict[Char, Int] d L(c) dna d[c]++ R sorted(d.items()) F seq_split(dna, n = 50) R (0 .< dna.len).step(n).map(i -> @dna[i .< i + @n]) F seq_pp(dna, n = 50) L(part) seq_split(dna, n) print(‘#5: #.’.format(L.index * n, part)) print("\n BASECOUNT:") V tot = 0 L(base, count) basecount(dna) print(‘ #3: #.’.format(base, count)) tot += count V (base, count) = (‘TOT’, tot) print(‘ #3= #.’.format(base, count)) F seq_mutate(String =dna; count = 1, kinds = ‘IDSSSS’, choice = ‘ATCG’) [(String, Int)] mutation V k2txt = [‘I’ = ‘Insert’, ‘D’ = ‘Delete’, ‘S’ = ‘Substitute’] L 0 .< count V kind = nonrandom_choice(kinds) V index = nonrandom(dna.len + 1) I kind == ‘I’ dna = dna[0 .< index]‘’nonrandom_choice(choice)‘’dna[index..] E I kind == ‘D’ & !dna.empty dna = dna[0 .< index]‘’dna[index+1..] E I kind == ‘S’ & !dna.empty dna = dna[0 .< index]‘’nonrandom_choice(choice)‘’dna[index+1..] 
mutation.append((k2txt[kind], index)) R (dna, mutation) print(‘SEQUENCE:’) V sequence = ‘TCAATCATTAATCGATTAATACATTCAATTTGAACATCTCCAGGAGAAGGCAGGGTAATCTCGTGTAGCCGTGCTTGGGGCCTCCGATATGGCCGGGGAATTTCAAAGTATAGTGTGCATCCCCTCATAATACATAGATCTATAGGTAAGTATATGGGTTGACGTTGTTAGATGCGATACACGTGCACACTTTATGAATTTTACGTTCCTCTGCCTAGAGTGCCAAGTTTCAATTTGCTACGGTTCCTCA’ seq_pp(sequence) print("\n\nMUTATIONS:") V (mseq, m) = seq_mutate(sequence, 10) L(kind, index) m print(‘ #10 @#.’.format(kind, index)) print() seq_pp(mseq)</syntaxhighlight> {{out}} <pre> SEQUENCE: 0: TCAATCATTAATCGATTAATACATTCAATTTGAACATCTCCAGGAGAAGG 50: CAGGGTAATCTCGTGTAGCCGTGCTTGGGGCCTCCGATATGGCCGGGGAA 100: TTTCAAAGTATAGTGTGCATCCCCTCATAATACATAGATCTATAGGTAAG 150: TATATGGGTTGACGTTGTTAGATGCGATACACGTGCACACTTTATGAATT 200: TTACGTTCCTCTGCCTAGAGTGCCAAGTTTCAATTTGCTACGGTTCCTCA BASECOUNT: A: 66 C: 51 G: 55 T: 78 TOT= 250 MUTATIONS: Substitute @184 Substitute @70 Substitute @28 Substitute @6 Substitute @25 Substitute @197 Substitute @81 Substitute @130 Substitute @76 Delete @76 0: TCAATCTTTAATCGATTAATACATTCAATTTGAACATCTCCAGGAGAAGG 50: CAGGGTAATCTCGTGTAGCCCTGCTTGGGCATCCGATATGGCCGGGGAAT 100: TTCAAAGTATAGTGTGCATCCCCTCATAACACATAGATCTATAGGTAAGT 150: ATATGGGTTGACGTTGTTAGATGCGATACACGTACACACTTTATGATTTT 200: TACGTTCCTCTGCCTAGAGTGCCAAGTTTCAATTTGCTACGGTTCCTCA BASECOUNT: A: 66 C: 52 G: 52 T: 79 TOT= 249 </pre> =={{header\|Ada}}== <syntaxhighlight lang="ada">with Ada.Containers.Vectors; with Ada.Numerics.Discrete_Random; with Ada.Text_Io; procedure Mutations is Width : constant := 60; type Nucleotide_Type is (A, C, G, T); type Operation_Type is (Delete, Insert, Swap); type Position_Type is new Natural; package Position_Io is new Ada.Text_Io.Integer_Io (Position_Type); package Nucleotide_Io is new Ada.Text_Io.Enumeration_Io (Nucleotide_Type); package Operation_Io is new Ada.Text_Io.Enumeration_Io (Operation_Type); use Ada.Text_Io, Position_Io, Nucleotide_Io, Operation_Io; package Sequence_Vectors is new Ada.Containers.Vectors (Index_Type => 
Position_Type, Element_Type => Nucleotide_Type); package Nucleotide_Generators is new Ada.Numerics.Discrete_Random (Result_Subtype => Nucleotide_Type); package Operation_Generators is new Ada.Numerics.Discrete_Random (Result_Subtype => Operation_Type); procedure Pretty_Print (Sequence : Sequence_Vectors.Vector) is First : Position_Type := Sequence.First_Index; Last : Position_Type; Count : array (Nucleotide_Type) of Natural := (others => 0); begin Last := Position_Type'Min (First + Width - 1, Sequence.Last_Index); loop Position_Io.Put (First, Width => 4); Put (": "); for N in First .. Last loop declare Nucleotide : Nucleotide_Type renames Sequence (N); begin Put (Nucleotide); Count (Nucleotide) := Count (Nucleotide) + 1; end; end loop; New_Line; exit when Last = Sequence.Last_Index; First := Last + 1; Last := Position_Type'Min (First + Width - 1, Sequence.Last_Index); end loop; for N in Count'Range loop Put ("Count of "); Put (N); Put (" is "); Put (Natural'Image (Count (N))); New_Line; end loop; end Pretty_Print; function Random_Position (First, Last : Position_Type) return Position_Type is subtype Position_Range is Position_Type range First .. Last; package Position_Generators is new Ada.Numerics.Discrete_Random (Result_Subtype => Position_Range); Generator : Position_Generators.Generator; begin Position_Generators.Reset (Generator); return Position_Generators.Random (Generator); end Random_Position; Nucleotide_Generator : Nucleotide_Generators.Generator; Operation_Generator : Operation_Generators.Generator; Sequence : Sequence_Vectors.Vector; Position : Position_Type; Nucleotide : Nucleotide_Type; Operation : Operation_Type; begin Nucleotide_Generators.Reset (Nucleotide_Generator); Operation_Generators.Reset (Operation_Generator); for A in 1 .. 200 loop Sequence.Append (Nucleotide_Generators.Random (Nucleotide_Generator)); end loop; Put_Line ("Initial sequence:"); Pretty_Print (Sequence); New_Line; Put_Line ("Mutations:"); for Mutate in 1 .. 
10 loop Operation := Operation_Generators.Random (Operation_Generator); case Operation is when Delete => Position := Random_Position (Sequence.First_Index, Sequence.Last_Index); Sequence.Delete (Index => Position); Put (Operation); Put (" at position "); Put (Position, Width => 0); New_Line; when Insert => Position := Random_Position (Sequence.First_Index, Sequence.Last_Index + 1); Nucleotide := Nucleotide_Generators.Random (Nucleotide_Generator); Sequence.Insert (Before => Position, New_Item => Nucleotide); Put (Operation); Put (" "); Put (Nucleotide); Put (" at position "); Put (Position, Width => 0); New_Line; when Swap => Position := Random_Position (Sequence.First_Index, Sequence.Last_Index); Nucleotide := Nucleotide_Generators.Random (Nucleotide_Generator); Sequence.Replace_Element (Index => Position, New_Item => Nucleotide); Put (Operation); Put (" at position "); Put (Position, Width => 0); Put (" to "); Put (Nucleotide); New_Line; end case; end loop; New_Line; Put_Line ("Mutated sequence:"); Pretty_Print (Sequence); end Mutations;</syntaxhighlight> {{out}} <pre>Initial sequence: 0: GCTGAGTCCGAATTAGTATTCATGAGATACGCATGTCAGTACGGCGACGACACGGGAAGA 60: GCAGATGAAAACTACTGGGGAGCTACCGAGCTGCCGTCGATTGTACGGATGTTATATTTC 120: CCATAGAACTACGAAGTTTTAGGATCCTTTCGGCGATGTGATAAGCAGGTATCAGTAGTA 180: AGCGAAGCGTTGACGTTTTT Count of A is 55 Count of C is 37 Count of G is 56 Count of T is 52 Mutations: DELETE at position 129 SWAP at position 172 to T SWAP at position 28 to T INSERT A at position 193 DELETE at position 164 SWAP at position 165 to G DELETE at position 91 INSERT A at position 169 INSERT C at position 72 DELETE at position 146 Mutated sequence: 0: GCTGAGTCCGAATTAGTATTCATGAGATTCGCATGTCAGTACGGCGACGACACGGGAAGA 60: GCAGATGAAAACCTACTGGGGAGCTACCGAGCGCCGTCGATTGTACGGATGTTATATTTC 120: CCATAGAACACGAAGTTTTAGGATCCTTCGGCGATGTGATAAGAGGTATACTGTAGTAAG 180: CGAAGCGTTGACAGTTTTT Count of A is 55 Count of C is 37 Count of G is 56 Count of T is 51</pre> =={{header\|Arturo}}== 
<~~lang~~syntaxhighlight lang="rebol">bases: ["A" "T" "G" "C"] dna: map 1..200 => [sample bases] Line 85 ⟶ 331: print "------------------------------" prettyPrint dna print ""</~~lang~~syntaxhighlight> {{out}} Line 121 ⟶ 367: 200 : CC Total count => A: 46 T: 47 G: 55 C: 54</pre> =={{header\|BBC BASIC}}== {{works with\|BBC BASIC for Windows}} <syntaxhighlight lang="bbcbasic"> Mutations = 10 InitialLength = 400 @%=3 REM Generate sequence and Pretty Print result. FOR I%=1 TO InitialLength Sequence$ += FNRandomBase NEXT PROCDisplaySequence(Sequence$, 50) REM Make mutations and Pretty Print result. PRINT '"Mutating..." FOR I%=1 TO Mutations Position = RND(LENSequence$) CurBase$ = MID$(Sequence$, Position, 1) NewBase$ = FNRandomBase CASE RND(3) OF WHEN 1 REM Change a base PRINT "Change base " CurBase$ " at position " Position " to base " NewBase$ MID$(Sequence$, Position, 1)=NewBase$ WHEN 2 REM Delete a base PRINT "Delete base " CurBase$ " at position " Position Sequence$=LEFT$(Sequence$, Position - 1) + MID$(Sequence$, Position + 1) WHEN 3 REM Insert a base PRINT "Insert base " NewBase$ " at position " Position Sequence$=LEFT$(Sequence$, Position) + NewBase$ + MID$(Sequence$, Position + 1) ENDCASE NEXT PROCDisplaySequence(Sequence$, 50) END DEF FNRandomBase = MID$("ACGT", RND(4), 1) DEF PROCDisplaySequence(seq$, snap%) LOCAL a, c, g, t, i%, p% p% = !^seq$ FOR i%=0 TO LENseq$ - 1 IF i% MOD snap% == 0 PRINT 'i% ": "; VDU p%?i% CASE p%?i% OF WHEN ASC"A" a += 1 WHEN ASC"C" c += 1 WHEN ASC"G" g += 1 WHEN ASC"T" t += 1 ENDCASE NEXT PRINT ' "A: " a ' "C: " c ' "G: " g ' "T: " t PRINT "Total: "; a + c + g + t ENDPROC</syntaxhighlight> {{out}} <pre> 0: CATGGAAGCTACGTGACTGAGGTACCCGTCGCAGGTTCGAATAAATGATA 50: CTAAAATATCGACGCTAGATACAATATAATGTCTGTAGAAAGCGTCCCTT 100: ATGTTTACATAGGAAAGTATGTGTCGGGCGCCCATGCATTTTCTTAGGCA 150: GCGGAAGCCCCGTGGCGCTCGGCCTCCGCTTTTATTACTTTTAACGTAAC 200: GAGGCGCGGGCGTTGCTTTCTTCCGGCTACCGGCGTCGCACCTAACGCCG 250: GCTGCGAATCGCGCGTTTGTAATTACAAGTTAATTACGATATGCCTCGCA 
300: AGTTTTGGCTACCGCTGCCCGGATACTTGGGACGTACGGTATTTCACGCA 350: TCAACAGGTATCCCCCTCCCCTTAGTCTTCCACGACTACTTATTTGAGGG A: 90 C: 104 G: 97 T: 109 Total: 400 Mutating... Delete base A at position 69 Change base A at position 154 to base T Delete base T at position 342 Delete base T at position 83 Insert base G at position 278 Insert base G at position 336 Delete base A at position 48 Insert base T at position 233 Change base T at position 233 to base C Delete base G at position 148 0: CATGGAAGCTACGTGACTGAGGTACCCGTCGCAGGTTCGAATAAATGTAC 50: TAAAATATCGACGCTAGTACAATATAATGTCGTAGAAAGCGTCCCTTATG 100: TTTACATAGGAAAGTATGTGTCGGGCGCCCATGCATTTTCTTAGGCACGG 150: TAGCCCCGTGGCGCTCGGCCTCCGCTTTTATTACTTTTAACGTAACGAGG 200: CGCGGGCGTTGCTTTCTTCCGGCTACCGGCGCTCGCACCTAACGCCGGCT 250: GCGAATCGCGCGTTTGTAATTACAAGTGTAATTACGATATGCCTCGCAAG 300: TTTTGGCTACCGCTGCCCGGATACTTGGGACGTACGGGTATTCACGCATC 350: AACAGGTATCCCCCTCCCCTTAGTCTTCCACGACTACTTATTTGAGGG A: 87 C: 105 G: 98 T: 108 Total: 398</pre> =={{header\|C}}== Adenine ( A ) is always swapped for Thymine ( T ) and vice versa. Similarly with Cytosine ( C ) and Guanine ( G ). <syntaxhighlight lang="c"> ~~<lang C>~~ #include<stdlib.h> #include<stdio.h> Line 336 ⟶ 676: return 0; } </syntaxhighlight> ~~</lang>~~ Sample run : <pre> Line 423 ⟶ 763: Total:513 </pre> =={{header\|C++}}== <~~lang~~syntaxhighlight lang="cpp">#include <array> #include <iomanip> #include <iostream> Line 548 ⟶ 887: sequence_generator::print_sequence(std::cout, sequence); return 0; }</~~lang~~syntaxhighlight> {{out}} Line 580 ⟶ 919: A: 65, C: 66, G: 64, T: 56, Total: 251 </pre> =={{header\|Common Lisp}}== <b>Usage :</b> Line 587 ⟶ 925: :: :genome <i><Genome Sequence></i>) <b>All keys are optional. 
<i><Genome length></i> is discarded when :genome is set.</b> <~~lang~~syntaxhighlight lang="lisp"> (defun random_base () (random 4)) Line 661 ⟶ 999: (t (delete_base genome))))) (output_genome_info genome "MUTATED")) </syntaxhighlight> ~~</lang>~~ {{out}} <pre> Line 730 ⟶ 1,068: T : 137 G : 119 </pre> =={{header\|Factor}}== <~~lang~~syntaxhighlight lang="factor">USING: assocs combinators.random formatting grouping io kernel macros math math.statistics namespaces prettyprint quotations random sequences sorting ; Line 805 ⟶ 1,142: [ mutate ] curry times nl "MUTATED " write show-dna ; MAIN: main</~~lang~~syntaxhighlight> {{out}} <pre> Line 852 ⟶ 1,189: TOTAL: 204 </pre> =={{header\|FreeBASIC}}== {{trans\|Yabasic}} <syntaxhighlight lang="vb">'' Rosetta Code problem: https://rosettacode.org/wiki/Bioinformatics/Sequence_mutation '' by Jjuanhdez, 05/2023 Randomize Timer Dim As Integer r, i r = Int(Rnd * (300)) Dim Shared As String dnaS For i = 1 To 200 + r : dnaS += Mid("ACGT", Int(Rnd * (4))+1, 1) : Next Sub show() Dim As Integer acgt(4), i, j, x, total For i = 1 To Len(dnaS) x = Instr("ACGT", Mid(dnaS, i, 1)) acgt(x) += 1 Next For i = 1 To 4 : total += acgt(i) : Next For i = 1 To Len(dnaS) Step 50 Print i; ":"; !"\t"; For j = 0 To 49 Step 10 Print Mid(dnaS, i+j, 10); " "; Next Print Next Print !"\nBase counts: A:"; acgt(1); ", C:"; acgt(2); ", G:"; acgt(3); ", T:"; acgt(4); ", total:"; total End Sub Sub mutate() Dim As Integer i, p Dim As String sdiS, repS, wasS Print For i = 1 To 10 p = Int(Rnd * (Len(dnaS))) + 1 sdiS = Mid("SDI", Int(Rnd * (3)) + 1, 1) repS = Mid("ACGT", Int(Rnd * (4)) + 1, 1) wasS = Mid(dnaS, p, 1) Select Case sdiS Case "S" Mid(dnaS, p, 1) = repS Print "swapped "; wasS; " at "; p; " for "; repS Case "D" dnaS = Left(dnaS, p - 1) + Right(dnaS, Len(dnaS) - p) Print "deleted "; wasS; " at "; p Case "I" dnaS = Left(dnaS, p - 1) + repS + Right(dnaS, (Len(dnaS) - p + 1)) Print "inserted "; repS; " at "; p; ", before "; wasS End Select Next Print End Sub 
show() mutate() show() Sleep</syntaxhighlight> {{out}} <pre> 1: GAAATGATTT GTATCGAGCA GACTGGAGAA AGCACTTATT TAAGCACCGT 51: TTCAAAGCCA CTCTGTTAGG AAGCTAATCC GTAGGTACGT AGGGACGACT 101: CGATCGGACC CTTGCTTCGG TGTCTTCGTT CATCCCGGTT TCCGCGCTCA 151: GCTGCATTTT GGTCGAGCCA GGCGATCGAC AATGTTCGAC GCAATAACGC 201: GCCGGATAGG CACCTGGTGT AGTTTAGGCT GTGTCCGCTT CTGCATCTCC 251: GTTTTGAACA ATGAATTTCC ACGCGTCCAA CAGAAAGATT TGCGCCTGTC 301: TGGAGTGGTC GGAACTTAGG TATTCCGTCG TCAGTCGCGC AGAGATCAGC 351: GACCCTCTTG CTCGTGGCCC TGGACGCGTT TCCTCGTTTT AACTCGACAT 401: CCCTGACCAG CATCACTA Base counts: A: 88, C: 112, G: 106, T: 112, total: 418 swapped T at 246 for C swapped T at 90 for G inserted C at 141, before T deleted T at 62 swapped T at 63 for G deleted T at 381 deleted T at 389 swapped T at 81 for G inserted G at 149, before C swapped T at 256 for T 1: GAAATGATTT GTATCGAGCA GACTGGAGAA AGCACTTATT TAAGCACCGT 51: TTCAAAGCCA CCGGTTAGGA AGCTAATCCG GAGGTACGGA GGGACGACTC 101: GATCGGACCC TTGCTTCGGT GTCTTCGTTC ATCCCGGTTC TCCGCGCTGC 151: AGCTGCATTT TGGTCGAGCC AGGCGATCGA CAATGTTCGA CGCAATAACG 201: CGCCGGATAG GCACCTGGTG TAGTTTAGGC TGTGTCCGCT TCTGCACCTC 251: CGTTTTGAAC AATGAATTTC CACGCGTCCA ACAGAAAGAT TTGCGCCTGT 301: CTGGAGTGGT CGGAACTTAG GTATTCCGTC GTCAGTCGCG CAGAGATCAG 351: CGACCCTCTT GCTCGTGGCC CTGGACGCGT TCCTCGTTTA ACTCGACATC 401: CCTGACCAGC ATCACTA Base counts: A: 88, C: 114, G: 110, T: 105, total: 417</pre> =={{header\|Go}}== <~~lang~~syntaxhighlight lang="go">package main import ( Line 952 ⟶ 1,389: fmt.Println() prettyPrint(dna, 50) }</~~lang~~syntaxhighlight> {{out}} Line 1,008 ⟶ 1,445: </pre> =={{header\|Haskell}}== <~~lang~~syntaxhighlight lang="haskell">import Data.List (group, sort) import Data.List.Split (chunksOf) import System.Random (Random, randomR, random, newStdGen, randoms, getStdRandom) Line 1,087 ⟶ 1,524: showSequence = mapM_ (uncurry (printf "%3d: %s\n")) . chunkedDNASequence showBaseCounts = mapM_ (uncurry (printf "%s: %3d\n")) . 
baseCounts showSumBaseCounts xs = putStrLn (replicate 6 '-') >> printf "Σ: %d\n\n" (length xs)</~~lang~~syntaxhighlight> {{out}} <pre>Initial Sequence: Line 1,128 ⟶ 1,565: ------ Σ: 203</pre> =={{header\|J}}== <~~lang~~syntaxhighlight Jlang="j">ACGT=: 'ACGT' MUTS=: ;: 'del ins mut' Line 1,165 ⟶ 1,601: ) simulate=: (sim@(1 1 1&; &. \|. ))`sim@.(3=#)</~~lang~~syntaxhighlight> {{out}} Line 1,257 ⟶ 1,693: │ │ 200│GGC │ └─────┴────┴──────────────────────────────────────────────────┘</pre> =={{header\|Java}}== <p> ~~<lang java>import java.util.Arrays;~~ This example uses a <code>List</code> to hold the base values.<br /> The <code>Random</code> class is used to generate random integer values.<br /> A <code>record</code> is used to hold the counts of each base. </p> <p> The "pretty print" is defined within the <code>toString</code> method.<br /> It uses a <code>StringBuilder</code> to generate a <kbd>string</kbd> of sequential bases.<br /> A <code>BufferedReader</code> is used to read the augmented <kbd>string</kbd> line-for-line.<br /> Finally, a <kbd>string</kbd> formatter is used to justify and format the output text. 
</p> <syntaxhighlight lang="java">
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * A mutable DNA sequence that can be randomly generated, randomly mutated
 * (substitute / delete / insert a single base) and pretty-printed with
 * per-row and overall base counts.
 */
class Program {
    List<Character> sequence;
    Random random;

    /** Creates an empty sequence with a freshly seeded random generator. */
    Program() {
        sequence = new ArrayList<>();
        random = new Random();
    }

    /**
     * Appends random bases to the end of the sequence.
     *
     * @param amount number of bases to append
     */
    void generate(int amount) {
        for (int count = 0; count < amount; count++)
            sequence.add(randomBase());
    }

    /**
     * Applies random single-base mutations, each one a substitution,
     * a deletion or an insertion at a random position.
     *
     * @param amount number of mutations to apply
     */
    void mutate(int amount) {
        for (int count = 0; count < amount; count++) {
            int size = sequence.size();
            /* an empty sequence has nothing to substitute or delete, so only insertion is valid */
            int operation = size == 0 ? 2 : random.nextInt(0, 3);
            switch (operation) {
                case 0 -> sequence.set(random.nextInt(0, size), randomBase());
                case 1 -> sequence.remove(random.nextInt(0, size));
                /* size + 1 slots, so that a base may also be appended after the last one */
                default -> sequence.add(random.nextInt(0, size + 1), randomBase());
            }
        }
    }

    /** Returns one of 'A', 'C', 'G' or 'T', chosen uniformly at random. */
    private char randomBase() {
        return switch (random.nextInt(0, 4)) {
            case 0 -> 'A';
            case 1 -> 'C';
            case 2 -> 'G';
            case 3 -> 'T';
            default -> '?';
        };
    }

    /** Counts the occurrences of each base in the given string; other characters are ignored. */
    private Base count(String string) {
        int a = 0, c = 0, g = 0, t = 0;
        for (char base : string.toCharArray()) {
            switch (base) {
                case 'A' -> a++;
                case 'C' -> c++;
                case 'G' -> g++;
                case 'T' -> t++;
            }
        }
        return new Base(a, c, g, t);
    }

    /* used exclusively for count totals */
    private record Base(int a, int c, int g, int t) {
        int total() {
            return a + c + g + t;
        }

        @Override
        public String toString() {
            return "[A %2d, C %2d, G %2d, T %2d]".formatted(a, c, g, t);
        }
    }

    /**
     * Pretty-prints the sequence in numbered rows of 50 bases, each followed
     * by its base counts, and ends with the totals and percentages per base.
     */
    @Override
    public String toString() {
        StringBuilder wrapped = new StringBuilder(); /* bases with a line break every 50 */
        StringBuilder flat = new StringBuilder();    /* bases without breaks, for the totals */
        String newline = System.lineSeparator();
        for (int index = 0; index < sequence.size(); index++) {
            if (index != 0 && index % 50 == 0)
                wrapped.append(newline);
            wrapped.append(sequence.get(index));
            flat.append(sequence.get(index));
        }
        StringBuilder report = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new StringReader(wrapped.toString()))) {
            int row = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                report.append(row++);
                report.append(" %-50s ".formatted(line));
                report.append(count(line));
                report.append(newline);
            }
        } catch (IOException exception) {
            /* ignore: the reader is backed by an in-memory string */
        }
        report.append(newline);
        Base bases = count(flat.toString());
        int total = bases.total();
        report.append("Total of %d bases%n".formatted(total));
        report.append("A %3d (%.2f%%)%n".formatted(bases.a, ((double) bases.a / total) * 100));
        report.append("C %3d (%.2f%%)%n".formatted(bases.c, ((double) bases.c / total) * 100));
        report.append("G %3d (%.2f%%)%n".formatted(bases.g, ((double) bases.g / total) * 100));
        report.append("T %3d (%.2f%%)%n".formatted(bases.t, ((double) bases.t / total) * 100));
        return report.toString();
    }
}
</syntaxhighlight> <p> Here is a sequence of 200 mutated 10 times. </p> <pre> Before mutation 0 TCGCTTGGGGGGAGCAAGGTGTTCGCAATAGATCACAGCCGGTCTCGCAT [A 10, C 12, G 17, T 11] 1 AGCTATTTCTACGCTATCGAGCCTGTACTGTGTCAGTAGACCATGTACTC [A 11, C 13, G 10, T 16] 2 CCCAGACTCGTCTTGCCAAGTGACTGCTCAAGGGGAGCGCCCACAGGGTA [A 11, C 16, G 15, T 8] 3 TCTAGAGCATTCGATCACACGGAAAAATTTTATTCGGCAGATCCAGTTAA [A 17, C 10, G 9, T 14] Total of 200 bases A 49 (24.50%) C 51 (25.50%) G 51 (25.50%) T 49 (24.50%) After mutation 0 TCGCTTGGGGGGAGTAAGGTGTTCGCAATAGTCACAGCCGGTCTCGCATA [A 10, C 11, G 17, T 12] 1 GCTTTTCTACGCATCGAGCCTGTACTGTGTCAGTAGACATGTACTCCCCA [A 10, C 15, G 10, T 15] 2 GACTCGTTTGCCAAGTGACCTGCTCAAGGGGAGCGCCCACAGGGTACTAG [A 11, C 14, G 16, T 9] 3 AGCAGTTCGATCACACGGAAAAATTTTTTCGGCAGATCCAGTTAA [A 15, C 9, G 9, T 12] Total of 195 bases A 46 (23.59%) C 49 (25.13%) G 52 (26.67%) T 48 (24.62%) </pre> <p> Here is a sequence of 200 mutated 90,000 times </p> <pre> Before mutation 0 CGCACCCTCCTTCGGGCGAAGCGGGGTTATTTACCCGATTCACCGCACCT [A 8, C 19, G 12, T 11] 1 CGCGGCTCTAAAAGTTCGAAGATCCCTGCGTAGACTGGACCTCATAACAA [A 15, C 14, G 11, T 10] 2 CCGTATTACGCTCCGTACGAATAACTCGGTTGTGCGATGCGGAAAGCGAC [A 12, C 13, G 14, T 11] 3 ATTTCTCAGGCCGAACGTACGCTTCTCTCCTACACCTCGCCTCGAGTATG [A 9, C 18, G 9, T 14] Total of 200 bases A 44 (22.00%) C 64 (32.00%) G 46 (23.00%) T 46 (23.00%) After mutation 0 CGTTTAAGCGGGAAGGTCGTCCACCACACGAAGGCCCCCCTCCAGCACTA [A 12, C 19, G 12, 
T 7] 1 CCCTGGGCGAGTGCGACCGGCTACAAGAATACGGACAACCGCACTTCGTA [A 13, C 16, G 14, T 7] 2 GTTGCGACGCCAAACCGAGGTTTGAAAGGCAGCCGAAACTCCTAGCCATC [A 14, C 15, G 13, T 8] 3 CGGGCAGCCCACTGGTTTAGATGTTACGTGATGGAAAGGTGGATCATCGT [A 11, C 9, G 17, T 13] 4 GGTTGCCCTGGCGTTGCGTACTTCGTGTCTGAATATTGGTTACAATCGCT [A 7, C 11, G 14, T 18] 5 CGACGACCTGACGATTCTGGATCAACCAACTGCCTAAAGTCGCGAATTAA [A 16, C 14, G 10, T 10] 6 TAATCGACTGCATCACATGTTAGTCTAGTCATCACGAGTACATAGTGTGG [A 14, C 10, G 11, T 15] 7 CCACCTCCTAACGTACTATTTACATAGGATATGGCAGCCCTAACGCACAC [A 15, C 17, G 7, T 11] 8 TGTACGAAAGTGAGACTCCTTACCGAGATTCTAGGCTTAGTGATCCTTGA [A 13, C 10, G 12, T 15] 9 AAACGCTAGCCTAGGAATGACGGGGACTTGATCGGCC [A 10, C 9, G 12, T 6] Total of 487 bases A 125 (25.67%) C 130 (26.69%) G 122 (25.05%) T 110 (22.59%) </pre> <br /> Here is an alternate demonstration <syntaxhighlight lang="java">import java.util.Arrays; import java.util.Random; Line 1,371 ⟶ 1,987: private static final int OP_COUNT = 3; private static final char[] BASES = {'A', 'C', 'G', 'T'}; }</~~lang~~syntaxhighlight> {{out}} Line 1,405 ⟶ 2,021: =={{header\|JavaScript}}== <~~lang~~syntaxhighlight lang="javascript">// Basic set-up const numBases = 250 const numMutations = 30 Line 1,562 ⟶ 2,178: console.log('\nMUTATED BASE COUNTS:') printBases(mut); </syntaxhighlight> ~~</lang>~~ {{out}} Line 1,626 ⟶ 2,242: T: 67 Σ: 261 </pre> =={{header\|jq}}== {{Works with\|jq}} '''Works with gojq, the Go implementation of jq''' '''Adapted from [[#Wren\|Wren]]''' Since jq does not include a PRNG, the following assumes that an external source of entropy such as /dev/urandom is available. See the "Invocation" section below for details. <syntaxhighlight lang="jq"> ### Generic utilities # Output: a PRN in range(0; .) def prn: if . == 1 then 0 else . as $n \| (($n-1)\|tostring\|length) as $w \| [limit($w; inputs)] \| join("") \| tonumber \| if . < $n then . 
else ($n \| prn) end end; # bag of words def bow(stream): reduce stream as $word ({}; .[($word\|tostring)] += 1); # Emit a stream of the constituent characters of the input string def chars: explode[] \| [.] \| implode; def lpad($len): tostring \| ($len - length) as $l \| (" " * $l) + .; # Print $n-character segments at a time, each prefixed by a 1-based index def pretty_nwise($n): (length \| tostring \| length) as $len \| def _n($i): if length == 0 then empty else "\($i\|lpad($len)): \(.[:$n])", (.[$n:] \| _n($i+$n)) end; _n(1); ### Biology def bases: ["A", "C", "G", "T"]; def randomBase: bases \| .[length\|prn]; # $w is an array [weightSwap, weightDelete, weightInsert] # specifying the weights out of 300 for each of swap, delete and insert # Input: an object {dna} # Output: an object {dna, message} def mutate($w): def removeAt($p): .[:$p] + .[$p+1:]; (.dna\|length) as $le # get a random position in the dna to mutate \| ($le \| prn) as $p # get a random number between 0 and 299 inclusive \| (300 \| prn) as $r \| .dna \|= [chars] \| if $r < $w[0] then # swap randomBase as $base \| .message = " Change @\($p) \(.dna[$p]) to \($base)" \| .dna[$p] = $base elif $r < $w[0] + $w[1] then # delete .message = " Delete @\($p) \(.dna[$p])" \| .dna \|= removeAt($p) else # insert randomBase as $base \| .message = " Insert @\($p) \($base)" \| .dna \|= .[:$p] + [$base] + .[$p:] end \| .dna \|= join("") ; # Generate a random dna sequence of given length: def generate($n): [range(0; $n) \| randomBase] \| join(""); # Pretty print dna and stats. def prettyPrint($rowLen): "SEQUENCE:", pretty_nwise($rowLen), ( bow(chars) as $baseMap \| "\nBASE COUNT:", ( bases[] as $c \| " \($c): \($baseMap[$c] // 0)" ), " ------", " Σ: \(length)", " ======\n" ) ; # For displaying the weights def pretty_weights: " Change: \(.[0])\n Delete: \(.[1])\n Insert: \(.[2])"; # Arguments are length, weights, mutations def task($n; $w; $muts ): generate($n) \| . as $dna \| prettyPrint(50), "\nWEIGHTS (0 .. 
300):", ($w\|pretty_weights), "\nMUTATIONS (\($muts)):", (reduce range(0;$muts) as $i ({$dna}; mutate($w) \| .emit += [.message] ) \| (.emit \| join("\n")), "", (.dna \| prettyPrint(50)) ) ; task(250; # length [100, 100, 100]; # use e.g. [0, 300, 0] to choose only deletions 10 # mutations ) </syntaxhighlight> '''Invocation:''' <pre> < /dev/urandom tr -cd '0-9' \| fold -w 1 \| $JQ -cnr -f rc-sequence-mutation.jq </pre> {{output}} <pre> SEQUENCE: 1: AGGACACTGCCTTATTTTGTTTCAACAGAAGCCATCTCGAGCAACTACGT 51: GGCCACACAAGCTAATACGAATGACCTTGTATGGGGAGTTACGGGGGGTT 101: TATCTTGAGAAATGGTATAACGATACCCCAAGTGGCGTGATAGGCCGCGC 151: GGGCCTCAGAATAGGTCGTAGATCCGTAAGGGCACCGGGAGCCTTTCTTC 201: TCGTATAATCCGCCGAGATGTTAAAAGACAGCTATGGATTCCCGTAATGC BASE COUNT: A: 66 C: 57 G: 67 T: 60 ------ Σ: 250 ====== WEIGHTS (0 .. 300): Change: 100 Delete: 100 Insert: 100 MUTATIONS (10): Insert @76 T Delete @104 C Change @197 T to T Insert @206 A Delete @184 C Change @69 A to C Insert @211 G Delete @31 C Insert @165 G Insert @234 T SEQUENCE: 1: AGGACACTGCCTTATTTTGTTTCAACAGAAGCATCTCGAGCAACTACGTG 51: GCCACACAAGCTAATACGCATGACCTTTGTATGGGGAGTTACGGGGGGTT 101: TATTTGAGAAATGGTATAACGATACCCCAAGTGGCGTGATAGGCCGCGCG 151: GGCCTCAGAATAGGTGCGTAGATCCGTAAGGGCACGGGAGCCTTTCTTCT 201: CGTATAAATCCGGCCGAGATGTTAAAAGACAGCTTATGGATTCCCGTAAT 251: GC BASE COUNT: A: 66 C: 55 G: 69 T: 62 ------ Σ: 252 ====== </pre> =={{header\|Julia}}== <~~lang~~syntaxhighlight lang="julia">dnabases = ['A', 'C', 'G', 'T'] randpos(seq) = rand(1:length(seq)) # 1 mutateat(pos, seq) = (s = seq[:]; s[pos] = rand(dnabases); s) # 2-1 Line 1,682 ⟶ 2,470: testbioseq() </~~lang~~syntaxhighlight>{{out}} <pre> 500nt DNA sequence: Line 1,743 ⟶ 2,531: </pre> =={{header\|Lua}}== Using the <code>prettyprint()</code> function from [[Bioinformatics/base_count#Lua]] (not replicated here) <syntaxhighlight lang="lua">math.randomseed(os.time()) bases = {"A","C","T","G"} function randbase() return bases[math.random(#bases)] end function mutate(seq) local i,h = math.random(#seq), 
"%-6s %3s at %3d" local old,new = seq:sub(i,i), randbase() local ops = { function(s) h=h:format("Swap", old..">"..new, i) return s:sub(1,i-1)..new..s:sub(i+1) end, function(s) h=h:format("Delete", " -"..old, i) return s:sub(1,i-1)..s:sub(i+1) end, function(s) h=h:format("Insert", " +"..new, i) return s:sub(1,i-1)..new..s:sub(i) end, } local weighted = { 1,1,2,3 } local n = weighted[math.random(#weighted)] return ops[n](seq), h end local seq,hist="",{} for i = 1, 200 do seq=seq..randbase() end print("ORIGINAL:") prettyprint(seq) print() for i = 1, 10 do seq,h=mutate(seq) hist[#hist+1]=h end print("MUTATIONS:") for i,h in ipairs(hist) do print(" "..h) end print() print("MUTATED:") prettyprint(seq)</syntaxhighlight> {{out}} <pre>ORIGINAL: LOCUS AB000000 200 bp mRNA linear HUM 01-JAN-2001 BASE COUNT 50 a 47 c 51 g 52 t ORIGIN 1 atggatccga cgtgattata ttcactatgg ggcaatcgca cattagtttt atctccatca 61 gcgacacgat ggggatcaat gggctgctac tggagacgtc cgatgcgatg attggtaatt 121 gcatagagtg gatctccttt aacctagtag aaacgccctt ccggttcagc atggcgagtg 181 cgtacaacgt cacccagact MUTATIONS: Insert +A at 190 Delete -C at 134 Swap A>G at 57 Delete -G at 83 Insert +T at 81 Swap T>T at 164 Delete -C at 199 Swap T>G at 147 Swap C>G at 33 Swap C>G at 191 MUTATED: LOCUS AB000000 199 bp mRNA linear HUM 01-JAN-2001 BASE COUNT 50 a 43 c 54 g 52 t ORIGIN 1 atggatccga cgtgattata ttcactatgg gggaatcgca cattagtttt atctccgtca 61 gcgacacgat ggggatcaat tggctgctac tggagacgtc cgatgcgatg attggtaatt 121 gcatagagtg gattccttta acctaggaga aacgcccttc cggttcagca tggcgagtgc 181 gtacaacgat gacccagat</pre> =={{header\|Mathematica}} / {{header\|Wolfram Language}}== BioSequence is a fundamental data type in Mathematica: <~~lang~~syntaxhighlight ~~Mathematica~~lang="mathematica">SeedRandom[13122345]; seq = BioSequence["DNA", "ATAAACGTACGTTTTTAGGCT"]; randompos = RandomInteger[seq["SequenceLength"]]; Line 1,779 ⟶ 2,628: ends = Rest[Accumulate[Prepend[StringLength /@ parts, 0]]]; StringRiffle[MapThread[ToString[#1] <> "-" <> 
ToString[#2] <> ": " <> #3 &, {begins, ends, parts}], "\n"] Tally[Characters[seq["SequenceString"]]]</~~lang~~syntaxhighlight> {{out}} <pre>1-50: TAGCAGGGGAATTGTCGACTCCCGGGTTTCAATTGCCAACCAAGCATATT Line 1,793 ⟶ 2,642: 201-246: ACTTTGGTCCAAGATAGTTAGATATCAATCCGTATAATGTAGGCTT {{"T", 60}, {"A", 70}, {"G", 67}, {"C", 49}}</pre> =={{header\|Nim}}== <~~lang~~syntaxhighlight ~~Nim~~lang="nim">import random import strformat import strutils Line 1,911 ⟶ 2,759: echo "\nMutated sequence" echo "————————————————\n" dnaSeq.display()</~~lang~~syntaxhighlight> {{out}} Line 1,947 ⟶ 2,795: TCGTGACTGC CAGTCGAC 198 //</pre> =={{header\|Perl}}== {{trans\|Raku}} <~~lang~~syntaxhighlight lang="perl">use strict; use warnings; use feature 'say'; Line 1,994 ⟶ 2,841: say "Total bases: ". length $mutate; say "$_: $cnt{$_}" for @bases; </syntaxhighlight> ~~</lang>~~ {{out}} <pre>Original DNA strand: Line 2,019 ⟶ 2,866: G: 51 T: 51</pre> =={{header\|Phix}}== <!--<~~lang~~syntaxhighlight ~~Phix~~lang="phix">(phixonline)--> <span style="color: #004080;">string</span> <span style="color: #000000;">dna</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #000000;">200</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">rand</span><span style="color: #0000FF;">(</span><span style="color: #000000;">300</span><span style="color: #0000FF;">))</span> <span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> <span style="color: #000000;">dna</span><span style="color: 
#0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"ACGT"</span><span style="color: #0000FF;">[</span><span style="color: #7060A8;">rand</span><span style="color: #0000FF;">(</span><span style="color: #000000;">4</span><span style="color: #0000FF;">)]</span> <span style="color: #008080;">end</span> <span style="color: #008080;">for</span> Line 2,057 ⟶ 2,903: <span style="color: #000000;">mutate</span><span style="color: #0000FF;">()</span> <span style="color: #000000;">show</span><span style="color: #0000FF;">()</span> <!--</~~lang~~syntaxhighlight>--> {{out}} <pre> Line 2,097 ⟶ 2,943: Base counts: A:128, C:110, G:119, T:123, total:480 </pre> =={{header\|PureBasic}}== <syntaxhighlight lang="purebasic">#BASE$="ACGT" #SEQLEN=200 #PROTOCOL=#True Global dna.s Define i.i Procedure pprint() Define p.i, cnt.i, sum.i For p=1 To Len(dna) Step 50 Print(RSet(Str(p-1)+": ",5)) PrintN(Mid(dna,p,50)) Next PrintN("Base counts:") For p=1 To 4 cnt=CountString(dna,Mid(#BASE$,p,1)) : sum+cnt Print(Mid(#BASE$,p,1)+": "+Str(cnt)+", ") Next PrintN("Total: "+Str(sum)) EndProcedure Procedure InsertAtPos(basenr.i,position.i) If #PROTOCOL : PrintN("Insert base "+Mid(#BASE$,basenr,1)+" at position "+Str(position)) : EndIf dna=InsertString(dna,Mid(#BASE$,basenr,1),position) EndProcedure Procedure EraseAtPos(position.i) If #PROTOCOL : PrintN("Erase base "+Mid(dna,position,1)+" at position "+Str(position)) : EndIf If position>0 And position<=Len(dna) dna=Left(dna,position-1)+Right(dna,Len(dna)-position) EndIf EndProcedure Procedure OverwriteAtPos(basenr.i,position.i) If #PROTOCOL : PrintN("Change base at position "+Str(position)+" from "+Mid(dna,position,1)+" to "+Mid(#BASE$,basenr,1)) : EndIf If position>0 And position<=Len(dna) position-1 PokeS(@dna+2position,Mid(#BASE$,basenr,1),-1,#PB_String_NoZero) EndIf EndProcedure If OpenConsole()=0 : End 1 : EndIf For i=1 To #SEQLEN : 
dna+Mid(#BASE$,Random(4,1),1) : Next PrintN("Initial sequence:") pprint() For i=1 To 10 Select Random(2) Case 0 : InsertAtPos(Random(4,1),Random(Len(dna),1)) Case 1 : EraseAtPos(Random(Len(dna),1)) Case 2 : OverwriteAtPos(Random(4,1),Random(Len(dna),1)) EndSelect Next PrintN("After 10 mutations:") pprint() Input()</syntaxhighlight> {{out}} <pre>Initial sequence: 0: AAGTTTACGTCGGACTTCATTAATCGGTTTAGTCAGACCCGATCCAAATC 50: TTGCTTTCACTCCGCATTCTTCTCATGAGTAAAAGGCTGCTCCTGCACTA 100: AAGCGTTCTCAACACCTTGGAGAGCCATCTCGGTACTCCGCGCAAAATAG 150: CCATAGAGGGTATCAGGAAACGCATCGAAGGTTTAGCCGAACTAAGGTCT Base counts: A: 54, C: 52, G: 42, T: 52, Total: 200 Change base at position 7 from A to T Insert base T at position 66 Erase base G at position 198 Insert base C at position 32 Change base at position 80 from A to G Erase base A at position 2 Insert base C at position 33 Insert base C at position 201 Insert base G at position 70 Erase base T at position 187 After 10 mutations: 0: AGTTTTCGTCGGACTTCATTAATCGGTTTACGCTCAGACCCGATCCAAAT 50: CTTGCTTTCACTCCGCTATGTCTTCTCATGGGTAAAAGGCTGCTCCTGCA 100: CTAAAGCGTTCTCAACACCTTGGAGAGCCATCTCGGTACTCCGCGCAAAA 150: TAGCCATAGAGGGTATCAGGAAACGCATCGAAGGTTAGCCGAACTAAGTC 200: CT Base counts: A: 51, C: 55, G: 43, T: 53, Total: 202</pre> =={{header\|Python}}== In function seq_mutate, the argument kinds selects between the three kinds of mutation. The characters I, D, and S are chosen from that string to give the kind of mutation to perform, so the more often a character appears in it, the more often that type of mutation is performed.<br> Similarly, the base used for a substitution or insertion is chosen from the parameter choice - the more often a base appears in it, the more likely it is to be chosen in any insertion/substitution. 
<~~lang~~syntaxhighlight lang="python">import random from collections import Counter Line 2,147 ⟶ 3,077: print(f" {kind:>10} @{index}") print() seq_pp(mseq)</~~lang~~syntaxhighlight> {{out}} Line 2,190 ⟶ 3,120: T: 72 TOT= 251</pre> =={{header\|Quackery}}== <code>prettyprint</code> and <code>tallybases</code> are defined at [[Bioinformatics/base count#Quackery]]. <~~lang~~syntaxhighlight ~~Quackery~~lang="quackery"> [ $ "ACGT" 4 random peek ] is randomgene ( --> c ) [ $ "" swap times [ randomgene join ] ] is randomsequence ( n --> $ ) [ dup size random 3 random [ table ~~[ pluck drop ]~~ [ pluck drop ] [ randomgene unrot stuff ] [ randomgene unrot poke ] ] Line 2,211 ⟶ 3,141: cr cr say "Mutating..." cr 10 times mutate dup prettyprint cr cr tallybases</~~lang~~syntaxhighlight> {{out}} Line 2,242 ⟶ 3,172: total 201 </pre> =={{header\|Racket}}== <~~lang~~syntaxhighlight lang="racket">#lang racket (define current-S-weight (make-parameter 1)) Line 2,322 ⟶ 3,251: (define s+d (parameterize ((current-D-weight 5)) (for/fold ((s initial-sequence)) ((_ 10)) (mutate s)))) (newline) (report-sequence s+d))</~~lang~~syntaxhighlight> {{out}} Line 2,386 ⟶ 3,315: T : 42 TOTAL: 193</pre> =={{header\|Raku}}== (formerly Perl 6) Line 2,393 ⟶ 3,321: <syntaxhighlight lang="raku" ~~perl6~~line>my @bases = <A C G T>; # The DNA strand Line 2,424 ⟶ 3,352: sub diff ($orig, $repl) { ($orig.comb Z $repl.comb).map( -> ($o, $r) { $o eq $r ?? $o !! 
$r.lc }).join }</~~lang~~syntaxhighlight> {{out}} <pre>ORIGINAL DNA STRAND: Line 2,449 ⟶ 3,377: G 43 T 53</pre> =={{header\|Ring}}== <~~lang~~syntaxhighlight lang="ring"> row = 0 dnaList = [] Line 2,564 ⟶ 3,491: total = dnaBase["A"] + dnaBase["T"] + dnaBase["C"] + dnaBase["G"] see "Total: " + total+ nl + nl </syntaxhighlight> ~~</lang>~~ {{out}} <pre> Line 2,610 ⟶ 3,537: A: 83, T: 32, C: 36, G: 49, Total: 200 </pre> =={{header\|Ruby}}== <syntaxhighlight lang="ruby">class DNA_Seq attr_accessor :seq def initialize(bases: %i[A C G T] , size: 0) @bases = bases @seq = Array.new(size){ bases.sample } end def mutate(n = 10) n.times{\|n\| method([:s, :d, :i].sample).call} end def to_s(n = 50) just_size = @seq.size / n (0...@seq.size).step(n).map{\|from\| "#{from.to_s.rjust(just_size)} " + @seq[from, n].join}.join("\n") + "\nTotal #{seq.size}: #{@seq.tally.sort.to_h.inspect}\n\n" end def s = @seq[rand_index]= @bases.sample def d = @seq.delete_at(rand_index) def i = @seq.insert(rand_index, @bases.sample ) alias :swap :s alias :delete :d alias :insert :i private def rand_index = rand( @seq.size ) end puts test = DNA_Seq.new(size: 200) test.mutate puts test test.delete puts test </syntaxhighlight> {{out}} <pre> 0 TAAGGTGAGGAGTGTGATGGAGTTCGGTGGCTAGCCACAAATACAACACA 50 CTCACCCATACTCGCCTCTGAAGCATGTTTTACTTGGATAGGGCCTACAG 100 CAGTATTCACCCATTCCTCGGCTCCTGACCTGATGTAGGTCTATGTGCGG 150 GAAAATAGGACAATACTGCCGAAGTCATATCCATTGGAGGGGCCCCAGGC Total 200: {:A=>51, :C=>50, :G=>52, :T=>47} 0 TAAGCTGAGGTGTGTGATGGAGTTCGGTGGCTAGCCACAAATACAACACA 50 CTCACCCATACTCGCCTCTGAAGCATGTTTTAATTGGATAGGGCCTACAG 100 CAGTATTCACCCTTCCTCCGCTCCTGACCTGATATAGGTCTATGTGCGGG 150 AAAATAGGACAATACTGCCGAAGTCATATCCATTGGAGGGGCCCCAAGGC Total 200: {:A=>52, :C=>51, :G=>49, :T=>48} 0 TAAGCTGAGGTGTGTGATGGAGTTCGGTGGCTAGCCACAAATACAACACA 50 CTCACCCATACTCGCTCTGAAGCATGTTTTAATTGGATAGGGCCTACAGC 100 AGTATTCACCCTTCCTCCGCTCCTGACCTGATATAGGTCTATGTGCGGGA 150 AAATAGGACAATACTGCCGAAGTCATATCCATTGGAGGGGCCCCAAGGC Total 199: {:A=>52, :C=>50, :G=>49, 
:T=>48} </pre> =={{header\|Rust}}== <syntaxhighlight lang="rust"> use rand::prelude::; use std::collections::HashMap; use std::fmt::{Display, Formatter, Error}; pub struct Seq<'a> { alphabet: Vec<&'a str>, distr: rand::distributions::Uniform<usize>, pos_distr: rand::distributions::Uniform<usize>, seq: Vec<&'a str>, } impl Display for Seq<'_> { fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { let pretty: String = self.seq .iter() .enumerate() .map(\|(i, nt)\| if (i + 1) % 60 == 0 { format!("{}\n", nt) } else { nt.to_string() }) .collect(); let counts_hm = self.seq .iter() .fold(HashMap::<&str, usize>::new(), \|mut m, nt\| { m.entry(nt).or_default() += 1; m }); let mut counts_vec: Vec<(&str, usize)> = counts_hm.into_iter().collect(); counts_vec.sort_by(\|a, b\| a.0.cmp(&b.0)); let counts_string = counts_vec .iter() .fold(String::new(), \|mut counts_string, (nt, count)\| { counts_string += &format!("{} = {}\n", nt, count); counts_string }); write!(f, "Seq:\n{}\n\nLength: {}\n\nCounts:\n{}", pretty, self.seq.len(), counts_string) } } impl Seq<'_> { pub fn new(alphabet: Vec<&str>, len: usize) -> Seq { let distr = rand::distributions::Uniform::new_inclusive(0, alphabet.len() - 1); let pos_distr = rand::distributions::Uniform::new_inclusive(0, len - 1); let seq: Vec<&str> = (0..len) .map(\|_\| { alphabet[thread_rng().sample(distr)] }) .collect(); Seq { alphabet, distr, pos_distr, seq } } pub fn insert(&mut self) { let pos = thread_rng().sample(self.pos_distr); let nt = self.alphabet[thread_rng().sample(self.distr)]; println!("Inserting {} at position {}", nt, pos); self.seq.insert(pos, nt); } pub fn delete(&mut self) { let pos = thread_rng().sample(self.pos_distr); println!("Deleting {} at position {}", self.seq[pos], pos); self.seq.remove(pos); } pub fn swap(&mut self) { let pos = thread_rng().sample(self.pos_distr); let cur_nt = self.seq[pos]; let new_nt = self.alphabet[thread_rng().sample(self.distr)]; println!("Replacing {} at position {} with {}", cur_nt, pos, 
new_nt); self.seq[pos] = new_nt; } } fn main() { let mut seq = Seq::new(vec!["A", "C", "T", "G"], 200); println!("Initial sequnce:\n{}", seq); let mut_distr = rand::distributions::Uniform::new_inclusive(0, 2); for _ in 0..10 { let mutation = thread_rng().sample(mut_distr); if mutation == 0 { seq.insert() } else if mutation == 1 { seq.delete() } else { seq.swap() } } println!("\nMutated sequence:\n{}", seq); } </syntaxhighlight> {{out}} <pre> Initial sequnce: Seq: TAAGTTTAGTCTGTTTACGAGATCTAGAGGAGGACACCGTGTAGAGGGGATTTGTCAGGA CACATGCATGGCACCCTAGTCAAATAGTGCCGAGAACAGGCTCTCCTGAGAAAGTTAGGT CTGCCGAAGTGACGAAGTGCACGTTATAGCTCTATTAAGTATGTTCGTTAACAGGTATTA ATGCTCTTAGCCAAGACCGT Length: 200 Counts: A = 56 C = 38 G = 53 T = 53 Deleting C at position 197 Inserting T at position 157 Replacing C at position 149 with G Replacing A at position 171 with G Replacing T at position 182 with G Deleting C at position 124 Inserting T at position 128 Replacing G at position 175 with C Deleting A at position 35 Replacing A at position 193 with G Mutated sequence: Seq: TAAGTTTAGTCTGTTTACGAGATCTAGAGGAGGACCCGTGTAGAGGGGATTTGTCAGGAC ACATGCATGGCACCCTAGTCAAATAGTGCCGAGAACAGGCTCTCCTGAGAAAGTTAGGTC TGCGAAGTTGACGAAGTGCACGTTATAGGTCTATTATAGTATGTTCGTTAGCAGCTATTA AGGCTCTTAGCCAGGACGT Length: 199 Counts: A = 53 C = 36 G = 56 T = 54</pre> =={{header\|Swift}}== <~~lang~~syntaxhighlight lang="swift">let bases: [Character] = ["A", "C", "G", "T"] enum Action: CaseIterable { Line 2,669 ⟶ 3,791: } printSeq(d)</~~lang~~syntaxhighlight> {{out}} Line 2,697 ⟶ 3,819: G: 56 T: 45</pre> =={{header\|V (Vlang)}}== {{trans\|Go}} <syntaxhighlight lang="v (vlang)">import rand import rand.seed const bases = "ACGT" // 'w' contains the weights out of 300 for each // of swap, delete or insert in that order. 
fn mutate(dna string, w [3]int) string { le := dna.len // get a random position in the dna to mutate p := rand.intn(le) or {0} // get a random number between 0 and 299 inclusive r := rand.intn(300) or {0} mut bytes := dna.bytes() match true { r < w[0] { // swap base := bases[rand.intn(4) or {0}] println(" Change @${p:3} ${[bytes[p]].bytestr()} to ${[base].bytestr()}") bytes[p] = base } r < w[0]+w[1] { // delete println(" Delete @${p:3} ${bytes[p]}") bytes.delete(p) //copy(bytes[p:], bytes[p+1:]) bytes = bytes[0..le-1] } else { // insert base := bases[rand.intn(4) or {0}] bytes << 0 bytes.insert(p,bytes[p]) //copy(bytes[p+1:], bytes[p:]) println(" Insert @${p:3} $base") bytes[p] = base } } return bytes.bytestr() } // Generate a random dna sequence of given length. fn generate(le int) string { mut bytes := []u8{len:le} for i := 0; i < le; i++ { bytes[i] = bases[rand.intn(4) or {0}] } return bytes.bytestr() } // Pretty print dna and stats. fn pretty_print(dna string, rowLen int) { println("SEQUENCE:") le := dna.len for i := 0; i < le; i += rowLen { mut k := i + rowLen if k > le { k = le } println("${i:5}: ${dna[i..k]}") } mut base_map := map[byte]int{} // allows for 'any' base for i in 0..le { base_map[dna[i]]++ } mut bb := base_map.keys() bb.sort() println("\nBASE COUNT:") for base in bb { println(" $base: ${base_map[base]:3}") } println(" ------") println(" Σ: $le") println(" ======\n") } // Express weights as a string. fn wstring(w [3]int) string { return " Change: ${w[0]}\n Delete: ${w[1]}\n Insert: ${w[2]}\n" } fn main() { rand.seed(seed.time_seed_array(2)) mut dna := generate(250) pretty_print(dna, 50) muts := 10 w := [100, 100, 100]! // use e.g. 
{0, 300, 0} to choose only deletions println("WEIGHTS (ex 300):\n${wstring(w)}") println("MUTATIONS ($muts):") for _ in 0..muts { dna = mutate(dna, w) } println('') pretty_print(dna, 50) }</syntaxhighlight> {{out}} Sample run: <pre> SEQUENCE: 0: CATTGGATTGCTAGTCGTTCAATAGCGAACGAACAGTTTGCATGAATCAG 50: AGAGAGCCTGAAACCTTGGTTGGTATCGACACAACCTCATAATTCACATT 100: CACAAACTTATTTTCGGATCCGCGAAAACGCAAGCGCATTAAGAGACACC 150: CCCAGAGACTCAATTCCGGATTTGCGCTGCTATATACCCACATTGATGAT 200: ATAGGGCTTAGAACGGCCTTAGCCCCGTCGGCTAGTTTCTGAAGTCTCTT BASE COUNT: A: 71 C: 62 G: 52 T: 65 ------ Σ: 250 ====== WEIGHTS (ex 300): Change: 100 Delete: 100 Insert: 100 MUTATIONS (10): Delete @166 "C" Change @185 "C" to "G" Insert @230 "T" Insert @230 "G" Insert @226 "C" Change @162 "A" to "C" Change @236 "G" to "C" Insert @ 25 "C" Delete @ 75 "A" Change @104 "A" to "T" SEQUENCE: 0: CATTGGATTGCTAGTCGTTCAATAGCCGAACGAACAGTTTGCATGAATCA 50: GAGAGAGCCTGAAACCTTGGTTGGTTCGACACAACCTCATAATTCACATT 100: CACATACTTATTTTCGGATCCGCGAAAACGCAAGCGCATTAAGAGACACC 150: CCCAGAGACTCACTTCGGATTTGCGCTGCTATATAGCCACATTGATGATA 200: TAGGGCTTAGAACGGCCTTAGCCCCGCTCGGGTCTACTTTCTGAAGTCTC 250: TT BASE COUNT: A: 68 C: 64 G: 53 T: 67 ------ Σ: 252 ====== </pre> =={{header\|Wren}}== Line 2,702 ⟶ 3,973: {{libheader\|Wren-sort}} {{libheader\|Wren-fmt}} <~~lang~~syntaxhighlight ~~ecmascript~~lang="wren">import "random" for Random import "./fmt" for Fmt import "./sort" for Sort var rand = Random.new() Line 2,779 ⟶ 4,050: for (i in 0...muts) dna = mutate.call(dna, w) System.print() prettyPrint.call(dna, 50)</~~lang~~syntaxhighlight> {{out}} Line 2,835 ⟶ 4,106: </pre> =={{header\|Yabasic}}== {{trans\|Phix}} <syntaxhighlight lang="yabasic">// Rosetta Code problem: http://rosettacode.org/wiki/Sequence_mutation // by Galileo, 07/2022 r = int(ran(300)) for i = 1 to 200 + r : dna$ = dna$ + mid$("ACGT", int(ran(4))+1, 1) : next sub show() local acgt(4), i, j, x, total for i = 1 to len(dna$) x = instr("ACGT", mid$(dna$, i, 1)) acgt(x) = acgt(x) + 1 next for i = 1 
to 4 : total = total + acgt(i) : next for i = 1 to len(dna$) step 50 print i, ":\t"; for j = 0 to 49 step 10 print mid$(dna$, i+j, 10), " "; next print next print "\nBase counts: A: ", acgt(1), ", C: ", acgt(2), ", G: ", acgt(3), ", T: ", acgt(4), ", total: ", total end sub sub mutate() local i, p, sdi$, rep$, was$ print for i = 1 to 10 p = int(ran(len(dna$))) + 1 sdi$ = mid$("SDI", int(ran(3)) + 1, 1) rep$ = mid$("ACGT", int(ran(4)) + 1, 1) was$ = mid$(dna$, p, 1) switch sdi$ case "S": mid$(dna$, p, 1) = rep$ print "swapped ", was$, " at ", p, " for ", rep$ : break case "D": dna$ = left$(dna$, p - 1) + right$(dna$, len(dna$) - p) print "deleted ", was$, " at ", p : break case "I": dna$ = left$(dna$, p - 1) + rep$ + right$(dna$, (len(dna$) - p + 1)) print "inserted ", rep$, " at ", p, ", before ", was$ : break end switch next print end sub show() mutate() show()</syntaxhighlight> {{out}} <pre>1: TCCATCGTGG GATCGCTCTA GCGGTATGCT ATCATTCCTA TAGCAATTCT 51: CAGGGGGCCC GACGGCGCCG ATCACATGTG ATCCTTGTGT GATCGCTTCA 101: TGTCATGGCT TTCTAGACCT TGGATAAGCA TGTACGGTTG GACCAGTCGT 151: GCGTCGGTAA ACAACGCATC TGTGTTATAT CCGTCGAATA ACCCATATGT 201: CTCCAGTCTA ATCCCCTAAG CAACTGCTCA AGGTAAAATG CAAATACAGG 251: TGAGGAGTCC TCGAAGGGGT CGCACCGCAA TATGGGCGTC CCTTATTGGC 301: CCTCATCAGT AT Base counts: A: 72, C: 81, G: 76, T: 83, total: 312 inserted G at 89, before G swapped T at 174 for C deleted A at 31 deleted G at 89 deleted C at 275 inserted A at 278, before A inserted C at 200, before C inserted C at 232, before G deleted G at 10 swapped A at 124 for C 1: TCCATCGTGG ATCGCTCTAG CGGTATGCTT CATTCCTATA GCAATTCTCA 51: GGGGGCCCGA CGGCGCCGAT CACATGTGAT CCTTGTGTGA TCGCTTCATG 101: TCATGGCTTT CTAGACCTTG GATCAGCATG TACGGTTGGA CCAGTCGTGC 151: GTCGGTAAAC AACGCATCTG CGTTATATCC GTCGAATAAC CCATATGTCC 201: TCCAGTCTAA TCCCCTAAGC AACTGCTCAA CGGTAAAATG CAAATACAGG 251: TGAGGAGTCC TCGAAGGGGT CGCACGCAAA TATGGGCGTC CCTTATTGGC 301: CCTCATCAGT AT Base counts: A: 71, C: 84, G: 75, T: 82, total: 312 ---Program 
done, press RETURN---</pre> =={{header\|zkl}}== <~~lang~~syntaxhighlight lang="zkl">var [const] bases="ACGT", lbases=bases.toLower(); dna:=(190).pump(Data().howza(3),(0).random.fp(0,4),bases.get); // bucket of bytes Line 2,868 ⟶ 4,226: [0..,50].zipWith(fcn(n,bases){ println("%6d: %s".fmt(n,bases.concat())) }, dna.walker().walk.fp(50)).pump(Void); // .pump forces the iterator }</~~lang~~syntaxhighlight> {{out}} <pre> Line 2,896 ⟶ 4,254: Base Counts: 191 : A(49) C(45) G(57) T(40) </pre> ~~=={{header\|Rust}}==~~ ~~<lang Rust>~~ ~~use rand::prelude::;~~ ~~use std::collections::HashMap;~~ ~~use std::fmt::{Display, Formatter, Error};~~ ~~pub struct Seq<'a> {~~ ~~alphabet: Vec<&'a str>,~~ ~~distr: rand::distributions::Uniform<usize>,~~ ~~pos_distr: rand::distributions::Uniform<usize>,~~ ~~seq: Vec<&'a str>,~~ } ~~impl Display for Seq<'_> {~~ ~~fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {~~ ~~let pretty: String = self.seq~~ ~~.iter()~~ ~~.enumerate()~~ ~~.map(\|(i, nt)\| if (i + 1) % 60 == 0 { format!("{}\n", nt) } else { nt.to_string() })~~ ~~.collect();~~ ~~let counts_hm = self.seq~~ ~~.iter()~~ ~~.fold(HashMap::<&str, usize>::new(), \|mut m, nt\| {~~ m.entry(nt).or_default() += 1; m ~~});~~ ~~let mut counts_vec: Vec<(&str, usize)> = counts_hm.into_iter().collect();~~ ~~counts_vec.sort_by(\|a, b\| a.0.cmp(&b.0));~~ ~~let counts_string = counts_vec~~ ~~.iter()~~ ~~.fold(String::new(), \|mut counts_string, (nt, count)\| {~~ ~~counts_string += &format!("{} = {}\n", nt, count);~~ ~~counts_string~~ ~~});~~ ~~write!(f, "Seq:\n{}\n\nLength: {}\n\nCounts:\n{}", pretty, self.seq.len(), counts_string)~~ } } ~~impl Seq<'_> {~~ ~~pub fn new(alphabet: Vec<&str>, len: usize) -> Seq {~~ ~~let distr = rand::distributions::Uniform::new_inclusive(0, alphabet.len() - 1);~~ ~~let pos_distr = rand::distributions::Uniform::new_inclusive(0, len - 1);~~ ~~let seq: Vec<&str> = (0..len)~~ ~~.map(\|_\| {~~ ~~alphabet[thread_rng().sample(distr)]~~ }) ~~.collect();~~ ~~Seq { alphabet, 
distr, pos_distr, seq }~~ } ~~pub fn insert(&mut self) {~~ ~~let pos = thread_rng().sample(self.pos_distr);~~ ~~let nt = self.alphabet[thread_rng().sample(self.distr)];~~ ~~println!("Inserting {} at position {}", nt, pos);~~ ~~self.seq.insert(pos, nt);~~ } ~~pub fn delete(&mut self) {~~ ~~let pos = thread_rng().sample(self.pos_distr);~~ ~~println!("Deleting {} at position {}", self.seq[pos], pos);~~ ~~self.seq.remove(pos);~~ } ~~pub fn swap(&mut self) {~~ ~~let pos = thread_rng().sample(self.pos_distr);~~ ~~let cur_nt = self.seq[pos];~~ ~~let new_nt = self.alphabet[thread_rng().sample(self.distr)];~~ ~~println!("Replacing {} at position {} with {}", cur_nt, pos, new_nt);~~ ~~self.seq[pos] = new_nt;~~ } } ~~fn main() {~~ ~~let mut seq = Seq::new(vec!["A", "C", "T", "G"], 200);~~ ~~println!("Initial sequnce:\n{}", seq);~~ ~~let mut_distr = rand::distributions::Uniform::new_inclusive(0, 2);~~ ~~for _ in 0..10 {~~ ~~let mutation = thread_rng().sample(mut_distr);~~ ~~if mutation == 0 {~~ ~~seq.insert()~~ ~~} else if mutation == 1 {~~ ~~seq.delete()~~ ~~} else {~~ ~~seq.swap()~~ } } ~~println!("\nMutated sequence:\n{}", seq);~~ } ~~</lang>~~ ~~{{out}}~~ ~~<pre>~~ ~~Initial sequnce:~~ ~~Seq:~~ ~~TAAGTTTAGTCTGTTTACGAGATCTAGAGGAGGACACCGTGTAGAGGGGATTTGTCAGGA~~ ~~CACATGCATGGCACCCTAGTCAAATAGTGCCGAGAACAGGCTCTCCTGAGAAAGTTAGGT~~ ~~CTGCCGAAGTGACGAAGTGCACGTTATAGCTCTATTAAGTATGTTCGTTAACAGGTATTA~~ ~~ATGCTCTTAGCCAAGACCGT~~ ~~Length: 200~~ ~~Counts:~~ ~~A = 56~~ ~~C = 38~~ ~~G = 53~~ ~~T = 53~~ ~~Deleting C at position 197~~ ~~Inserting T at position 157~~ ~~Replacing C at position 149 with G~~ ~~Replacing A at position 171 with G~~ ~~Replacing T at position 182 with G~~ ~~Deleting C at position 124~~ ~~Inserting T at position 128~~ ~~Replacing G at position 175 with C~~ ~~Deleting A at position 35~~ ~~Replacing A at position 193 with G~~ ~~Mutated sequence:~~ ~~Seq:~~ ~~TAAGTTTAGTCTGTTTACGAGATCTAGAGGAGGACCCGTGTAGAGGGGATTTGTCAGGAC~~ 
~~ACATGCATGGCACCCTAGTCAAATAGTGCCGAGAACAGGCTCTCCTGAGAAAGTTAGGTC~~ ~~TGCGAAGTTGACGAAGTGCACGTTATAGGTCTATTATAGTATGTTCGTTAGCAGCTATTA~~ ~~AGGCTCTTAGCCAGGACGT~~ ~~Length: 199~~ ~~Counts:~~ ~~A = 53~~ ~~C = 36~~ ~~G = 56~~ ~~T = 54</pre>~~