Bioinformatics/Sequence mutation: Difference between revisions
Content added Content deleted
(Add Common Lisp) |
(Added C++ solution) |
||
Line 316: | Line 316: | ||
Total:513 |
Total:513 |
||
</pre> |
|||
=={{header|C++}}==
|||
<lang cpp>#include <array> |
|||
#include <iomanip> |
|||
#include <iostream> |
|||
#include <random> |
|||
#include <string> |
|||
/// Builds random sequences over the bases A, C, G and T and applies random
/// single-base mutations (change, erase, insert) to them.
class sequence_generator {
public:
    /// Seeds the random engine and gives every operation a weight of 1.
    sequence_generator();

    /// Returns a string of `length` randomly chosen bases.
    std::string generate_sequence(size_t length);

    /// Applies one randomly chosen mutation to the sequence in place and
    /// reports it on std::cout.
    void mutate_sequence(std::string&);

    /// Writes the sequence in position-labelled lines followed by the
    /// per-base counts and their total.
    static void print_sequence(std::ostream&, const std::string&);

    /// Kinds of mutation. The enumerator values (0, 1, 2) are used as
    /// indices into operation_weight_, so their order matters.
    enum class operation { change, erase, insert };

    /// Sets the relative weight with which an operation is chosen.
    void set_weight(operation, unsigned int);

private:
    /// Picks one entry of bases_ uniformly at random.
    char get_random_base() {
        return bases_[base_dist_(engine_)];
    }

    /// Picks an operation at random according to operation_weight_.
    operation get_random_operation();

    static const char bases_[];                       ///< Base alphabet; defined out of class.
    std::mt19937 engine_;                             ///< Random source shared by all draws.
    std::uniform_int_distribution<size_t> base_dist_; ///< Index distribution over bases_.
    std::array<unsigned int, 3> operation_weight_;    ///< One weight per `operation` value.
    unsigned int total_weight_;                       ///< Running sum of operation_weight_.
};
|||
// The four bases a sequence may contain; get_random_base() indexes this array.
const char sequence_generator::bases_[] = { 'A', 'C', 'G', 'T' };
|||
/// Seeds the Mersenne Twister from std::random_device, sets the base
/// distribution to cover every index of bases_, and gives each operation a
/// weight of 1, so the total weight equals the number of operations.
sequence_generator::sequence_generator()
    : engine_(std::random_device()()),
      base_dist_(0, sizeof(bases_) / sizeof(bases_[0]) - 1),
      total_weight_(operation_weight_.size()) {
    // size() is a property of the array type, so it can be read above before
    // the elements themselves are filled in here.
    operation_weight_.fill(1);
}
|||
/// Chooses an operation at random, each with probability proportional to its
/// entry in operation_weight_.
///
/// If every weight is zero there is nothing to be proportional to, so the
/// operation is chosen uniformly instead. Without that guard,
/// `total_weight_ - 1` would wrap around and the function could return a
/// value that is not a named enumerator.
sequence_generator::operation sequence_generator::get_random_operation() {
    const size_t op_count = operation_weight_.size();

    if (total_weight_ == 0) {
        std::uniform_int_distribution<size_t> any_op(0, op_count - 1);
        return static_cast<operation>(any_op(engine_));
    }

    std::uniform_int_distribution<unsigned int> op_dist(0, total_weight_ - 1);
    const unsigned int n = op_dist(engine_);

    // Walk the cumulative weights. The last operation is the remainder case,
    // so the loop stops one short and the result is always a valid enumerator.
    unsigned int weight = 0;
    size_t op = 0;
    for (; op + 1 < op_count; ++op) {
        weight += operation_weight_[op];
        if (n < weight)
            break;
    }
    return static_cast<operation>(op);
}
|||
/// Replaces the weight of `op` with `weight` and keeps total_weight_ equal to
/// the sum of all operation weights.
void sequence_generator::set_weight(operation op, unsigned int weight) {
    unsigned int& slot = operation_weight_[static_cast<size_t>(op)];
    // Remove the old contribution before adding the new one.
    total_weight_ -= slot;
    total_weight_ += weight;
    slot = weight;
}
|||
std::string sequence_generator::generate_sequence(size_t length) { |
|||
std::string sequence; |
|||
sequence.reserve(length); |
|||
for (size_t i = 0; i < length; ++i) |
|||
sequence += get_random_base(); |
|||
return sequence; |
|||
} |
|||
void sequence_generator::mutate_sequence(std::string& sequence) { |
|||
std::uniform_int_distribution<size_t> dist(0, sequence.length() - 1); |
|||
size_t pos = dist(engine_); |
|||
char b; |
|||
switch (get_random_operation()) { |
|||
case operation::change: |
|||
b = get_random_base(); |
|||
std::cout << "Change base at position " << pos << " from " |
|||
<< sequence[pos] << " to " << b << '\n'; |
|||
sequence[pos] = b; |
|||
break; |
|||
case operation::erase: |
|||
std::cout << "Erase base " << sequence[pos] << " at position " |
|||
<< pos << '\n'; |
|||
sequence.erase(pos, 1); |
|||
break; |
|||
case operation::insert: |
|||
b = get_random_base(); |
|||
std::cout << "Insert base " << b << " at position " |
|||
<< pos << '\n'; |
|||
sequence.insert(pos, 1, b); |
|||
break; |
|||
} |
|||
} |
|||
void sequence_generator::print_sequence(std::ostream& out, const std::string& sequence) { |
|||
constexpr size_t base_count = sizeof(bases_)/sizeof(bases_[0]); |
|||
size_t n = sequence.length(); |
|||
size_t count[base_count] = { 0 }; |
|||
for (size_t i = 0; i < n; ++i) { |
|||
if (i % 50 == 0) { |
|||
if (i != 0) |
|||
out << '\n'; |
|||
out << std::setw(3) << i << ": "; |
|||
} |
|||
out << sequence[i]; |
|||
for (size_t j = 0; j < base_count; ++j) { |
|||
if (bases_[j] == sequence[i]) { |
|||
++count[j]; |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
out << '\n'; |
|||
out << "Base counts:\n"; |
|||
size_t total = 0; |
|||
for (size_t j = 0; j < base_count; ++j) { |
|||
total += count[j]; |
|||
out << bases_[j] << ": " << count[j] << ", "; |
|||
} |
|||
out << "Total: " << total << '\n'; |
|||
} |
|||
// Demo: generate a 250-base sequence, print it, apply 10 random mutations
// (with "change" weighted twice as heavily as the other operations), and
// print the result.
int main() {
    sequence_generator gen;
    gen.set_weight(sequence_generator::operation::change, 2);

    std::string sequence = gen.generate_sequence(250);
    std::cout << "Initial sequence:\n";
    sequence_generator::print_sequence(std::cout, sequence);

    constexpr int count = 10;
    for (int i = 0; i < count; ++i)
        gen.mutate_sequence(sequence);

    std::cout << "After " << count << " mutations:\n";
    sequence_generator::print_sequence(std::cout, sequence);
    return 0;
}</lang>
|||
{{out}}
|||
<pre> |
|||
Initial sequence: |
|||
0: CATATCTGCGTAAGGCGTCGAATCCTTAGAGAAAACTCGCCAAACGCGCT |
|||
50: AGCCAAGACTTAATTAAAGGCTGGCCACATAACAGTAGTACTGCAAGGAT |
|||
100: GACGTGACTACAACGTGGAATACTCTATCTGATGAGCCCCACGTGGGCCA |
|||
150: ACCTTCCAATGCGGCGTCTTGCAGTCTTCGGACTTTGCCTCTACTAGGAG |
|||
200: TAGCCATGACGAGTGGTGAGGCGGAGGGACCAATTCCGCACTTCGAATCG |
|||
Base counts: |
|||
A: 67, C: 65, G: 64, T: 54, Total: 250 |
|||
Change base at position 39 from C to C |
|||
Erase base T at position 194 |
|||
Insert base T at position 70 |
|||
Insert base C at position 190 |
|||
Insert base T at position 45 |
|||
Erase base A at position 111 |
|||
Change base at position 96 from A to C |
|||
Change base at position 113 from A to C |
|||
Change base at position 5 from C to A |
|||
Change base at position 44 from C to T |
|||
After 10 mutations: |
|||
0: CATATATGCGTAAGGCGTCGAATCCTTAGAGAAAACTCGCCAAATTGCGC |
|||
50: TAGCCAAGACTTAATTAAAGGTCTGGCCACATAACAGTAGTACTGCCAGG |
|||
100: ATGACGTGACTCACCGTGGAATACTCTATCTGATGAGCCCCACGTGGGCC |
|||
150: AACCTTCCAATGCGGCGTCTTGCAGTCTTCGGACTTTGCCCTCTACAGGA |
|||
200: GTAGCCATGACGAGTGGTGAGGCGGAGGGACCAATTCCGCACTTCGAATC |
|||
250: G |
|||
Base counts: |
|||
A: 65, C: 66, G: 64, T: 56, Total: 251 |
|||
</pre> |
</pre> |
||