Burrows–Wheeler transform: Difference between revisions

Add Rust implementation
(Add Rust implementation)
Line 2,029:
--> Input can't contain STX or ETX
--></pre>
 
=={{header|Rust}}==
<lang rust>
use core::cmp::Ordering;
 
const STX: char = '\u{0002}';
const ETX: char = '\u{0003}';
 
// this compare uses simple alphabetical sort, but for the special characters (ETX, STX)
// it sorts them later than alphanumeric characters
pub fn special_cmp(lhs: &str, rhs: &str) -> Ordering {
let mut iter1 = lhs.chars();
let mut iter2 = rhs.chars();
 
loop {
match (iter1.next(), iter2.next()) {
(Some(lhs), Some(rhs)) => {
if lhs != rhs {
let is_lhs_special = lhs == ETX || lhs == STX;
let is_rhs_special = rhs == ETX || rhs == STX;
 
let result = if is_lhs_special == is_rhs_special {
lhs.cmp(&rhs)
} else if is_lhs_special {
Ordering::Greater
} else {
Ordering::Less
};
 
return result;
}
}
(Some(_), None) => return Ordering::Greater,
(None, Some(_)) => return Ordering::Less,
(None, None) => return lhs.cmp(&rhs),
}
}
}
 
fn burrows_wheeler_transform(input: &str) -> String {
let mut table: Vec<String> = vec![];
 
// add markers for the start and end
let input_string = format!("{}{}{}", STX, input, ETX);
 
// create all possible rotations
for (i, _) in input_string.char_indices() {
table.push(format!(
"{}{}",
&input_string[input_string.len() - 1 - i..],
&input_string[0..input_string.len() - 1 - i]
));
}
 
// sort rows alphabetically
table.sort_unstable_by(|lhs, rhs| special_cmp(&lhs, &rhs));
 
// return the last column
table
.iter()
.map(|s| s.chars().nth_back(0).unwrap())
.collect::<String>()
}
 
fn inverse_burrows_wheeler_transform(input: &str) -> String {
let mut table: Vec<String> = vec![String::new(); input.len()];
for _ in 0..input.len() {
// insert the charatcers of the encoded input as a first column for each row
for (j, s) in table.iter_mut().enumerate() {
*s = format!("{}{}", input.chars().nth(j).unwrap(), s);
}
 
// sort rows alphabetically
table.sort_unstable_by(|lhs, rhs| special_cmp(&lhs, &rhs));
}
 
// return the row which has the end marker at the last position
table
.into_iter()
.filter(|s| s.ends_with(ETX))
.collect::<String>()
// remove start and markers
.replace(STX, "")
.replace(ETX, "")
}
 
fn main() {
let input = [
"banana",
"SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES",
"TO BE OR NOT TO BE OR WANT TO BE OR NOT?",
];
for s in input.iter() {
let bwt = burrows_wheeler_transform(s);
let ibwt = inverse_burrows_wheeler_transform(&bwt);
println!("Input: {}", s);
println!("\tBWT: {}", bwt.replace(STX, "^").replace(ETX, "|"));
println!("\tInverse BWT: {}", ibwt);
}
}
</lang>
{{out}}
<pre>
Input: banana
BWT: bnn^aa|a
Inverse BWT: banana
Input: SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES
BWT: TEXYDST.E.IXIXIXXSSMPPS.B..E.^.UESFXDIIOIIIT|S
Inverse BWT: SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES
Input: TO BE OR NOT TO BE OR WANT TO BE OR NOT?
BWT: OOORREEETTRTW BBB ATTT NNOOONOO^ |?
Inverse BWT: TO BE OR NOT TO BE OR WANT TO BE OR NOT?
</pre>
 
=={{header|Scala}}==
Anonymous user