Run-length encoding
This page uses content from Wikipedia. The original article was at Run-length_encoding. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance) |
You are encouraged to solve this task according to the task description, using any language you may know.
Given a string containing uppercase characters (A-Z), compress repeated 'runs' of the same character by storing the length of that run, and provide a function to reverse the compression. The output can be anything, as long as you can recreate the input with it.
Example:
- Input:
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
- Output:
12W1B12W3B24W1B14W
Haskell
<lang haskell>-- needs Data.List for 'group' function import Data.List
-- Datatypes type Encoded = [(Int, Char)] -- An encoded String with form [(times, char), ...] type Decoded = String
-- Takes a decoded string and returns an encoded list of tuples rlencode :: Decoded -> Encoded rlencode s = zip (map length splitstr) (map head splitstr)
where splitstr = group s
-- Takes an encoded list of tuples and returns the associated decoded String rldecode :: Encoded -> Decoded rldecode (s:sx)
| sx == [] = decodeTuple s | otherwise = decodeTuple s ++ rldecode sx where repeatchar c n -- helper function to repeat a character, c, n times | n < 1 = [] | otherwise = c : repeatchar c (n - 1) decodeTuple t = repeatchar (snd t) (fst t)
main :: IO () main = do
-- Get input putStrLn "String to encode: " input <- getLine -- Output encoded and decoded versions of input let encoded = rlencode input decoded = rldecode encoded in putStrLn $ "Encoded: " ++ show encoded ++ "\nDecoded: " ++ show decoded</lang>
Java
<lang java>import java.util.regex.Matcher; import java.util.regex.Pattern;
public class RunLengthEncoding {
public String encode(String source) { StringBuffer dest = new StringBuffer(); for (int i = 0; i < source.length(); i++) { int runLength = 1; while( i+1 < source.length() && source.charAt(i) == source.charAt(i+1) ) { runLength++; i++; } dest.append(runLength); dest.append(source.charAt(i)); } return dest.toString(); }
public String decode(String source) { StringBuffer dest = new StringBuffer(); Pattern pattern = Pattern.compile("[0-9]+|[a-zA-Z]"); Matcher matcher = pattern.matcher(source);
while (matcher.find()) { int number = Integer.parseInt(matcher.group()); matcher.find(); while (number-- != 0) { dest.append(matcher.group()); } } return dest.toString(); }
public static void main(String[] args) {
RunLengthEncoding RLE = new RunLengthEncoding(); String example = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"; System.out.println(RLE.encode(example)); System.out.println(RLE.decode("1W1B1W1B1W1B1W1B1W1B1W1B1W1B"));
} }</lang>
Lisp
<lang lisp>(defun group-similar (sequence &key (test 'eql))
(loop for x in (rest sequence) with temp = (subseq sequence 0 1) if (funcall test (first temp) x) do (push x temp) else collect temp and do (setf temp (list x))))
(defun run-length-encode (sequence)
(mapcar (lambda (group) (list (first group) (length group))) (group-similar (coerce sequence 'list))))
(defun run-length-decode (sequence)
(reduce (lambda (s1 s2) (concatenate 'simple-string s1 s2)) (mapcar (lambda (elem) (make-string (second elem) :initial-element (first elem))) sequence)))
(run-length-encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW") (run-length-decode '((#\W 12) (#\B 1) (#\W 12) (#\B 3) (#\W 24) (#\B 1)))</lang>
Python
<lang python>def encode(input_string):
count = 1 prev = lst = [] for character in input_string: if character != prev: if prev: entry = (prev,count) lst.append(entry) #print lst count = 1 prev = character elif character == prev: count += 1 else: entry = (character,count) lst.append(entry) return lst
def decode(lst):
q = [] for i in range(len(lst)): z = 1 while z <= lst[i][1]: r = lst[i][0] q.append(r) z += 1 return "".join(q)
- Method call
encode("aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa") decode([('a', 5), ('h', 6), ('m', 7), ('u', 1), ('i', 7), ('a', 6)])</lang>