Run-length encoding

From Rosetta Code
Revision as of 10:01, 24 April 2009 by rosettacode>DataWraith (Import code from Wikipedia)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
This page uses content from Wikipedia. The original article was at Run-length_encoding. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance)
Task
Run-length encoding
You are encouraged to solve this task according to the task description, using any language you may know.

Given a string containing uppercase characters (A-Z), compress repeated 'runs' of the same character by storing the length of that run, and provide a function to reverse the compression. The output can be anything, as long as you can recreate the input with it.

Example:

Input: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Output: 12W1B12W3B24W1B14W

Haskell

<lang haskell>-- needs Data.List for 'group' function import Data.List

-- Datatypes type Encoded = [(Int, Char)] -- An encoded String with form [(times, char), ...] type Decoded = String

-- Takes a decoded string and returns an encoded list of tuples rlencode :: Decoded -> Encoded rlencode s = zip (map length splitstr) (map head splitstr)

 where splitstr = group s

-- Takes an encoded list of tuples and returns the associated decoded String rldecode :: Encoded -> Decoded rldecode (s:sx)

 | sx == []  = decodeTuple s
 | otherwise = decodeTuple s ++ rldecode sx
   where repeatchar c n  -- helper function to repeat a character, c, n times
           | n < 1     = []
           | otherwise = c : repeatchar c (n - 1)
         decodeTuple t = repeatchar (snd t) (fst t)

main :: IO () main = do

 -- Get input
 putStrLn "String to encode: "
 input <- getLine
 -- Output encoded and decoded versions of input
 let encoded = rlencode input
     decoded = rldecode encoded
   in putStrLn $ "Encoded: " ++ show encoded ++ "\nDecoded: " ++ show decoded</lang>

Java

<lang java>import java.util.regex.Matcher; import java.util.regex.Pattern;

public class RunLengthEncoding {

public String encode(String source) { StringBuffer dest = new StringBuffer(); for (int i = 0; i < source.length(); i++) { int runLength = 1; while( i+1 < source.length() && source.charAt(i) == source.charAt(i+1) ) { runLength++; i++; } dest.append(runLength); dest.append(source.charAt(i)); } return dest.toString(); }


public String decode(String source) { StringBuffer dest = new StringBuffer(); Pattern pattern = Pattern.compile("[0-9]+|[a-zA-Z]"); Matcher matcher = pattern.matcher(source);

while (matcher.find()) { int number = Integer.parseInt(matcher.group()); matcher.find(); while (number-- != 0) { dest.append(matcher.group()); } } return dest.toString(); }

public static void main(String[] args) {

            RunLengthEncoding RLE = new RunLengthEncoding();
            String example = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
            System.out.println(RLE.encode(example));
            System.out.println(RLE.decode("1W1B1W1B1W1B1W1B1W1B1W1B1W1B"));

} }</lang>

Lisp

<lang lisp>(defun group-similar (sequence &key (test 'eql))

 (loop for x in (rest sequence)
       with temp = (subseq sequence 0 1)
       if (funcall test (first temp) x)
         do (push x temp)
       else
         collect temp
         and do (setf temp (list x))))

(defun run-length-encode (sequence)

 (mapcar (lambda (group) (list (first group) (length group)))
         (group-similar (coerce sequence 'list))))

(defun run-length-decode (sequence)

 (reduce (lambda (s1 s2) (concatenate 'simple-string s1 s2))
         (mapcar (lambda (elem)
                   (make-string (second elem)
                                :initial-element
                                (first elem)))
                 sequence)))

(run-length-encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW") (run-length-decode '((#\W 12) (#\B 1) (#\W 12) (#\B 3) (#\W 24) (#\B 1)))</lang>

Python

<lang python>def encode(input_string):

   count = 1
   prev = 
   lst = []
   for character in input_string:
       if character != prev:
           if prev:
               entry = (prev,count)
               lst.append(entry)
               #print lst
           count = 1
           prev = character
       elif character == prev:
           count += 1
   else:
       entry = (character,count)
       lst.append(entry)
   return lst


def decode(lst):

   q = []
   for i in range(len(lst)):
       z = 1
       while z <= lst[i][1]:
           r = lst[i][0]
           q.append(r)
           z += 1
   return "".join(q)
  1. Method call

encode("aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa") decode([('a', 5), ('h', 6), ('m', 7), ('u', 1), ('i', 7), ('a', 6)])</lang>