Longest common subsequence: Difference between revisions

From Rosetta Code
Content added Content deleted
(→‎{{header|D}}: ++ fortran)
Line 176: Line 176:
writefln(lcsi("thisisatest","testing123testing")) ;
writefln(lcsi("thisisatest","testing123testing")) ;
}</lang>
}</lang>

=={{header|Fortran}}==
{{works with|Fortran|95}}
Using the <tt>iso_varying_string</tt> module which can be found [http://www.fortran.com/iso_varying_string.f95 here] (or equivalent module conforming to the ISO/IEC 1539-2:2000 API or to a subset according to the need of this code: <code>char</code>, <code>len</code>, <code>//</code>, <code>extract</code>, <code>==</code>, <code>=</code>)

<lang fortran>program lcstest
use iso_varying_string
implicit none

type(varying_string) :: s1, s2

s1 = "thisisatest"
s2 = "testing123testing"
print *, char(lcs(s1, s2))

s1 = "1234"
s2 = "1224533324"
print *, char(lcs(s1, s2))

contains

recursive function lcs(a, b) result(l)
type(varying_string) :: l
type(varying_string), intent(in) :: a, b

type(varying_string) :: x, y

l = ""
if ( (len(a) == 0) .or. (len(b) == 0) ) return
if ( extract(a, len(a), len(a)) == extract(b, len(b), len(b)) ) then
l = lcs(extract(a, 1, len(a)-1), extract(b, 1, len(b)-1)) // extract(a, len(a), len(a))
else
x = lcs(a, extract(b, 1, len(b)-1))
y = lcs(extract(a, 1, len(a)-1), b)
if ( len(x) > len(y) ) then
l = x
else
l = y
end if
end if
end function lcs

end program lcstest</lang>


=={{header|Haskell}}==
=={{header|Haskell}}==

Revision as of 17:01, 23 March 2009

Longest common subsequence is a programming puzzle. It lays out a problem which Rosetta Code users are encouraged to solve, using languages and techniques they know. Multiple approaches are not discouraged, so long as the puzzle guidelines are followed. For other Puzzles, see Category:Puzzles.

The longest common subsequence (or LCS) of groups A and B is the longest group of elements from A and B that are common between the two groups and in the same order in each group. For example, the sequences "1234" and "1224533324" have an LCS of "1234":

1234
1224533324

For a string example, consider the sequences "thisisatest" and "testing123testing". An LCS would be "tsitest":

thisisatest
testing123testing

In this puzzle, your code only needs to deal with strings. Write a function which returns an LCS of two strings (case-sensitive). You don't need to show multiple LCS's.

Ada

Using recursion: <lang ada> with Ada.Text_IO; use Ada.Text_IO;

procedure Test_LCS is

  function LCS (A, B : String) return String is
  begin
     if A'Length = 0 or else B'Length = 0 then
        return "";
     elsif A (A'Last) = B (B'Last) then
        return LCS (A (A'First..A'Last - 1), B (B'First..B'Last - 1)) & A (A'Last);
     else
        declare
           X : String renames LCS (A, B (B'First..B'Last - 1));
           Y : String renames LCS (A (A'First..A'Last - 1), B);
        begin
           if X'Length > Y'Length then
              return X;
           else
              return Y;
           end if;
        end;
     end if;
  end LCS;

begin

  Put_Line (LCS ("thisisatest", "testing123testing"));

end Test_LCS; </lang> Sample output:

tsitest

Non-recursive solution: <lang ada> with Ada.Text_IO; use Ada.Text_IO;

procedure Test_LCS is

  function LCS (A, B : String) return String is
     L : array (A'First..A'Last + 1, B'First..B'Last + 1) of Natural;
  begin
     for I in L'Range (1) loop
        L (I, B'First) := 0;
     end loop;
     for J in L'Range (2) loop
        L (A'First, J) := 0;
     end loop;
     for I in A'Range loop
        for J in B'Range loop
           if A (I) = B (J) then
              L (I + 1, J + 1) := L (I, J) + 1;
           else
              L (I + 1, J + 1) := Natural'Max (L (I + 1, J), L (I, J + 1));
           end if;
        end loop;
     end loop;
     declare
        I : Integer := L'Last (1);
        J : Integer := L'Last (2);
        R : String (1..Integer'Max (A'Length, B'Length));
        K : Integer := R'Last;
     begin
        while I > L'First (1) and then J > L'First (2) loop
           if L (I, J) = L (I - 1, J) then
              I := I - 1;
           elsif L (I, J) = L (I, J - 1) then
              J := J - 1;
           else
              I := I - 1;
              J := J - 1;
              R (K) := A (I);
              K := K - 1;
           end if;
        end loop;
        return R (K + 1..R'Last);
     end;
  end LCS;

begin

  Put_Line (LCS ("thisisatest", "testing123testing"));

end Test_LCS; </lang> Sample output:

tsitest

ALGOL 68

Translation of: Ada
Works with: ALGOL 68 version Standard - no extensions to language used
Works with: ALGOL 68G version Any - tested with release mk15-0.8b.fc9.i386
Works with: ELLA ALGOL 68 version Any (with appropriate job cards) - tested with release 1.8.8d.fc9.i386

<lang> main:(

  PROC lcs = (STRING a, b)STRING:
  BEGIN
     IF UPB a = 0 OR UPB b = 0 THEN
        ""
     ELIF a [UPB a] = b [UPB b] THEN
        lcs (a [:UPB a - 1], b [:UPB b - 1]) + a [UPB a]
     ELSE
        STRING x = lcs (a, b [:UPB b - 1]);
        STRING y = lcs (a [:UPB a - 1], b);
        IF UPB x > UPB y THEN x ELSE y FI
     FI
  END # lcs #;
  print((lcs ("thisisatest", "testing123testing"), new line))

) </lang> Output:

tsitest

BASIC

Works with: QuickBasic version 4.5
Translation of: Java

<lang qbasic>FUNCTION lcs$ (a$, b$)

   IF LEN(a$) = 0 OR LEN(b$) = 0 THEN

lcs$ = ""

   ELSEIF RIGHT$(a$, 1) = RIGHT$(b$, 1) THEN

lcs$ = lcs$(LEFT$(a$, LEN(a$) - 1), LEFT$(b$, LEN(b$) - 1)) + RIGHT$(a$, 1)

   ELSE

x$ = lcs$(a$, LEFT$(b$, LEN(b$) - 1)) y$ = lcs$(LEFT$(a$, LEN(a$) - 1), b$) IF LEN(x$) > LEN(y$) THEN lcs$ = x$ ELSE lcs$ = y$ END IF

   END IF

END FUNCTION</lang>

D

<lang d>module lcs ; import std.stdio ;

T[] lcsr(T)(T[] a, T[] b) { // recursive

 if(a.length == 0 || b.length == 0) return null ;
 T[] x = a[1..$] , y = b[1..$] ; 
 if(a[0] == b[0]) return a[0] ~ lcsr(x, y) ;
 x = lcsr(x, b) ;  y = lcsr(a, y) ;
 return x.length > y.length ? x : y ;

}

T imax(T)(T a, T b) { return a > b ? a : b ; }

T[] lcsi(T)(T[] a, T[] b) { // dynamic programming

 int i,j, m = a.length , n = b.length ;
 int[][] L = new int[][](m + 1,n + 1);
 T[] res ;
 for(i = 0 ; i < m ; i++)
   for(j = 0 ; j < n ; j++)
     L[i+1][j+1] = (a[i] == b[j]) ? 1 + L[i][j] : imax(L[i+1][j], L[i][j+1]) ;
 while(i >0 && j >0)
   if(a[i-1] == b[j-1]) { 
     res ~= a[i-1] ; i-- ; j-- ; 
   } else 
     if (L[i][j-1] < L[i-1][j]) 
       i-- ;
     else 
       j-- ;
 return res.reverse ;

}

void main(string[] args) {

 writefln(lcsr("thisisatest","testing123testing")) ;
 writefln(lcsi("thisisatest","testing123testing")) ;

}</lang>

Fortran

Works with: Fortran version 95

Using the iso_varying_string module which can be found here (or equivalent module conforming to the ISO/IEC 1539-2:2000 API or to a subset according to the need of this code: char, len, //, extract, ==, =)

<lang fortran>program lcstest

 use iso_varying_string
 implicit none
 type(varying_string) :: s1, s2
 s1 = "thisisatest"
 s2 = "testing123testing"
 print *, char(lcs(s1, s2))
 s1 = "1234"
 s2 = "1224533324"
 print *, char(lcs(s1, s2))

contains

 recursive function lcs(a, b) result(l)
   type(varying_string) :: l
   type(varying_string), intent(in) :: a, b
   type(varying_string) :: x, y
   l = ""
   if ( (len(a) == 0) .or. (len(b) == 0) ) return
   if ( extract(a, len(a), len(a)) == extract(b, len(b), len(b)) ) then
      l = lcs(extract(a, 1, len(a)-1), extract(b, 1, len(b)-1)) // extract(a, len(a), len(a))
   else
      x = lcs(a, extract(b, 1, len(b)-1))
      y = lcs(extract(a, 1, len(a)-1), b)
      if ( len(x) > len(y) ) then
         l = x
      else
         l = y
      end if
   end if
 end function lcs

end program lcstest</lang>

Haskell

The Wikipedia solution translates directly into Haskell, with the only difference that equal characters are added in front:

longest xs ys = if length xs > length ys then xs else ys

lcs [] _ = []
lcs _ [] = []
lcs (x:xs) (y:ys) 
  | x == y    = x : lcs xs ys
  | otherwise = longest (lcs (x:xs) ys) (lcs xs (y:ys))

Memoization (aka dynamic programming) of that uses zip to make both the index and the character available:

import Data.Array

lcs xs ys = a!(0,0) where
  n = length xs
  m = length ys
  a = array ((0,0),(n,m)) $ l1 ++ l2 ++ l3
  l1 = [((i,m),[]) | i <- [0..n]]
  l2 = [((n,j),[]) | j <- [0..m]]
  l3 = [((i,j), f x y i j) | (x,i) <- zip xs [0..], (y,j) <- zip ys [0..]]
  f x y i j 
    | x == y    = x : a!(i+1,j+1)
    | otherwise = longest (a!(i,j+1)) (a!(i+1,j))

Both solutions work of course not only with strings, but also with any other list. Example:

*Main> lcs "thisisatest" "testing123testing"
"tsitest"

J

lcs=: dyad define
 |.x{~ 0{"1 cullOne^:_ (\:~~ +/@|:) 4$.$. x =/ y
)
cullOne=: verb define
 if. (#y) = First0=.0(= i. 1:) 1,*./|: 2 >/\ y 
 do. y  else. y #~ 0 First0}(#y)#1  end.
)

Java

Recursion

This is not a particularly fast algorithm, but it gets the job done eventually. The speed is a result of many recursive function calls. <lang java>public static String lcs(String a, String b){

   if(a.length() == 0 || b.length() == 0){
       return "";
   }else if(a.charAt(a.length()-1) == b.charAt(b.length()-1)){
       return lcs(a.substring(0,a.length()-1),b.substring(0,b.length()-1))
           + a.charAt(a.length()-1);
   }else{
       String x = lcs(a, b.substring(0,b.length()-1));
       String y = lcs(a.substring(0,a.length()-1), b);
       return (x.length() > y.length()) ? x : y;
   }

}</lang>

Dynamic Programming

<lang java>public static String lcs(String a, String b) {

   int[][] lengths = new int[a.length()+1][b.length()+1];
   // row 0 and column 0 are initialized to 0 already
   for (int i = 0; i < a.length(); i++)
       for (int j = 0; j < b.length(); j++)
           if (a.charAt(i) == b.charAt(j))
               lengths[i+1][j+1] = lengths[i][j] + 1;
           else
               lengths[i+1][j+1] =
                   Math.max(lengths[i+1][j], lengths[i][j+1]);
   // read the substring out from the matrix
   StringBuffer sb = new StringBuffer();
   for (int x = a.length(), y = b.length();
        x != 0 && y != 0; ) {
       if (lengths[x][y] == lengths[x-1][y])
           x--;
       else if (lengths[x][y] == lengths[x][y-1])
           y--;
       else {
           assert a.charAt(x-1) == b.charAt(y-1);
           sb.append(a.charAt(x-1));
           x--;
           y--;
       }
   }
   return sb.reverse().toString();

}</lang>

OCaml

Recursion

from Haskell <lang ocaml>let longest xs ys = if List.length xs > List.length ys then xs else ys

let rec lcs a b = match a, b with

  [], _
| _, []        -> []
| x::xs, y::ys ->
   if x = y then
     x :: lcs xs ys
   else 
     longest (lcs a ys) (lcs xs b)</lang>

Dynamic programming

<lang ocaml>let lcs xs' ys' =

 let xs = Array.of_list xs'
 and ys = Array.of_list ys' in
 let n = Array.length xs
 and m = Array.length ys in
 let a = Array.make_matrix (n+1) (m+1) [] in
 for i = n-1 downto 0 do
   for j = m-1 downto 0 do
     a.(i).(j) <- if xs.(i) = ys.(j) then
                    xs.(i) :: a.(i+1).(j+1)
                  else
                    longest a.(i).(j+1) a.(i+1).(j)
   done
 done;
 a.(0).(0)</lang>

Because both solutions only work with lists, here are some functions to convert to and from strings: <lang ocaml>let list_of_string str =

 let result = ref [] in
 String.iter (fun x -> result := x :: !result)
             str;
 List.rev !result

let string_of_list lst =

 let result = String.create (List.length lst) in
 ignore (List.fold_left (fun i x -> result.[i] <- x; i+1) 0 lst);
 result</lang>

Both solutions work. Example:

# string_of_list (lcs (list_of_string "thisisatest")
                      (list_of_string "testing123testing"));;
- : string = "tsitest"

Python

Recursion

This solution is similar to the Haskell's one. It is slow. <lang python>def lcs(xstr, ystr):

   """
   >>> lcs('thisisatest', 'testing123testing')
   'tsitest'
   """
   if not xstr or not ystr:
       return ""
   x, xs, y, ys = xstr[0], xstr[1:], ystr[0], ystr[1:]
   if x == y:
       return x + lcs(xs, ys)
   else:
       return max(lcs(xstr, ys), lcs(xs, ystr), key=len)</lang>

Test it: <lang python> if __name__=="__main__":

   import doctest; doctest.testmod()

</lang>

Dynamic Programming

Translation of: Java

<lang python>def lcs(a, b):

   lengths = [[0 for j in range(len(b)+1)] for i in range(len(a)+1)]
   # row 0 and column 0 are initialized to 0 already
   for i in range(len(a)):
       for j in range(len(b)):
           if a[i] == b[j]:
               lengths[i+1][j+1] = lengths[i][j] + 1
           else:
               lengths[i+1][j+1] = \
                   max(lengths[i+1][j], lengths[i][j+1])
   # read the substring out from the matrix
   result = ""
   x, y = len(a), len(b)
   while x != 0 and y != 0:
       if lengths[x][y] == lengths[x-1][y]:
           x -= 1
       elif lengths[x][y] == lengths[x][y-1]:
           y -= 1
       else:
           assert a[x-1] == b[y-1]
           result = a[x-1] + result
           x -= 1
           y -= 1
   return result</lang>