Shortest common supersequence

From Rosetta Code
Revision as of 08:21, 27 September 2016 by rosettacode>Craigd (→‎{{header|Tcl}}: added zkl)
Shortest common supersequence is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

The   shortest common supersequence   is a problem closely related to the   longest common subsequence,   which you can use as an external function for this task.


Task

Given two strings     and     find the shortest possible sequence     which is the shortest common super-sequence of     and     where both     and     are a subsequence of     Defined as such,     is not necessarily unique.


Demonstrate this by printing     where

   abcbdab 

and

   bdcaba



C

The C99 code here isn't all that different from Levenstein distance calculation. <lang c>#include <stdio.h>

  1. include <string.h>

typedef struct link link_t; struct link { int len; char letter; link_t *next; };

// Stores a copy of a SCS of x and y in out. Caller needs to make sure out is long enough. int scs(char *x, char *y, char *out) { int lx = strlen(x), ly = strlen(y); link_t lnk[ly + 1][lx + 1];

for (int i = 0; i < ly; i++) lnk[i][lx] = (link_t) {ly - i, y[i], &lnk[i + 1][lx]};

for (int j = 0; j < lx; j++) lnk[ly][j] = (link_t) {lx - j, x[j], &lnk[ly][j + 1]};

lnk[ly][lx] = (link_t) {0};

for (int i = ly; i--; ) { for (int j = lx; j--; ) { link_t *lp = &lnk[i][j]; if (y[i] == x[j]) { lp->next = &lnk[i+1][j+1]; lp->letter = x[j]; } else if (lnk[i][j+1].len < lnk[i+1][j].len) { lp->next = &lnk[i][j+1]; lp->letter = x[j]; } else { lp->next = &lnk[i+1][j]; lp->letter = y[i]; } lp->len = lp->next->len + 1; } }

for (link_t *lp = &lnk[0][0]; lp; lp = lp->next) *out++ = lp->letter;

return 0; }

int main(void) { char x[] = "abcbdab", y[] = "bdcaba", res[128]; scs(x, y, res); printf("SCS(%s, %s) -> %s\n", x, y, res); return 0; }</lang>

Output:
SCS(abcbdab, bdcaba) -> abdcabdab

D

Translation of: Racket

<lang d>import std.stdio, std.functional, std.array, std.range;

dstring scs(in dstring x, in dstring y) nothrow @safe {

   alias mScs = memoize!scs;
   if (x.empty) return y;
   if (y.empty) return x;
   if (x.front == y.front)
       return x.front ~ mScs(x.dropOne, y.dropOne);
   if (mScs(x, y.dropOne).length <= mScs(x.dropOne, y).length)
       return y.front ~ mScs(x, y.dropOne);
   else
       return x.front ~ mScs(x.dropOne, y);

}

void main() @safe {

   scs("abcbdab", "bdcaba").writeln;

}</lang>

Output:
abdcabdab

Elixir

Translation of: Ruby
Works with: Elixir version 1.3

uses 'LCS' from here <lang elixir>defmodule SCS do

 def scs(u, v) do
   lcs = LCS.lcs(u, v) |> to_charlist
   scs(to_charlist(u), to_charlist(v), lcs, []) |> to_string
 end
 
 defp scs(u, v, [], res), do: Enum.reverse(res) ++ u ++ v
 defp scs([h|ut], [h|vt], [h|lt], res),      do: scs(ut, vt, lt, [h|res])
 defp scs([h|_]=u, [vh|vt], [h|_]=lcs, res), do: scs(u, vt, lcs, [vh|res])
 defp scs([uh|ut], v, lcs, res),             do: scs(ut, v, lcs, [uh|res])

end

u = "abcbdab" v = "bdcaba" IO.puts "SCS(#{u}, #{v}) = #{SCS.scs(u, v)}"</lang>

Output:
SCS(abcbdab, bdcaba) = abdcabdab

Perl

<lang perl>sub lcs { # longest common subsequence

   my( $u, $v ) = @_;
   return  unless length($u) and length($v);
   my $longest = ;
   for my $first ( 0..length($u)-1 ) {
       my $char = substr $u, $first, 1;
       my $i = index( $v, $char );
       next if -1==$i;
       my $next = $char;
       $next .= lcs( substr( $u, $first+1), substr( $v, $i+1 ) ) unless $i==length($v)-1;
       $longest = $next if length($next) > length($longest);
   }
   return $longest;

}

sub scs { # shortest common supersequence

   my( $u, $v ) = @_;
   my @lcs = split //, lcs $u, $v;
   my $pat = "(.*)".join("(.*)",@lcs)."(.*)"; 
   my @u = $u =~ /$pat/;
   my @v = $v =~ /$pat/;
   my $scs = shift(@u).shift(@v);
   $scs .= $_.shift(@u).shift(@v) for @lcs;
   return $scs;

}

my $u = "abcbdab"; my $v = "bdcaba"; printf "Strings %s %s\n", $u, $v; printf "Longest common subsequence: %s\n", lcs $u, $v; printf "Shortest common supersquence: %s\n", scs $u, $v; </lang>

Output:
Strings abcbdab bdcaba
Longest common subsequence:   bcba
Shortest common supersquence: abdcabdab

Racket

Translation of: C

This program is based on the C implementation, but use memorization instead of dynamic programming. More explanations about the memorization part in http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html .

<lang Racket>#lang racket

(struct link (len letters))

(define (link-add li n letter)

 (link (+ n (link-len li)) 
       (cons letter (link-letters li))))

(define (memoize f)

 (local ([define table (make-hash)])
   (lambda args
     (dict-ref! table args (λ () (apply f args))))))

(define scs/list

 (memoize 
  (lambda (x y)
    (cond
      [(null? x)
       (link (length y) y)]
      [(null? y)
       (link (length x) x)]
      [(eq? (car x) (car y))
       (link-add (scs/list (cdr x) (cdr y)) 1 (car x))]
      [(<= (link-len (scs/list x (cdr y)))
           (link-len (scs/list (cdr x) y)))
       (link-add (scs/list x (cdr y)) 1 (car y))]
      [else
       (link-add (scs/list (cdr x) y) 1 (car x))]))))

(define (scs x y)

 (list->string (link-letters (scs/list (string->list x) (string->list y)))))

(scs "abcbdab" "bdcaba")</lang>

Output:
"abdcabdab"

Ruby

Translation of: Tcl

uses 'lcs' from here <lang ruby>require 'lcs'

def scs(u, v)

 lcs = lcs(u, v)
 scs = ""
 # Iterate over the characters until LCS processed
 ui = vi = li = 0
 while li < lcs.size
   uc = u[ui]
   vc = v[vi]
   lc = lcs[li]
   if uc==lc
     if vc==lc
       # Part of the LCS, so consume from all strings
       scs << lc
       ui += 1
       li += 1
     else
       # char of u = char of LCS, but char of LCS v doesn't so consume just that
       scs << vc
     end
     vi += 1
   else
     # char of u != char of LCS, so consume just that
     scs << uc
     ui += 1
   end
 end
 # append remaining characters, which are not in common
 scs << u[ui..-1] << v[vi..-1]

end

u = "abcbdab" v = "bdcaba" puts "SCS(#{u}, #{v}) = #{scs(u, v)}"</lang>

Output:
SCS(abcbdab, bdcaba) = abcbdcaba

Sidef

Translation of: Perl

Uses the lcs function defined here. <lang ruby>func scs(u, v) {

   var ls = lcs(u, v).chars
   var pat = Regex('(.*)'+ls.join('(.*)')+'(.*)')
   u.scan!(pat)
   v.scan!(pat)
   var ss = (u.shift + v.shift)
   ls.each { |c| ss += (c + u.shift + v.shift) }
   return ss

}

say scs("abcbdab", "bdcaba")</lang>

Output:
abdcabdab

Tcl

This example uses either of the lcs implementations from here, assumed renamed to lcs… <lang tcl>proc scs {u v} {

   set lcs [lcs $u $v]
   set scs ""
   # Iterate over the characters until LCS processed
   for {set ui [set vi [set li 0]]} {$li<[string length $lcs]} {} {

set uc [string index $u $ui] set vc [string index $v $vi] set lc [string index $lcs $li] if {$uc eq $lc} { if {$vc eq $lc} { # Part of the LCS, so consume from all strings append scs $lc incr ui incr li } else { # char of u = char of LCS, but char of LCS v doesn't so consume just that append scs $vc } incr vi } else { # char of u != char of LCS, so consume just that append scs $uc incr ui }

   }
   # append remaining characters, which are not in common
   append scs [string range $u $ui end] [string range $v $vi end]
   return $scs

}</lang> Demonstrating: <lang tcl>set u "abcbdab" set v "bdcaba" puts "SCS($u,$v) = [scs $u $v]"</lang>

Output:
SCS(abcbdab,bdcaba) = abdcabdab

zkl

Translation of: C

<lang zkl>class Link{ var len,letter,next;

  fcn init(l=0,c="",lnk=Void){ len,letter,next=l,c,lnk; }

} fcn scs(x,y,out){

  lx,ly:=x.len(),y.len();
  lnk:=(ly+1).pump(List,'wrap(_){ (lx+1).pump(List(),Link.create) });
  foreach i in (ly){ lnk[i][lx]=Link(ly-i, y[i]) }
  foreach j in (lx){ lnk[ly][j]=Link(lx-j, x[j]) }

  foreach i,j in ([ly-1..0,-1],[lx-1..0,-1]){
     lp:=lnk[i][j];
     if (y[i]==x[j]){

lp.next =lnk[i+1][j+1]; lp.letter=x[j];

     }else if(lnk[i][j+1].len < lnk[i+1][j].len){

lp.next =lnk[i][j+1]; lp.letter=x[j];

     }else{

lp.next =lnk[i+1][j]; lp.letter=y[i];

     }
     lp.len=lp.next.len + 1;
  }
  lp:=lnk[0][0]; while(lp){ out.write(lp.letter); lp=lp.next; }
  out.close()

}</lang> <lang zkl>scs("abcbdab","bdcaba", Sink(String)).println();</lang>

Output:
abdcabdab