Shortest common supersequence

From Rosetta Code
Shortest common supersequence is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

The   shortest common supersequence   is a problem closely related to the   longest common subsequence,   which you can use as an external function for this task.


Task

Given two strings and , find the shortest possible sequence , which is the shortest common super-sequence of and where both and are a subsequence of . Defined as such, is not necessarily unique.

Demonstrate this by printing where abcbdab” and bdcaba”.

C

The C99 code here isn't all that different from Levenstein distance calculation. <lang c>#include <stdio.h>

  1. include <string.h>

typedef struct link link_t; struct link { int len; char letter; link_t *next; };

// Stores a copy of a SCS of x and y in out. Caller needs to make sure out is long enough. int scs(char *x, char *y, char *out) { int lx = strlen(x), ly = strlen(y); link_t lnk[ly + 1][lx + 1];

for (int i = 0; i < ly; i++) lnk[i][lx] = (link_t) {ly - i, y[i], &lnk[i + 1][lx]};

for (int j = 0; j < lx; j++) lnk[ly][j] = (link_t) {lx - j, x[j], &lnk[ly][j + 1]};

lnk[ly][lx] = (link_t) {0};

for (int i = ly; i--; ) { for (int j = lx; j--; ) { link_t *lp = &lnk[i][j]; if (y[i] == x[j]) { lp->next = &lnk[i+1][j+1]; lp->letter = x[j]; } else if (lnk[i][j+1].len < lnk[i+1][j].len) { lp->next = &lnk[i][j+1]; lp->letter = x[j]; } else { lp->next = &lnk[i+1][j]; lp->letter = y[i]; } lp->len = lp->next->len + 1; } }

for (link_t *lp = &lnk[0][0]; lp; lp = lp->next) *out++ = lp->letter;

return 0; }

int main(void) { char x[] = "abcbdab", y[] = "bdcaba", res[128]; scs(x, y, res); printf("SCS(%s, %s) -> %s\n", x, y, res); return 0; }</lang>

Output:
SCS(abcbdab, bdcaba) -> abdcabdab

D

Translation of: Racket

<lang d>import std.stdio, std.functional, std.array, std.range;

dstring scs(in dstring x, in dstring y) nothrow @safe {

   alias mScs = memoize!scs;
   if (x.empty) return y;
   if (y.empty) return x;
   if (x.front == y.front)
       return x.front ~ mScs(x.dropOne, y.dropOne);
   if (mScs(x, y.dropOne).length <= mScs(x.dropOne, y).length)
       return y.front ~ mScs(x, y.dropOne);
   else
       return x.front ~ mScs(x.dropOne, y);

}

void main() @safe {

   scs("abcbdab", "bdcaba").writeln;

}</lang>

Output:
abdcabdab

Elixir

Translation of: Ruby
Works with: Elixir version 1.3

uses 'LCS' from here <lang elixir>defmodule SCS do

 def scs(u, v) do
   lcs = LCS.lcs(u, v) |> to_charlist
   scs(to_charlist(u), to_charlist(v), lcs, []) |> to_string
 end
 
 defp scs(u, v, [], res), do: Enum.reverse(res) ++ u ++ v
 defp scs([h|ut], [h|vt], [h|lt], res),      do: scs(ut, vt, lt, [h|res])
 defp scs([h|_]=u, [vh|vt], [h|_]=lcs, res), do: scs(u, vt, lcs, [vh|res])
 defp scs([uh|ut], v, lcs, res),             do: scs(ut, v, lcs, [uh|res])

end

u = "abcbdab" v = "bdcaba" IO.puts "SCS(#{u}, #{v}) = #{SCS.scs(u, v)}"</lang>

Output:
SCS(abcbdab, bdcaba) = abdcabdab

Perl

<lang perl>sub lcs { # longest common subsequence

   my( $u, $v ) = @_;
   return  unless length($u) and length($v);
   my $longest = ;
   for my $first ( 0..length($u)-1 ) {
       my $char = substr $u, $first, 1;
       my $i = index( $v, $char );
       next if -1==$i;
       my $next = $char;
       $next .= lcs( substr( $u, $first+1), substr( $v, $i+1 ) ) unless $i==length($v)-1;
       $longest = $next if length($next) > length($longest);
   }
   return $longest;

}

sub scs { # shortest common supersequence

   my( $u, $v ) = @_;
   my @lcs = split //, lcs $u, $v;
   my $pat = "(.*)".join("(.*)",@lcs)."(.*)"; 
   my @u = $u =~ /$pat/;
   my @v = $v =~ /$pat/;
   my $scs = shift(@u).shift(@v);
   $scs .= $_.shift(@u).shift(@v) for @lcs;
   return $scs;

}

my $u = "abcbdab"; my $v = "bdcaba"; printf "Strings %s %s\n", $u, $v; printf "Longest common subsequence: %s\n", lcs $u, $v; printf "Shortest common supersquence: %s\n", scs $u, $v; </lang>

Output:
Strings abcbdab bdcaba
Longest common subsequence:   bcba
Shortest common supersquence: abdcabdab

Racket

Translation of: C

This program is based on the C implementation, but use memorization instead of dynamic programming. More explanations about the memorization part in http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html .

<lang Racket>#lang racket

(struct link (len letters))

(define (link-add li n letter)

 (link (+ n (link-len li)) 
       (cons letter (link-letters li))))

(define (memoize f)

 (local ([define table (make-hash)])
   (lambda args
     (dict-ref! table args (λ () (apply f args))))))

(define scs/list

 (memoize 
  (lambda (x y)
    (cond
      [(null? x)
       (link (length y) y)]
      [(null? y)
       (link (length x) x)]
      [(eq? (car x) (car y))
       (link-add (scs/list (cdr x) (cdr y)) 1 (car x))]
      [(<= (link-len (scs/list x (cdr y)))
           (link-len (scs/list (cdr x) y)))
       (link-add (scs/list x (cdr y)) 1 (car y))]
      [else
       (link-add (scs/list (cdr x) y) 1 (car x))]))))

(define (scs x y)

 (list->string (link-letters (scs/list (string->list x) (string->list y)))))

(scs "abcbdab" "bdcaba")</lang>

Output:
"abdcabdab"

Ruby

Translation of: Tcl

uses 'lcs' from here <lang ruby>require 'lcs'

def scs(u, v)

 lcs = lcs(u, v)
 u, v = u.dup, v.dup
 scs = ""
 # Iterate over the characters until LCS processed
 until lcs.empty?
   if u[0]==lcs[0] and v[0]==lcs[0]
     # Part of the LCS, so consume from all strings
     scs << lcs.slice!(0)
     u.slice!(0)
     v.slice!(0)
   elsif u[0]==lcs[0]
     # char of u = char of LCS, but char of LCS v doesn't so consume just that
     scs << v.slice!(0)
   else
     # char of u != char of LCS, so consume just that
     scs << u.slice!(0)
   end
 end
 # append remaining characters, which are not in common
 scs + u + v

end

u = "abcbdab" v = "bdcaba" puts "SCS(#{u}, #{v}) = #{scs(u, v)}"</lang>

Output:
SCS(abcbdab, bdcaba) = abcbdcaba

Sidef

Translation of: Perl

Uses the lcs function defined here. <lang ruby>func scs(u, v) {

   var ls = lcs(u, v).chars
   var pat = Regex('(.*)'+ls.join('(.*)')+'(.*)')
   u.scan!(pat)
   v.scan!(pat)
   var ss = (u.shift + v.shift)
   ls.each { |c| ss += (c + u.shift + v.shift) }
   return ss

}

say scs("abcbdab", "bdcaba")</lang>

Output:
abdcabdab

Tcl

This example uses either of the lcs implementations from here, assumed renamed to lcs… <lang tcl>proc scs {u v} {

   set lcs [lcs $u $v]
   set scs ""
   # Iterate over the characters until LCS processed
   for {set ui [set vi [set li 0]]} {$li<[string length $lcs]} {} {

set uc [string index $u $ui] set vc [string index $v $vi] set lc [string index $lcs $li] if {$uc eq $lc} { if {$vc eq $lc} { # Part of the LCS, so consume from all strings append scs $lc incr ui incr li } else { # char of u = char of LCS, but char of LCS v doesn't so consume just that append scs $vc } incr vi } else { # char of u != char of LCS, so consume just that append scs $uc incr ui }

   }
   # append remaining characters, which are not in common
   append scs [string range $u $ui end] [string range $v $vi end]
   return $scs

}</lang> Demonstrating: <lang tcl>set u "abcbdab" set v "bdcaba" puts "SCS($u,$v) = [scs $u $v]"</lang>

Output:
SCS(abcbdab,bdcaba) = abdcabdab

zkl

Translation of: C

<lang zkl>class Link{ var len,letter,next;

  fcn init(l=0,c="",lnk=Void){ len,letter,next=l,c,lnk; }

} fcn scs(x,y,out){

  lx,ly:=x.len(),y.len();
  lnk:=(ly+1).pump(List,'wrap(_){ (lx+1).pump(List(),Link.create) });
  foreach i in (ly){ lnk[i][lx]=Link(ly-i, y[i]) }
  foreach j in (lx){ lnk[ly][j]=Link(lx-j, x[j]) }

  foreach i,j in ([ly-1..0,-1],[lx-1..0,-1]){
     lp:=lnk[i][j];
     if (y[i]==x[j]){

lp.next =lnk[i+1][j+1]; lp.letter=x[j];

     }else if(lnk[i][j+1].len < lnk[i+1][j].len){

lp.next =lnk[i][j+1]; lp.letter=x[j];

     }else{

lp.next =lnk[i+1][j]; lp.letter=y[i];

     }
     lp.len=lp.next.len + 1;
  }
  lp:=lnk[0][0]; while(lp){ out.write(lp.letter); lp=lp.next; }
  out.close()

}</lang> <lang zkl>scs("abcbdab","bdcaba", Sink(String)).println();</lang>

Output:
abdcabdab