Shortest common supersequence

From Rosetta Code
Task
Shortest common supersequence
You are encouraged to solve this task according to the task description, using any language you may know.

The   shortest common supersequence   is a problem closely related to the   longest common subsequence,   which you can use as an external function for this task.


Task

Given two strings and , find the shortest possible sequence , which is the shortest common super-sequence of and where both and are a subsequence of . Defined as such, is not necessarily unique.

Demonstrate this by printing where abcbdab” and bdcaba”.


Also see



11l

Translation of: C++

<lang 11l>F scs(String x, y)

  I x.empty
     R y
  I y.empty
     R x
  I x[0] == y[0]
     R x[0]‘’scs(x[1..], y[1..])
  I scs(x, y[1..]).len <= scs(x[1..], y).len
     R y[0]‘’scs(x, y[1..])
  E
     R x[0]‘’scs(x[1..], y)

print(scs(‘abcbdab’, ‘bdcaba’))</lang>

Output:
abdcabdab

C

The C99 code here isn't all that different from Levenstein distance calculation. <lang c>#include <stdio.h>

  1. include <string.h>

typedef struct link link_t; struct link { int len; char letter; link_t *next; };

// Stores a copy of a SCS of x and y in out. Caller needs to make sure out is long enough. int scs(char *x, char *y, char *out) { int lx = strlen(x), ly = strlen(y); link_t lnk[ly + 1][lx + 1];

for (int i = 0; i < ly; i++) lnk[i][lx] = (link_t) {ly - i, y[i], &lnk[i + 1][lx]};

for (int j = 0; j < lx; j++) lnk[ly][j] = (link_t) {lx - j, x[j], &lnk[ly][j + 1]};

lnk[ly][lx] = (link_t) {0};

for (int i = ly; i--; ) { for (int j = lx; j--; ) { link_t *lp = &lnk[i][j]; if (y[i] == x[j]) { lp->next = &lnk[i+1][j+1]; lp->letter = x[j]; } else if (lnk[i][j+1].len < lnk[i+1][j].len) { lp->next = &lnk[i][j+1]; lp->letter = x[j]; } else { lp->next = &lnk[i+1][j]; lp->letter = y[i]; } lp->len = lp->next->len + 1; } }

for (link_t *lp = &lnk[0][0]; lp; lp = lp->next) *out++ = lp->letter;

return 0; }

int main(void) { char x[] = "abcbdab", y[] = "bdcaba", res[128]; scs(x, y, res); printf("SCS(%s, %s) -> %s\n", x, y, res); return 0; }</lang>

Output:
SCS(abcbdab, bdcaba) -> abdcabdab

C++

Translation of: Java

<lang cpp>#include <iostream>

std::string scs(std::string x, std::string y) {

   if (x.empty()) {
       return y;
   }
   if (y.empty()) {
       return x;
   }
   if (x[0] == y[0]) {
       return x[0] + scs(x.substr(1), y.substr(1));
   }
   if (scs(x, y.substr(1)).size() <= scs(x.substr(1), y).size()) {
       return y[0] + scs(x, y.substr(1));
   } else {
       return x[0] + scs(x.substr(1), y);
   }

}

int main() {

   auto res = scs("abcbdab", "bdcaba");
   std::cout << res << '\n';
   return 0;

}</lang>

Output:
abdcabdab

D

Translation of: Racket

<lang d>import std.stdio, std.functional, std.array, std.range;

dstring scs(in dstring x, in dstring y) nothrow @safe {

   alias mScs = memoize!scs;
   if (x.empty) return y;
   if (y.empty) return x;
   if (x.front == y.front)
       return x.front ~ mScs(x.dropOne, y.dropOne);
   if (mScs(x, y.dropOne).length <= mScs(x.dropOne, y).length)
       return y.front ~ mScs(x, y.dropOne);
   else
       return x.front ~ mScs(x.dropOne, y);

}

void main() @safe {

   scs("abcbdab", "bdcaba").writeln;

}</lang>

Output:
abdcabdab

Elixir

Translation of: Ruby
Works with: Elixir version 1.3

uses 'LCS' from here <lang elixir>defmodule SCS do

 def scs(u, v) do
   lcs = LCS.lcs(u, v) |> to_charlist
   scs(to_charlist(u), to_charlist(v), lcs, []) |> to_string
 end
 
 defp scs(u, v, [], res), do: Enum.reverse(res) ++ u ++ v
 defp scs([h|ut], [h|vt], [h|lt], res),      do: scs(ut, vt, lt, [h|res])
 defp scs([h|_]=u, [vh|vt], [h|_]=lcs, res), do: scs(u, vt, lcs, [vh|res])
 defp scs([uh|ut], v, lcs, res),             do: scs(ut, v, lcs, [uh|res])

end

u = "abcbdab" v = "bdcaba" IO.puts "SCS(#{u}, #{v}) = #{SCS.scs(u, v)}"</lang>

Output:
SCS(abcbdab, bdcaba) = abdcabdab

Factor

Translation of: Julia

<lang factor>USING: combinators io kernel locals math memoize sequences ;

MEMO:: scs ( x y -- seq )

   {
       { [ x empty? ] [ y ] }
       { [ y empty? ] [ x ] }
       { [ x first y first = ]
         [ x rest y rest scs x first prefix ] }
       { [ x y rest scs length x rest y scs length <= ]
         [ x y rest scs y first prefix ] }
       [ x rest y scs x first prefix ]
   } cond ;

"abcbdab" "bdcaba" scs print</lang>

Output:
abdcabdab

Go

Translation of: Kotlin

<lang go>package main

import (

   "fmt"
   "strings"

)

func lcs(x, y string) string {

   xl, yl := len(x), len(y)
   if xl == 0 || yl == 0 {
       return ""
   }
   x1, y1 := x[:xl-1], y[:yl-1]
   if x[xl-1] == y[yl-1] {
       return fmt.Sprintf("%s%c", lcs(x1, y1), x[xl-1])
   }
   x2, y2 := lcs(x, y1), lcs(x1, y)
   if len(x2) > len(y2) {
       return x2
   } else {
       return y2
   }

}

func scs(u, v string) string {

   ul, vl := len(u), len(v)
   lcs := lcs(u, v)
   ui, vi := 0, 0
   var sb strings.Builder
   for i := 0; i < len(lcs); i++ {
       for ui < ul && u[ui] != lcs[i] {
           sb.WriteByte(u[ui])
           ui++
       }
       for vi < vl && v[vi] != lcs[i] {
           sb.WriteByte(v[vi])
           vi++
       }
       sb.WriteByte(lcs[i])
       ui++
       vi++
   }
   if ui < ul {
       sb.WriteString(u[ui:])
   }
   if vi < vl {
       sb.WriteString(v[vi:])
   }
   return sb.String()

}

func main() {

   u := "abcbdab"
   v := "bdcaba"
   fmt.Println(scs(u, v))

}</lang>

Output:
abdcabdab

Java

Translation of: D

<lang java>public class ShortestCommonSuperSequence {

   private static boolean isEmpty(String s) {
       return null == s || s.isEmpty();
   }
   private static String scs(String x, String y) {
       if (isEmpty(x)) {
           return y;
       }
       if (isEmpty(y)) {
           return x;
       }
       if (x.charAt(0) == y.charAt(0)) {
           return x.charAt(0) + scs(x.substring(1), y.substring(1));
       }
       if (scs(x, y.substring(1)).length() <= scs(x.substring(1), y).length()) {
           return y.charAt(0) + scs(x, y.substring(1));
       } else {
           return x.charAt(0) + scs(x.substring(1), y);
       }
   }
   public static void main(String[] args) {
       System.out.println(scs("abcbdab", "bdcaba"));
   }

}</lang>

Output:
abdcabdab

Julia

Translation of: D

<lang Julia>using Memoize

@memoize function scs(x, y)

   if x == ""
       return y
   elseif y == ""
       return x
   elseif x[1] == y[1]
       return "$(x[1])$(scs(x[2:end], y[2:end]))"
   elseif length(scs(x, y[2:end])) <= length(scs(x[2:end], y))
       return "$(y[1])$(scs(x, y[2:end]))"
   else
       return "$(x[1])$(scs(x[2:end], y))"
   end

end

println(scs("abcbdab", "bdcaba")) </lang>

Output:
abdcabdab

Kotlin

Uses 'lcs' function from Longest common subsequence#Kotlin: <lang scala>// version 1.1.2

fun lcs(x: String, y: String): String {

   if (x.length == 0 || y.length == 0) return ""
   val x1 = x.dropLast(1)  
   val y1 = y.dropLast(1)
   if (x.last() == y.last()) return lcs(x1, y1) + x.last()
   val x2 = lcs(x, y1)
   val y2 = lcs(x1, y)
   return if (x2.length > y2.length) x2 else y2

}

fun scs(u: String, v: String): String{

   val lcs = lcs(u, v)
   var ui = 0
   var vi = 0
   val sb = StringBuilder()
   for (i in 0 until lcs.length) {
       while (ui < u.length && u[ui] != lcs[i]) sb.append(u[ui++])       
       while (vi < v.length && v[vi] != lcs[i]) sb.append(v[vi++])
       sb.append(lcs[i])
       ui++; vi++
   }
   if (ui < u.length) sb.append(u.substring(ui))
   if (vi < v.length) sb.append(v.substring(vi))
   return sb.toString()

}

fun main(args: Array<String>) {

   val u = "abcbdab"
   val v = "bdcaba"  
   println(scs(u, v))

}</lang>

Output:
abdcabdab

Nim

Translation of: Kotlin

<lang Nim>proc lcs(x, y: string): string =

 if x.len == 0 or y.len == 0: return
 let x1 = x[0..^2]
 let y1 = y[0..^2]
 if x[^1] == y[^1]: return lcs(x1, y1) & x[^1]
 let x2 = lcs(x, y1)
 let y2 = lcs(x1, y)
 result = if x2.len > y2.len: x2 else: y2

proc scs(u, v: string): string =

 let lcs = lcs(u, v)
 var ui, vi = 0
 for ch in lcs:
   while ui < u.len and u[ui] != ch:
     result.add u[ui]
     inc ui
   while vi < v.len and v[vi] != ch:
     result.add v[vi]
     inc vi
   result.add ch
   inc ui
   inc vi
 if ui < u.len: result.add u.substr(ui)
 if vi < v.len: result.add v.substr(vi)

when isMainModule:

 let u = "abcbdab"
 let v = "bdcaba"
 echo scs(u, v)</lang>
Output:
abdcabdab

Perl

<lang perl>sub lcs { # longest common subsequence

   my( $u, $v ) = @_;
   return  unless length($u) and length($v);
   my $longest = ;
   for my $first ( 0..length($u)-1 ) {
       my $char = substr $u, $first, 1;
       my $i = index( $v, $char );
       next if -1==$i;
       my $next = $char;
       $next .= lcs( substr( $u, $first+1), substr( $v, $i+1 ) ) unless $i==length($v)-1;
       $longest = $next if length($next) > length($longest);
   }
   return $longest;

}

sub scs { # shortest common supersequence

   my( $u, $v ) = @_;
   my @lcs = split //, lcs $u, $v;
   my $pat = "(.*)".join("(.*)",@lcs)."(.*)"; 
   my @u = $u =~ /$pat/;
   my @v = $v =~ /$pat/;
   my $scs = shift(@u).shift(@v);
   $scs .= $_.shift(@u).shift(@v) for @lcs;
   return $scs;

}

my $u = "abcbdab"; my $v = "bdcaba"; printf "Strings %s %s\n", $u, $v; printf "Longest common subsequence: %s\n", lcs $u, $v; printf "Shortest common supersquence: %s\n", scs $u, $v; </lang>

Output:
Strings abcbdab bdcaba
Longest common subsequence:   bcba
Shortest common supersquence: abdcabdab

Phix

Translation of: Python

<lang Phix>function longest_common_subsequence(sequence a, b) sequence res = ""

   if length(a) and length(b) then
       if a[$]=b[$] then
           res = longest_common_subsequence(a[1..-2],b[1..-2])&a[$]
       else
           sequence l = longest_common_subsequence(a,b[1..-2]),
                    r = longest_common_subsequence(a[1..-2],b)
           res = iff(length(l)>length(r)?l:r)
       end if
   end if
   return res

end function

function shortest_common_supersequence(string a, b)

   string lcs = longest_common_subsequence(a, b),
          scs = ""
   -- Consume lcs
   while length(lcs) do
       integer c = lcs[1]
       if a[1]==c and b[1]==c then
           -- Part of the lcs, so consume from all strings
           scs &= c
           lcs = lcs[2..$]
           a = a[2..$]
           b = b[2..$]
       elsif a[1]==c then
           scs &= b[1]
           b = b[2..$]
       else
           scs &= a[1]
           a = a[2..$]
       end if
   end while
   -- append remaining characters
   return scs & a & b

end function

?shortest_common_supersequence("abcbdab", "bdcaba") ?shortest_common_supersequence("WEASELS", "WARDANCE")</lang>

Output:
"abdcabdab"
"WEASRDANCELS"

Python

<lang Python>

  1. Use the Longest Common Subsequence algorithm

def shortest_common_supersequence(a, b):

   lcs = longest_common_subsequence(a, b)
   scs = ""
   # Consume lcs
   while len(lcs) > 0:
       if a[0]==lcs[0] and b[0]==lcs[0]:
       # Part of the LCS, so consume from all strings
           scs += lcs[0]
           lcs = lcs[1:]
           a = a[1:]
           b = b[1:]
       elif a[0]==lcs[0]:
           scs += b[0]
           b = b[1:]
       else:
           scs += a[0]
           a = a[1:]
   # append remaining characters
   return scs + a + b

</lang>

Output:
Seq1: WEASELS
Seq2: WARDANCE
SCS:  WEASRDANCELS

Racket

Translation of: C

This program is based on the C implementation, but use memorization instead of dynamic programming. More explanations about the memorization part in http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html .

<lang Racket>#lang racket

(struct link (len letters))

(define (link-add li n letter)

 (link (+ n (link-len li)) 
       (cons letter (link-letters li))))

(define (memoize f)

 (local ([define table (make-hash)])
   (lambda args
     (dict-ref! table args (λ () (apply f args))))))

(define scs/list

 (memoize 
  (lambda (x y)
    (cond
      [(null? x)
       (link (length y) y)]
      [(null? y)
       (link (length x) x)]
      [(eq? (car x) (car y))
       (link-add (scs/list (cdr x) (cdr y)) 1 (car x))]
      [(<= (link-len (scs/list x (cdr y)))
           (link-len (scs/list (cdr x) y)))
       (link-add (scs/list x (cdr y)) 1 (car y))]
      [else
       (link-add (scs/list (cdr x) y) 1 (car x))]))))

(define (scs x y)

 (list->string (link-letters (scs/list (string->list x) (string->list y)))))

(scs "abcbdab" "bdcaba")</lang>

Output:
"abdcabdab"

Raku

(formerly Perl 6)

Using 'lcs' routine from Longest common subsequence task <lang perl6>sub lcs(Str $xstr, Str $ystr) { # longest common subsequence

   return "" unless $xstr && $ystr;
   my ($x, $xs, $y, $ys) = $xstr.substr(0, 1), $xstr.substr(1), $ystr.substr(0, 1), $ystr.substr(1);
   return $x eq $y
       ?? $x ~ lcs($xs, $ys)
       !! max(:by{ $^a.chars }, lcs($xstr, $ys), lcs($xs, $ystr) );

}

sub scs ($u, $v) { # shortest common supersequence

   my @lcs = (lcs $u, $v).comb;
   my $pat = '(.*)' ~ join('(.*)',@lcs) ~ '(.*)';
   my $regex = "rx/$pat/".EVAL;
   my @u = ($u ~~ $regex).list;
   my @v = ($v ~~ $regex).list;
   my $scs = shift(@u) ~ shift(@v);
   $scs ~= $_ ~ shift(@u) ~ shift(@v) for @lcs;
   return $scs;

}

my $u = 'abcbdab'; my $v = 'bdcaba'; printf "Strings: %s %s\n", $u, $v; printf "Longest common subsequence: %s\n", lcs $u, $v; printf "Shortest common supersquence: %s\n", scs $u, $v;</lang>

Output:
Strings: abcbdab bdcaba
Longest common subsequence:   bcba
Shortest common supersquence: abdcabdab

REXX

Translation of: RING

<lang rexx>/*REXX program finds the Shortest common supersequence (SCS) of two character strings.*/ parse arg u v . /*obtain optional arguments from the CL*/ if u== | u=="," then u= 'abcbdab' /*Not specified? Then use the default.*/ if v== | v=="," then v= 'bdcaba' /* " " " " " " */ say ' string u=' u /*echo the value of string U to term.*/ say ' string v=' v /* " " " " " V " " */ $= u /*define initial value for the output. */

     do n=1    to length(u)                     /*process the whole length of string U.*/
       do m=n  to length(v) - 1                 /*   "    right─ish  part   "    "   V.*/
       p= pos( substr(v, m, 1), $)              /*position of mTH  V  char in $ string.*/
       _= substr(v, m+1, 1)                     /*obtain a single character of string V*/
       if p\==0  &  _\==substr($, p+1, 1)  then $= insert(_, $, p)
       end   /*m*/                              /* [↑]  insert _ in $ after position P.*/
     end     /*n*/

say say 'shortest common supersequence=' $ /*stick a fork in it, we're all done. */</lang>

output   when using the default inputs:
                     string u= abcbdab
                     string v= bdcaba

shortest common supersequence= abdcabdab
output   when using the inputs values:     ab   ac
                     string u= ab
                     string v= ac

shortest common supersequence= acb
output   when using the inputs values:     ac   ab
                     string u= ac
                     string v= ab

shortest common supersequence= abc 

Ring

<lang ring>

  1. Project : Shortest common supersequence

str1 = "a b c b d a b" str2 = "bdcaba" str3 = str2list(substr(str1, " ", nl)) for n = 1 to len(str3)

    for m = n to len(str2)-1
         pos = find(str3, str2[m])
         if pos > 0 and str2[m+1] != str3[pos+1]
            insert(str3, pos, str2[m+1])
         ok
    next

next showarray(str3)

func showarray(vect)

      svect = ""
      for n = 1 to len(vect)
            svect = svect + vect[n]
      next
      see svect

</lang> Output:

Shortest common supersequence: abdcabdab

Ruby

Translation of: Tcl

uses 'lcs' from here <lang ruby>require 'lcs'

def scs(u, v)

 lcs = lcs(u, v)
 u, v = u.dup, v.dup
 scs = ""
 # Iterate over the characters until LCS processed
 until lcs.empty?
   if u[0]==lcs[0] and v[0]==lcs[0]
     # Part of the LCS, so consume from all strings
     scs << lcs.slice!(0)
     u.slice!(0)
     v.slice!(0)
   elsif u[0]==lcs[0]
     # char of u = char of LCS, but char of LCS v doesn't so consume just that
     scs << v.slice!(0)
   else
     # char of u != char of LCS, so consume just that
     scs << u.slice!(0)
   end
 end
 # append remaining characters, which are not in common
 scs + u + v

end

u = "abcbdab" v = "bdcaba" puts "SCS(#{u}, #{v}) = #{scs(u, v)}"</lang>

Output:
SCS(abcbdab, bdcaba) = abcbdcaba

Sidef

Translation of: Perl

Uses the lcs function defined here. <lang ruby>func scs(u, v) {

   var ls = lcs(u, v).chars
   var pat = Regex('(.*)'+ls.join('(.*)')+'(.*)')
   u.scan!(pat)
   v.scan!(pat)
   var ss = (u.shift + v.shift)
   ls.each { |c| ss += (c + u.shift + v.shift) }
   return ss

}

say scs("abcbdab", "bdcaba")</lang>

Output:
abdcabdab

Tcl

This example uses either of the lcs implementations from here, assumed renamed to lcs… <lang tcl>proc scs {u v} {

   set lcs [lcs $u $v]
   set scs ""
   # Iterate over the characters until LCS processed
   for {set ui [set vi [set li 0]]} {$li<[string length $lcs]} {} {

set uc [string index $u $ui] set vc [string index $v $vi] set lc [string index $lcs $li] if {$uc eq $lc} { if {$vc eq $lc} { # Part of the LCS, so consume from all strings append scs $lc incr ui incr li } else { # char of u = char of LCS, but char of LCS v doesn't so consume just that append scs $vc } incr vi } else { # char of u != char of LCS, so consume just that append scs $uc incr ui }

   }
   # append remaining characters, which are not in common
   append scs [string range $u $ui end] [string range $v $vi end]
   return $scs

}</lang> Demonstrating: <lang tcl>set u "abcbdab" set v "bdcaba" puts "SCS($u,$v) = [scs $u $v]"</lang>

Output:
SCS(abcbdab,bdcaba) = abdcabdab

Wren

Translation of: Kotlin

<lang ecmascript>var lcs // recursive lcs = Fn.new { |x, y|

   if (x.count == 0 || y.count == 0) return ""
   var x1 = x[0...-1]
   var y1 = y[0...-1]
   if (x[-1] == y[-1]) return lcs.call(x1, y1) + x[-1]
   var x2 = lcs.call(x, y1)
   var y2 = lcs.call(x1, y)
   return (x2.count > y2.count) ? x2 : y2

}

var scs = Fn.new { |u, v|

   var lcs = lcs.call(u, v)
   var ui = 0
   var vi = 0
   var sb = ""
   for (i in 0...lcs.count) {
       while (ui < u.count && u[ui] != lcs[i]) {
           sb = sb + u[ui]
           ui = ui + 1
       }
       while (vi < v.count && v[vi] != lcs[i]) {
           sb = sb + v[vi]
           vi = vi + 1
       }
       sb = sb + lcs[i]
       ui = ui + 1
       vi = vi + 1
   }
   if (ui < u.count) sb = sb + u[ui..-1]
   if (vi < v.count) sb = sb + v[vi..-1]
   return sb

}

var u = "abcbdab" var v = "bdcaba" System.print(scs.call(u, v))</lang>

Output:
abdcabdab

zkl

Translation of: C

<lang zkl>class Link{ var len,letter,next;

  fcn init(l=0,c="",lnk=Void){ len,letter,next=l,c,lnk; }

} fcn scs(x,y,out){

  lx,ly:=x.len(),y.len();
  lnk:=(ly+1).pump(List,'wrap(_){ (lx+1).pump(List(),Link.create) });
  foreach i in (ly){ lnk[i][lx]=Link(ly-i, y[i]) }
  foreach j in (lx){ lnk[ly][j]=Link(lx-j, x[j]) }

  foreach i,j in ([ly-1..0,-1],[lx-1..0,-1]){
     lp:=lnk[i][j];
     if (y[i]==x[j]){

lp.next =lnk[i+1][j+1]; lp.letter=x[j];

     }else if(lnk[i][j+1].len < lnk[i+1][j].len){

lp.next =lnk[i][j+1]; lp.letter=x[j];

     }else{

lp.next =lnk[i+1][j]; lp.letter=y[i];

     }
     lp.len=lp.next.len + 1;
  }
  lp:=lnk[0][0]; while(lp){ out.write(lp.letter); lp=lp.next; }
  out.close()

}</lang> <lang zkl>scs("abcbdab","bdcaba", Sink(String)).println();</lang>

Output:
abdcabdab