Shortest common supersequence

From Rosetta Code
Task
Shortest common supersequence
You are encouraged to solve this task according to the task description, using any language you may know.

The   shortest common supersequence   is a problem closely related to the   longest common subsequence,   which you can use as an external function for this task.


Task

Given two strings and , find the shortest possible sequence , which is the shortest common super-sequence of and where both and are a subsequence of . Defined as such, is not necessarily unique.

Demonstrate this by printing where abcbdab” and bdcaba”.


Also see



C[edit]

The C99 code here isn't all that different from Levenstein distance calculation.

#include <stdio.h>
#include <string.h>
 
typedef struct link link_t;
struct link {
int len;
char letter;
link_t *next;
};
 
// Stores a copy of a SCS of x and y in out. Caller needs to make sure out is long enough.
int scs(char *x, char *y, char *out)
{
int lx = strlen(x), ly = strlen(y);
link_t lnk[ly + 1][lx + 1];
 
for (int i = 0; i < ly; i++)
lnk[i][lx] = (link_t) {ly - i, y[i], &lnk[i + 1][lx]};
 
for (int j = 0; j < lx; j++)
lnk[ly][j] = (link_t) {lx - j, x[j], &lnk[ly][j + 1]};
 
lnk[ly][lx] = (link_t) {0};
 
for (int i = ly; i--; ) {
for (int j = lx; j--; ) {
link_t *lp = &lnk[i][j];
if (y[i] == x[j]) {
lp->next = &lnk[i+1][j+1];
lp->letter = x[j];
} else if (lnk[i][j+1].len < lnk[i+1][j].len) {
lp->next = &lnk[i][j+1];
lp->letter = x[j];
} else {
lp->next = &lnk[i+1][j];
lp->letter = y[i];
}
lp->len = lp->next->len + 1;
}
}
 
for (link_t *lp = &lnk[0][0]; lp; lp = lp->next)
*out++ = lp->letter;
 
return 0;
}
 
int main(void)
{
char x[] = "abcbdab", y[] = "bdcaba", res[128];
scs(x, y, res);
printf("SCS(%s, %s) -> %s\n", x, y, res);
return 0;
}
Output:
SCS(abcbdab, bdcaba) -> abdcabdab

D[edit]

Translation of: Racket
import std.stdio, std.functional, std.array, std.range;
 
dstring scs(in dstring x, in dstring y) nothrow @safe {
alias mScs = memoize!scs;
if (x.empty) return y;
if (y.empty) return x;
if (x.front == y.front)
return x.front ~ mScs(x.dropOne, y.dropOne);
if (mScs(x, y.dropOne).length <= mScs(x.dropOne, y).length)
return y.front ~ mScs(x, y.dropOne);
else
return x.front ~ mScs(x.dropOne, y);
}
 
void main() @safe {
scs("abcbdab", "bdcaba").writeln;
}
Output:
abdcabdab

Go[edit]

Translation of: Kotlin
package main
 
import (
"fmt"
"strings"
)
 
func lcs(x, y string) string {
xl, yl := len(x), len(y)
if xl == 0 || yl == 0 {
return ""
}
x1, y1 := x[:xl-1], y[:yl-1]
if x[xl-1] == y[yl-1] {
return fmt.Sprintf("%s%c", lcs(x1, y1), x[xl-1])
}
x2, y2 := lcs(x, y1), lcs(x1, y)
if len(x2) > len(y2) {
return x2
} else {
return y2
}
}
 
func scs(u, v string) string {
ul, vl := len(u), len(v)
lcs := lcs(u, v)
ui, vi := 0, 0
var sb strings.Builder
for i := 0; i < len(lcs); i++ {
for ui < ul && u[ui] != lcs[i] {
sb.WriteByte(u[ui])
ui++
}
for vi < vl && v[vi] != lcs[i] {
sb.WriteByte(v[vi])
vi++
}
sb.WriteByte(lcs[i])
ui++
vi++
}
if ui < ul {
sb.WriteString(u[ui:])
}
if vi < vl {
sb.WriteString(v[vi:])
}
return sb.String()
}
 
func main() {
u := "abcbdab"
v := "bdcaba"
fmt.Println(scs(u, v))
}
Output:
abdcabdab

Elixir[edit]

Translation of: Ruby
Works with: Elixir version 1.3

uses 'LCS' from here

defmodule SCS do
def scs(u, v) do
lcs = LCS.lcs(u, v) |> to_charlist
scs(to_charlist(u), to_charlist(v), lcs, []) |> to_string
end
 
defp scs(u, v, [], res), do: Enum.reverse(res) ++ u ++ v
defp scs([h|ut], [h|vt], [h|lt], res), do: scs(ut, vt, lt, [h|res])
defp scs([h|_]=u, [vh|vt], [h|_]=lcs, res), do: scs(u, vt, lcs, [vh|res])
defp scs([uh|ut], v, lcs, res), do: scs(ut, v, lcs, [uh|res])
end
 
u = "abcbdab"
v = "bdcaba"
IO.puts "SCS(#{u}, #{v}) = #{SCS.scs(u, v)}"
Output:
SCS(abcbdab, bdcaba) = abdcabdab

Kotlin[edit]

Uses 'lcs' function from Longest common subsequence#Kotlin:

// version 1.1.2
 
fun lcs(x: String, y: String): String {
if (x.length == 0 || y.length == 0) return ""
val x1 = x.dropLast(1)
val y1 = y.dropLast(1)
if (x.last() == y.last()) return lcs(x1, y1) + x.last()
val x2 = lcs(x, y1)
val y2 = lcs(x1, y)
return if (x2.length > y2.length) x2 else y2
}
 
fun scs(u: String, v: String): String{
val lcs = lcs(u, v)
var ui = 0
var vi = 0
val sb = StringBuilder()
for (i in 0 until lcs.length) {
while (ui < u.length && u[ui] != lcs[i]) sb.append(u[ui++])
while (vi < v.length && v[vi] != lcs[i]) sb.append(v[vi++])
sb.append(lcs[i])
ui++; vi++
}
if (ui < u.length) sb.append(u.substring(ui))
if (vi < v.length) sb.append(v.substring(vi))
return sb.toString()
}
 
fun main(args: Array<String>) {
val u = "abcbdab"
val v = "bdcaba"
println(scs(u, v))
}
Output:
abdcabdab

Perl[edit]

sub lcs { # longest common subsequence
my( $u, $v ) = @_;
return '' unless length($u) and length($v);
my $longest = '';
for my $first ( 0..length($u)-1 ) {
my $char = substr $u, $first, 1;
my $i = index( $v, $char );
next if -1==$i;
my $next = $char;
$next .= lcs( substr( $u, $first+1), substr( $v, $i+1 ) ) unless $i==length($v)-1;
$longest = $next if length($next) > length($longest);
}
return $longest;
}
 
sub scs { # shortest common supersequence
my( $u, $v ) = @_;
my @lcs = split //, lcs $u, $v;
my $pat = "(.*)".join("(.*)",@lcs)."(.*)";
my @u = $u =~ /$pat/;
my @v = $v =~ /$pat/;
my $scs = shift(@u).shift(@v);
$scs .= $_.shift(@u).shift(@v) for @lcs;
return $scs;
}
 
my $u = "abcbdab";
my $v = "bdcaba";
printf "Strings %s %s\n", $u, $v;
printf "Longest common subsequence:  %s\n", lcs $u, $v;
printf "Shortest common supersquence: %s\n", scs $u, $v;
 
Output:
Strings abcbdab bdcaba
Longest common subsequence:   bcba
Shortest common supersquence: abdcabdab

Perl 6[edit]

Using 'lcs' routine from Longest common subsequence task

sub lcs(Str $xstr, Str $ystr) { # longest common subsequence
return "" unless $xstr && $ystr;
my ($x, $xs, $y, $ys) = $xstr.substr(0, 1), $xstr.substr(1), $ystr.substr(0, 1), $ystr.substr(1);
return $x eq $y
?? $x ~ lcs($xs, $ys)
!! max(:by{ $^a.chars }, lcs($xstr, $ys), lcs($xs, $ystr) );
}
 
sub scs ($u, $v) { # shortest common supersequence
my @lcs = (lcs $u, $v).comb;
my $pat = '(.*)' ~ join('(.*)',@lcs) ~ '(.*)';
my $regex = "rx/$pat/".EVAL;
my @u = ($u ~~ $regex).list;
my @v = ($v ~~ $regex).list;
my $scs = shift(@u) ~ shift(@v);
$scs ~= $_ ~ shift(@u) ~ shift(@v) for @lcs;
return $scs;
}
 
my $u = 'abcbdab';
my $v = 'bdcaba';
printf "Strings: %s %s\n", $u, $v;
printf "Longest common subsequence:  %s\n", lcs $u, $v;
printf "Shortest common supersquence: %s\n", scs $u, $v;
Output:
Strings: abcbdab bdcaba
Longest common subsequence:   bcba
Shortest common supersquence: abdcabdab

Phix[edit]

Translation of: Python
function longest_common_subsequence(sequence a, b)
sequence res = ""
if length(a) and length(b) then
if a[$]=b[$] then
res = longest_common_subsequence(a[1..-2],b[1..-2])&a[$]
else
sequence l = longest_common_subsequence(a,b[1..-2]),
r = longest_common_subsequence(a[1..-2],b)
res = iff(length(l)>length(r)?l:r)
end if
end if
return res
end function
 
function shortest_common_supersequence(string a, b)
string lcs = longest_common_subsequence(a, b),
scs = ""
-- Consume lcs
while length(lcs) do
integer c = lcs[1]
if a[1]==c and b[1]==c then
-- Part of the lcs, so consume from all strings
scs &= c
lcs = lcs[2..$]
a = a[2..$]
b = b[2..$]
elsif a[1]==c then
scs &= b[1]
b = b[2..$]
else
scs &= a[1]
a = a[2..$]
end if
end while
-- append remaining characters
return scs & a & b
end function
 
?shortest_common_supersequence("abcbdab", "bdcaba")
?shortest_common_supersequence("WEASELS", "WARDANCE")
Output:
"abdcabdab"
"WEASRDANCELS"

Python[edit]

 
# Use the Longest Common Subsequence algorithm
 
def shortest_common_supersequence(a, b):
lcs = longest_common_subsequence(a, b)
scs = ""
# Consume lcs
while len(lcs) > 0:
if a[0]==lcs[0] and b[0]==lcs[0]:
# Part of the LCS, so consume from all strings
scs += lcs[0]
lcs = lcs[1:]
a = a[1:]
b = b[1:]
elif a[0]==lcs[0]:
scs += b[0]
b = b[1:]
else:
scs += a[0]
a = a[1:]
# append remaining characters
return scs + a + b
 
Output:
Seq1: WEASELS
Seq2: WARDANCE
SCS:  WEASRDANCELS

Racket[edit]

Translation of: C
This program is based on the C implementation, but use memorization instead of dynamic programming. More explanations about the memorization part in http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html .
#lang racket
 
(struct link (len letters))
 
(define (link-add li n letter)
(link (+ n (link-len li))
(cons letter (link-letters li))))
 
(define (memoize f)
(local ([define table (make-hash)])
(lambda args
(dict-ref! table args (λ () (apply f args))))))
 
(define scs/list
(memoize
(lambda (x y)
(cond
[(null? x)
(link (length y) y)]
[(null? y)
(link (length x) x)]
[(eq? (car x) (car y))
(link-add (scs/list (cdr x) (cdr y)) 1 (car x))]
[(<= (link-len (scs/list x (cdr y)))
(link-len (scs/list (cdr x) y)))
(link-add (scs/list x (cdr y)) 1 (car y))]
[else
(link-add (scs/list (cdr x) y) 1 (car x))]))))
 
(define (scs x y)
(list->string (link-letters (scs/list (string->list x) (string->list y)))))
 
(scs "abcbdab" "bdcaba")
Output:
"abdcabdab"

REXX[edit]

Translation of: RING
/*REXX program finds the  Shortest common supersequence (SCS)  of two character strings.*/
parse arg u v . /*obtain optional arguments from the CL*/
if u=='' | u=="," then u= 'abcbdab' /*Not specified? Then use the default.*/
if v=='' | v=="," then v= 'bdcaba' /* " " " " " " */
say ' string u=' u /*echo the value of string U to term.*/
say ' string v=' v /* " " " " " V " " */
$= u /*define initial value for the output. */
do n=1 to length(u) /*process the whole length of string U.*/
do m=n to length(v) - 1 /* " right─ish part " " V.*/
p= pos( substr(v, m, 1), $) /*position of mTH V char in $ string.*/
_= substr(v, m+1, 1) /*obtain a single character of string V*/
if p\==0 & _\==substr($, p+1, 1) then $= insert(_, $, p)
end /*m*/ /* [↑] insert _ in $ after position P.*/
end /*n*/
say
say 'shortest common supersequence=' $ /*stick a fork in it, we're all done. */
output   when using the default inputs:
                     string u= abcbdab
                     string v= bdcaba

shortest common supersequence= abdcabdab
output   when using the inputs values:     ab   ac
                     string u= ab
                     string v= ac

shortest common supersequence= acb
output   when using the inputs values:     ac   ab
                     string u= ac
                     string v= ab

shortest common supersequence= abc 

Ring[edit]

 
# Project : Shortest common supersequence
 
str1 = "a b c b d a b"
str2 = "bdcaba"
str3 = str2list(substr(str1, " ", nl))
for n = 1 to len(str3)
for m = n to len(str2)-1
pos = find(str3, str2[m])
if pos > 0 and str2[m+1] != str3[pos+1]
insert(str3, pos, str2[m+1])
ok
next
next
showarray(str3)
 
func showarray(vect)
svect = ""
for n = 1 to len(vect)
svect = svect + vect[n]
next
see svect
 

Output:

Shortest common supersequence: abdcabdab

Ruby[edit]

Translation of: Tcl

uses 'lcs' from here

require 'lcs'
 
def scs(u, v)
lcs = lcs(u, v)
u, v = u.dup, v.dup
scs = ""
# Iterate over the characters until LCS processed
until lcs.empty?
if u[0]==lcs[0] and v[0]==lcs[0]
# Part of the LCS, so consume from all strings
scs << lcs.slice!(0)
u.slice!(0)
v.slice!(0)
elsif u[0]==lcs[0]
# char of u = char of LCS, but char of LCS v doesn't so consume just that
scs << v.slice!(0)
else
# char of u != char of LCS, so consume just that
scs << u.slice!(0)
end
end
# append remaining characters, which are not in common
scs + u + v
end
 
u = "abcbdab"
v = "bdcaba"
puts "SCS(#{u}, #{v}) = #{scs(u, v)}"
Output:
SCS(abcbdab, bdcaba) = abcbdcaba

Sidef[edit]

Translation of: Perl

Uses the lcs function defined here.

func scs(u, v) {
var ls = lcs(u, v).chars
var pat = Regex('(.*)'+ls.join('(.*)')+'(.*)')
u.scan!(pat)
v.scan!(pat)
var ss = (u.shift + v.shift)
ls.each { |c| ss += (c + u.shift + v.shift) }
return ss
}
 
say scs("abcbdab", "bdcaba")
Output:
abdcabdab

Tcl[edit]

This example uses either of the lcs implementations from here, assumed renamed to lcs

proc scs {u v} {
set lcs [lcs $u $v]
set scs ""
 
# Iterate over the characters until LCS processed
for {set ui [set vi [set li 0]]} {$li<[string length $lcs]} {} {
set uc [string index $u $ui]
set vc [string index $v $vi]
set lc [string index $lcs $li]
if {$uc eq $lc} {
if {$vc eq $lc} {
# Part of the LCS, so consume from all strings
append scs $lc
incr ui
incr li
} else {
# char of u = char of LCS, but char of LCS v doesn't so consume just that
append scs $vc
}
incr vi
} else {
# char of u != char of LCS, so consume just that
append scs $uc
incr ui
}
}
 
# append remaining characters, which are not in common
append scs [string range $u $ui end] [string range $v $vi end]
return $scs
}

Demonstrating:

set u "abcbdab"
set v "bdcaba"
puts "SCS($u,$v) = [scs $u $v]"
Output:
SCS(abcbdab,bdcaba) = abdcabdab

zkl[edit]

Translation of: C
class Link{ var len,letter,next;
fcn init(l=0,c="",lnk=Void){ len,letter,next=l,c,lnk; }
}
fcn scs(x,y,out){
lx,ly:=x.len(),y.len();
lnk:=(ly+1).pump(List,'wrap(_){ (lx+1).pump(List(),Link.create) });
 
foreach i in (ly){ lnk[i][lx]=Link(ly-i, y[i]) }
foreach j in (lx){ lnk[ly][j]=Link(lx-j, x[j]) }
 
foreach i,j in ([ly-1..0,-1],[lx-1..0,-1]){
lp:=lnk[i][j];
if (y[i]==x[j]){
lp.next =lnk[i+1][j+1];
lp.letter=x[j];
}else if(lnk[i][j+1].len < lnk[i+1][j].len){
lp.next =lnk[i][j+1];
lp.letter=x[j];
}else{
lp.next =lnk[i+1][j];
lp.letter=y[i];
}
lp.len=lp.next.len + 1;
}
 
lp:=lnk[0][0]; while(lp){ out.write(lp.letter); lp=lp.next; }
out.close()
}
scs("abcbdab","bdcaba", Sink(String)).println();
Output:
abdcabdab