Shortest common supersequence
You are encouraged to solve this task according to the task description, using any language you may know.
The shortest common supersequence is a problem closely related to the longest common subsequence, which you can use as an external function for this task.
- Task
Given two strings and , find the shortest possible sequence , which is the shortest common super-sequence of and where both and are a subsequence of . Defined as such, is not necessarily unique.
Demonstrate this by printing where “abcbdab” and “bdcaba”.
- Also see
- The Wikipedia article: shortest common supersequence.
Contents
C[edit]
The C99 code here isn't all that different from Levenstein distance calculation.
#include <stdio.h>
#include <string.h>
typedef struct link link_t;
struct link {
int len;
char letter;
link_t *next;
};
// Stores a copy of a SCS of x and y in out. Caller needs to make sure out is long enough.
int scs(char *x, char *y, char *out)
{
int lx = strlen(x), ly = strlen(y);
link_t lnk[ly + 1][lx + 1];
for (int i = 0; i < ly; i++)
lnk[i][lx] = (link_t) {ly - i, y[i], &lnk[i + 1][lx]};
for (int j = 0; j < lx; j++)
lnk[ly][j] = (link_t) {lx - j, x[j], &lnk[ly][j + 1]};
lnk[ly][lx] = (link_t) {0};
for (int i = ly; i--; ) {
for (int j = lx; j--; ) {
link_t *lp = &lnk[i][j];
if (y[i] == x[j]) {
lp->next = &lnk[i+1][j+1];
lp->letter = x[j];
} else if (lnk[i][j+1].len < lnk[i+1][j].len) {
lp->next = &lnk[i][j+1];
lp->letter = x[j];
} else {
lp->next = &lnk[i+1][j];
lp->letter = y[i];
}
lp->len = lp->next->len + 1;
}
}
for (link_t *lp = &lnk[0][0]; lp; lp = lp->next)
*out++ = lp->letter;
return 0;
}
int main(void)
{
char x[] = "abcbdab", y[] = "bdcaba", res[128];
scs(x, y, res);
printf("SCS(%s, %s) -> %s\n", x, y, res);
return 0;
}
- Output:
SCS(abcbdab, bdcaba) -> abdcabdab
D[edit]
import std.stdio, std.functional, std.array, std.range;
dstring scs(in dstring x, in dstring y) nothrow @safe {
alias mScs = memoize!scs;
if (x.empty) return y;
if (y.empty) return x;
if (x.front == y.front)
return x.front ~ mScs(x.dropOne, y.dropOne);
if (mScs(x, y.dropOne).length <= mScs(x.dropOne, y).length)
return y.front ~ mScs(x, y.dropOne);
else
return x.front ~ mScs(x.dropOne, y);
}
void main() @safe {
scs("abcbdab", "bdcaba").writeln;
}
- Output:
abdcabdab
Go[edit]
package main
import (
"fmt"
"strings"
)
func lcs(x, y string) string {
xl, yl := len(x), len(y)
if xl == 0 || yl == 0 {
return ""
}
x1, y1 := x[:xl-1], y[:yl-1]
if x[xl-1] == y[yl-1] {
return fmt.Sprintf("%s%c", lcs(x1, y1), x[xl-1])
}
x2, y2 := lcs(x, y1), lcs(x1, y)
if len(x2) > len(y2) {
return x2
} else {
return y2
}
}
func scs(u, v string) string {
ul, vl := len(u), len(v)
lcs := lcs(u, v)
ui, vi := 0, 0
var sb strings.Builder
for i := 0; i < len(lcs); i++ {
for ui < ul && u[ui] != lcs[i] {
sb.WriteByte(u[ui])
ui++
}
for vi < vl && v[vi] != lcs[i] {
sb.WriteByte(v[vi])
vi++
}
sb.WriteByte(lcs[i])
ui++
vi++
}
if ui < ul {
sb.WriteString(u[ui:])
}
if vi < vl {
sb.WriteString(v[vi:])
}
return sb.String()
}
func main() {
u := "abcbdab"
v := "bdcaba"
fmt.Println(scs(u, v))
}
- Output:
abdcabdab
Elixir[edit]
uses 'LCS' from here
defmodule SCS do
def scs(u, v) do
lcs = LCS.lcs(u, v) |> to_charlist
scs(to_charlist(u), to_charlist(v), lcs, []) |> to_string
end
defp scs(u, v, [], res), do: Enum.reverse(res) ++ u ++ v
defp scs([h|ut], [h|vt], [h|lt], res), do: scs(ut, vt, lt, [h|res])
defp scs([h|_]=u, [vh|vt], [h|_]=lcs, res), do: scs(u, vt, lcs, [vh|res])
defp scs([uh|ut], v, lcs, res), do: scs(ut, v, lcs, [uh|res])
end
u = "abcbdab"
v = "bdcaba"
IO.puts "SCS(#{u}, #{v}) = #{SCS.scs(u, v)}"
- Output:
SCS(abcbdab, bdcaba) = abdcabdab
Julia[edit]
using Memoize
@memoize function scs(x, y)
if x == ""
return y
elseif y == ""
return x
elseif x[1] == y[1]
return "$(x[1])$(scs(x[2:end], y[2:end]))"
elseif length(scs(x, y[2:end])) <= length(scs(x[2:end], y))
return "$(y[1])$(scs(x, y[2:end]))"
else
return "$(x[1])$(scs(x[2:end], y))"
end
end
println(scs("abcbdab", "bdcaba"))
- Output:
abdcabdab
Kotlin[edit]
Uses 'lcs' function from Longest common subsequence#Kotlin:
// version 1.1.2
fun lcs(x: String, y: String): String {
if (x.length == 0 || y.length == 0) return ""
val x1 = x.dropLast(1)
val y1 = y.dropLast(1)
if (x.last() == y.last()) return lcs(x1, y1) + x.last()
val x2 = lcs(x, y1)
val y2 = lcs(x1, y)
return if (x2.length > y2.length) x2 else y2
}
fun scs(u: String, v: String): String{
val lcs = lcs(u, v)
var ui = 0
var vi = 0
val sb = StringBuilder()
for (i in 0 until lcs.length) {
while (ui < u.length && u[ui] != lcs[i]) sb.append(u[ui++])
while (vi < v.length && v[vi] != lcs[i]) sb.append(v[vi++])
sb.append(lcs[i])
ui++; vi++
}
if (ui < u.length) sb.append(u.substring(ui))
if (vi < v.length) sb.append(v.substring(vi))
return sb.toString()
}
fun main(args: Array<String>) {
val u = "abcbdab"
val v = "bdcaba"
println(scs(u, v))
}
- Output:
abdcabdab
Perl[edit]
sub lcs { # longest common subsequence
my( $u, $v ) = @_;
return '' unless length($u) and length($v);
my $longest = '';
for my $first ( 0..length($u)-1 ) {
my $char = substr $u, $first, 1;
my $i = index( $v, $char );
next if -1==$i;
my $next = $char;
$next .= lcs( substr( $u, $first+1), substr( $v, $i+1 ) ) unless $i==length($v)-1;
$longest = $next if length($next) > length($longest);
}
return $longest;
}
sub scs { # shortest common supersequence
my( $u, $v ) = @_;
my @lcs = split //, lcs $u, $v;
my $pat = "(.*)".join("(.*)",@lcs)."(.*)";
my @u = $u =~ /$pat/;
my @v = $v =~ /$pat/;
my $scs = shift(@u).shift(@v);
$scs .= $_.shift(@u).shift(@v) for @lcs;
return $scs;
}
my $u = "abcbdab";
my $v = "bdcaba";
printf "Strings %s %s\n", $u, $v;
printf "Longest common subsequence: %s\n", lcs $u, $v;
printf "Shortest common supersquence: %s\n", scs $u, $v;
- Output:
Strings abcbdab bdcaba Longest common subsequence: bcba Shortest common supersquence: abdcabdab
Perl 6[edit]
Using 'lcs' routine from Longest common subsequence task
sub lcs(Str $xstr, Str $ystr) { # longest common subsequence
return "" unless $xstr && $ystr;
my ($x, $xs, $y, $ys) = $xstr.substr(0, 1), $xstr.substr(1), $ystr.substr(0, 1), $ystr.substr(1);
return $x eq $y
?? $x ~ lcs($xs, $ys)
!! max(:by{ $^a.chars }, lcs($xstr, $ys), lcs($xs, $ystr) );
}
sub scs ($u, $v) { # shortest common supersequence
my @lcs = (lcs $u, $v).comb;
my $pat = '(.*)' ~ join('(.*)',@lcs) ~ '(.*)';
my $regex = "rx/$pat/".EVAL;
my @u = ($u ~~ $regex).list;
my @v = ($v ~~ $regex).list;
my $scs = shift(@u) ~ shift(@v);
$scs ~= $_ ~ shift(@u) ~ shift(@v) for @lcs;
return $scs;
}
my $u = 'abcbdab';
my $v = 'bdcaba';
printf "Strings: %s %s\n", $u, $v;
printf "Longest common subsequence: %s\n", lcs $u, $v;
printf "Shortest common supersquence: %s\n", scs $u, $v;
- Output:
Strings: abcbdab bdcaba Longest common subsequence: bcba Shortest common supersquence: abdcabdab
Phix[edit]
function longest_common_subsequence(sequence a, b)
sequence res = ""
if length(a) and length(b) then
if a[$]=b[$] then
res = longest_common_subsequence(a[1..-2],b[1..-2])&a[$]
else
sequence l = longest_common_subsequence(a,b[1..-2]),
r = longest_common_subsequence(a[1..-2],b)
res = iff(length(l)>length(r)?l:r)
end if
end if
return res
end function
function shortest_common_supersequence(string a, b)
string lcs = longest_common_subsequence(a, b),
scs = ""
-- Consume lcs
while length(lcs) do
integer c = lcs[1]
if a[1]==c and b[1]==c then
-- Part of the lcs, so consume from all strings
scs &= c
lcs = lcs[2..$]
a = a[2..$]
b = b[2..$]
elsif a[1]==c then
scs &= b[1]
b = b[2..$]
else
scs &= a[1]
a = a[2..$]
end if
end while
-- append remaining characters
return scs & a & b
end function
?shortest_common_supersequence("abcbdab", "bdcaba")
?shortest_common_supersequence("WEASELS", "WARDANCE")
- Output:
"abdcabdab" "WEASRDANCELS"
Python[edit]
# Use the Longest Common Subsequence algorithm
def shortest_common_supersequence(a, b):
lcs = longest_common_subsequence(a, b)
scs = ""
# Consume lcs
while len(lcs) > 0:
if a[0]==lcs[0] and b[0]==lcs[0]:
# Part of the LCS, so consume from all strings
scs += lcs[0]
lcs = lcs[1:]
a = a[1:]
b = b[1:]
elif a[0]==lcs[0]:
scs += b[0]
b = b[1:]
else:
scs += a[0]
a = a[1:]
# append remaining characters
return scs + a + b
- Output:
Seq1: WEASELS Seq2: WARDANCE SCS: WEASRDANCELS
Racket[edit]
This program is based on the C implementation, but use memorization instead of dynamic programming. More explanations about the memorization part in http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html .#lang racket
(struct link (len letters))
(define (link-add li n letter)
(link (+ n (link-len li))
(cons letter (link-letters li))))
(define (memoize f)
(local ([define table (make-hash)])
(lambda args
(dict-ref! table args (λ () (apply f args))))))
(define scs/list
(memoize
(lambda (x y)
(cond
[(null? x)
(link (length y) y)]
[(null? y)
(link (length x) x)]
[(eq? (car x) (car y))
(link-add (scs/list (cdr x) (cdr y)) 1 (car x))]
[(<= (link-len (scs/list x (cdr y)))
(link-len (scs/list (cdr x) y)))
(link-add (scs/list x (cdr y)) 1 (car y))]
[else
(link-add (scs/list (cdr x) y) 1 (car x))]))))
(define (scs x y)
(list->string (link-letters (scs/list (string->list x) (string->list y)))))
(scs "abcbdab" "bdcaba")
- Output:
"abdcabdab"
REXX[edit]
/*REXX program finds the Shortest common supersequence (SCS) of two character strings.*/
parse arg u v . /*obtain optional arguments from the CL*/
if u=='' | u=="," then u= 'abcbdab' /*Not specified? Then use the default.*/
if v=='' | v=="," then v= 'bdcaba' /* " " " " " " */
say ' string u=' u /*echo the value of string U to term.*/
say ' string v=' v /* " " " " " V " " */
$= u /*define initial value for the output. */
do n=1 to length(u) /*process the whole length of string U.*/
do m=n to length(v) - 1 /* " right─ish part " " V.*/
p= pos( substr(v, m, 1), $) /*position of mTH V char in $ string.*/
_= substr(v, m+1, 1) /*obtain a single character of string V*/
if p\==0 & _\==substr($, p+1, 1) then $= insert(_, $, p)
end /*m*/ /* [↑] insert _ in $ after position P.*/
end /*n*/
say
say 'shortest common supersequence=' $ /*stick a fork in it, we're all done. */
- output when using the default inputs:
string u= abcbdab string v= bdcaba shortest common supersequence= abdcabdab
- output when using the inputs values: ab ac
string u= ab string v= ac shortest common supersequence= acb
- output when using the inputs values: ac ab
string u= ac string v= ab shortest common supersequence= abc
Ring[edit]
# Project : Shortest common supersequence
str1 = "a b c b d a b"
str2 = "bdcaba"
str3 = str2list(substr(str1, " ", nl))
for n = 1 to len(str3)
for m = n to len(str2)-1
pos = find(str3, str2[m])
if pos > 0 and str2[m+1] != str3[pos+1]
insert(str3, pos, str2[m+1])
ok
next
next
showarray(str3)
func showarray(vect)
svect = ""
for n = 1 to len(vect)
svect = svect + vect[n]
next
see svect
Output:
Shortest common supersequence: abdcabdab
Ruby[edit]
uses 'lcs' from here
require 'lcs'
def scs(u, v)
lcs = lcs(u, v)
u, v = u.dup, v.dup
scs = ""
# Iterate over the characters until LCS processed
until lcs.empty?
if u[0]==lcs[0] and v[0]==lcs[0]
# Part of the LCS, so consume from all strings
scs << lcs.slice!(0)
u.slice!(0)
v.slice!(0)
elsif u[0]==lcs[0]
# char of u = char of LCS, but char of LCS v doesn't so consume just that
scs << v.slice!(0)
else
# char of u != char of LCS, so consume just that
scs << u.slice!(0)
end
end
# append remaining characters, which are not in common
scs + u + v
end
u = "abcbdab"
v = "bdcaba"
puts "SCS(#{u}, #{v}) = #{scs(u, v)}"
- Output:
SCS(abcbdab, bdcaba) = abcbdcaba
Sidef[edit]
Uses the lcs function defined here.
func scs(u, v) {
var ls = lcs(u, v).chars
var pat = Regex('(.*)'+ls.join('(.*)')+'(.*)')
u.scan!(pat)
v.scan!(pat)
var ss = (u.shift + v.shift)
ls.each { |c| ss += (c + u.shift + v.shift) }
return ss
}
say scs("abcbdab", "bdcaba")
- Output:
abdcabdab
Tcl[edit]
This example uses either of the lcs
implementations from here, assumed renamed to lcs…
proc scs {u v} {
set lcs [lcs $u $v]
set scs ""
# Iterate over the characters until LCS processed
for {set ui [set vi [set li 0]]} {$li<[string length $lcs]} {} {
set uc [string index $u $ui]
set vc [string index $v $vi]
set lc [string index $lcs $li]
if {$uc eq $lc} {
if {$vc eq $lc} {
# Part of the LCS, so consume from all strings
append scs $lc
incr ui
incr li
} else {
# char of u = char of LCS, but char of LCS v doesn't so consume just that
append scs $vc
}
incr vi
} else {
# char of u != char of LCS, so consume just that
append scs $uc
incr ui
}
}
# append remaining characters, which are not in common
append scs [string range $u $ui end] [string range $v $vi end]
return $scs
}
Demonstrating:
set u "abcbdab"
set v "bdcaba"
puts "SCS($u,$v) = [scs $u $v]"
- Output:
SCS(abcbdab,bdcaba) = abdcabdab
zkl[edit]
class Link{ var len,letter,next;
fcn init(l=0,c="",lnk=Void){ len,letter,next=l,c,lnk; }
}
fcn scs(x,y,out){
lx,ly:=x.len(),y.len();
lnk:=(ly+1).pump(List,'wrap(_){ (lx+1).pump(List(),Link.create) });
foreach i in (ly){ lnk[i][lx]=Link(ly-i, y[i]) }
foreach j in (lx){ lnk[ly][j]=Link(lx-j, x[j]) }
foreach i,j in ([ly-1..0,-1],[lx-1..0,-1]){
lp:=lnk[i][j];
if (y[i]==x[j]){
lp.next =lnk[i+1][j+1];
lp.letter=x[j];
}else if(lnk[i][j+1].len < lnk[i+1][j].len){
lp.next =lnk[i][j+1];
lp.letter=x[j];
}else{
lp.next =lnk[i+1][j];
lp.letter=y[i];
}
lp.len=lp.next.len + 1;
}
lp:=lnk[0][0]; while(lp){ out.write(lp.letter); lp=lp.next; }
out.close()
}
scs("abcbdab","bdcaba", Sink(String)).println();
- Output:
abdcabdab