Shortest common supersequence: Difference between revisions

From Rosetta Code
Content added Content deleted
(Added Python3 example)
Line 197: Line 197:
Longest common subsequence: bcba
Longest common subsequence: bcba
Shortest common supersquence: abdcabdab
Shortest common supersquence: abdcabdab
</pre>

=={{header|Python}}==
<lang Python>
# Use the Longest Common Subsequence algorithm

def shortest_common_supersequence(a, b):
lcs = longest_common_subsequence(a, b)
scs = ""
# Consume lcs
while len(lcs) > 0:
if a[0]==lcs[0] and b[0]==lcs[0]:
# Part of the LCS, so consume from all strings
scs += lcs[0]
lcs = lcs[1:]
a = a[1:]
b = b[1:]
elif a[0]==lcs[0]:
scs += b[0]
b = b[1:]
else:
scs += a[0]
a = a[1:]
# append remaining characters
return scs + a + b
</lang>
{{out}}
<pre>
Seq1: WEASELS
Seq2: WARDANCE
SCS: WEASRDANCELS
</pre>
</pre>



Revision as of 22:55, 31 May 2018

Shortest common supersequence is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

The   shortest common supersequence   is a problem closely related to the   longest common subsequence,   which you can use as an external function for this task.


Task

Given two strings and , find the shortest possible sequence , which is the shortest common super-sequence of and where both and are a subsequence of . Defined as such, is not necessarily unique.

Demonstrate this by printing where abcbdab” and bdcaba”.

C

The C99 code here isn't all that different from Levenstein distance calculation. <lang c>#include <stdio.h>

  1. include <string.h>

typedef struct link link_t; struct link { int len; char letter; link_t *next; };

// Stores a copy of a SCS of x and y in out. Caller needs to make sure out is long enough. int scs(char *x, char *y, char *out) { int lx = strlen(x), ly = strlen(y); link_t lnk[ly + 1][lx + 1];

for (int i = 0; i < ly; i++) lnk[i][lx] = (link_t) {ly - i, y[i], &lnk[i + 1][lx]};

for (int j = 0; j < lx; j++) lnk[ly][j] = (link_t) {lx - j, x[j], &lnk[ly][j + 1]};

lnk[ly][lx] = (link_t) {0};

for (int i = ly; i--; ) { for (int j = lx; j--; ) { link_t *lp = &lnk[i][j]; if (y[i] == x[j]) { lp->next = &lnk[i+1][j+1]; lp->letter = x[j]; } else if (lnk[i][j+1].len < lnk[i+1][j].len) { lp->next = &lnk[i][j+1]; lp->letter = x[j]; } else { lp->next = &lnk[i+1][j]; lp->letter = y[i]; } lp->len = lp->next->len + 1; } }

for (link_t *lp = &lnk[0][0]; lp; lp = lp->next) *out++ = lp->letter;

return 0; }

int main(void) { char x[] = "abcbdab", y[] = "bdcaba", res[128]; scs(x, y, res); printf("SCS(%s, %s) -> %s\n", x, y, res); return 0; }</lang>

Output:
SCS(abcbdab, bdcaba) -> abdcabdab

D

Translation of: Racket

<lang d>import std.stdio, std.functional, std.array, std.range;

dstring scs(in dstring x, in dstring y) nothrow @safe {

   alias mScs = memoize!scs;
   if (x.empty) return y;
   if (y.empty) return x;
   if (x.front == y.front)
       return x.front ~ mScs(x.dropOne, y.dropOne);
   if (mScs(x, y.dropOne).length <= mScs(x.dropOne, y).length)
       return y.front ~ mScs(x, y.dropOne);
   else
       return x.front ~ mScs(x.dropOne, y);

}

void main() @safe {

   scs("abcbdab", "bdcaba").writeln;

}</lang>

Output:
abdcabdab

Elixir

Translation of: Ruby
Works with: Elixir version 1.3

uses 'LCS' from here <lang elixir>defmodule SCS do

 def scs(u, v) do
   lcs = LCS.lcs(u, v) |> to_charlist
   scs(to_charlist(u), to_charlist(v), lcs, []) |> to_string
 end
 
 defp scs(u, v, [], res), do: Enum.reverse(res) ++ u ++ v
 defp scs([h|ut], [h|vt], [h|lt], res),      do: scs(ut, vt, lt, [h|res])
 defp scs([h|_]=u, [vh|vt], [h|_]=lcs, res), do: scs(u, vt, lcs, [vh|res])
 defp scs([uh|ut], v, lcs, res),             do: scs(ut, v, lcs, [uh|res])

end

u = "abcbdab" v = "bdcaba" IO.puts "SCS(#{u}, #{v}) = #{SCS.scs(u, v)}"</lang>

Output:
SCS(abcbdab, bdcaba) = abdcabdab

Kotlin

Uses 'lcs' function from Longest common subsequence#Kotlin: <lang scala>// version 1.1.2

fun lcs(x: String, y: String): String {

   if (x.length == 0 || y.length == 0) return ""
   val x1 = x.dropLast(1)  
   val y1 = y.dropLast(1)
   if (x.last() == y.last()) return lcs(x1, y1) + x.last()
   val x2 = lcs(x, y1)
   val y2 = lcs(x1, y)
   return if (x2.length > y2.length) x2 else y2

}

fun scs(u: String, v: String): String{

   val lcs = lcs(u, v)
   var ui = 0
   var vi = 0
   val sb = StringBuilder()
   for (i in 0 until lcs.length) {
       while (ui < u.length && u[ui] != lcs[i]) sb.append(u[ui++])       
       while (vi < v.length && v[vi] != lcs[i]) sb.append(v[vi++])
       sb.append(lcs[i])
       ui++; vi++
   }
   if (ui < u.length) sb.append(u.substring(ui))
   if (vi < v.length) sb.append(v.substring(vi))
   return sb.toString()

}

fun main(args: Array<String>) {

   val u = "abcbdab"
   val v = "bdcaba"  
   println(scs(u, v))

}</lang>

Output:
abdcabdab

Perl

<lang perl>sub lcs { # longest common subsequence

   my( $u, $v ) = @_;
   return  unless length($u) and length($v);
   my $longest = ;
   for my $first ( 0..length($u)-1 ) {
       my $char = substr $u, $first, 1;
       my $i = index( $v, $char );
       next if -1==$i;
       my $next = $char;
       $next .= lcs( substr( $u, $first+1), substr( $v, $i+1 ) ) unless $i==length($v)-1;
       $longest = $next if length($next) > length($longest);
   }
   return $longest;

}

sub scs { # shortest common supersequence

   my( $u, $v ) = @_;
   my @lcs = split //, lcs $u, $v;
   my $pat = "(.*)".join("(.*)",@lcs)."(.*)"; 
   my @u = $u =~ /$pat/;
   my @v = $v =~ /$pat/;
   my $scs = shift(@u).shift(@v);
   $scs .= $_.shift(@u).shift(@v) for @lcs;
   return $scs;

}

my $u = "abcbdab"; my $v = "bdcaba"; printf "Strings %s %s\n", $u, $v; printf "Longest common subsequence: %s\n", lcs $u, $v; printf "Shortest common supersquence: %s\n", scs $u, $v; </lang>

Output:
Strings abcbdab bdcaba
Longest common subsequence:   bcba
Shortest common supersquence: abdcabdab

Python

<lang Python>

  1. Use the Longest Common Subsequence algorithm

def shortest_common_supersequence(a, b):

   lcs = longest_common_subsequence(a, b)
   scs = ""
   # Consume lcs
   while len(lcs) > 0:
       if a[0]==lcs[0] and b[0]==lcs[0]:
       # Part of the LCS, so consume from all strings
           scs += lcs[0]
           lcs = lcs[1:]
           a = a[1:]
           b = b[1:]
       elif a[0]==lcs[0]:
           scs += b[0]
           b = b[1:]
       else:
           scs += a[0]
           a = a[1:]
   # append remaining characters
   return scs + a + b

</lang>

Output:
Seq1: WEASELS
Seq2: WARDANCE
SCS:  WEASRDANCELS

Racket

Translation of: C

This program is based on the C implementation, but use memorization instead of dynamic programming. More explanations about the memorization part in http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html .

<lang Racket>#lang racket

(struct link (len letters))

(define (link-add li n letter)

 (link (+ n (link-len li)) 
       (cons letter (link-letters li))))

(define (memoize f)

 (local ([define table (make-hash)])
   (lambda args
     (dict-ref! table args (λ () (apply f args))))))

(define scs/list

 (memoize 
  (lambda (x y)
    (cond
      [(null? x)
       (link (length y) y)]
      [(null? y)
       (link (length x) x)]
      [(eq? (car x) (car y))
       (link-add (scs/list (cdr x) (cdr y)) 1 (car x))]
      [(<= (link-len (scs/list x (cdr y)))
           (link-len (scs/list (cdr x) y)))
       (link-add (scs/list x (cdr y)) 1 (car y))]
      [else
       (link-add (scs/list (cdr x) y) 1 (car x))]))))

(define (scs x y)

 (list->string (link-letters (scs/list (string->list x) (string->list y)))))

(scs "abcbdab" "bdcaba")</lang>

Output:
"abdcabdab"

Ring

<lang ring>

  1. Project : Shortest common supersequence
  2. Date  : 2017/12/08
  3. Author : Gal Zsolt (~ CalmoSoft ~)
  4. Email  : <calmosoft@gmail.com>

str1 = "a b c b d a b" str2 = "bdcaba" str3 = str2list(substr(str1, " ", nl)) for n = 1 to len(str3)

    for m = n to len(str2)-1
         pos = find(str3, str2[m])
         if pos > 0 and str2[m+1] != str3[pos+1]
            insert(str3, pos, str2[m+1])
         ok
    next

next showarray(str3)

func showarray(vect)

      svect = ""
      for n = 1 to len(vect)
            svect = svect + vect[n]
      next
      see svect

</lang> Output:

Shortest common supersequence: abdcabdab

Ruby

Translation of: Tcl

uses 'lcs' from here <lang ruby>require 'lcs'

def scs(u, v)

 lcs = lcs(u, v)
 u, v = u.dup, v.dup
 scs = ""
 # Iterate over the characters until LCS processed
 until lcs.empty?
   if u[0]==lcs[0] and v[0]==lcs[0]
     # Part of the LCS, so consume from all strings
     scs << lcs.slice!(0)
     u.slice!(0)
     v.slice!(0)
   elsif u[0]==lcs[0]
     # char of u = char of LCS, but char of LCS v doesn't so consume just that
     scs << v.slice!(0)
   else
     # char of u != char of LCS, so consume just that
     scs << u.slice!(0)
   end
 end
 # append remaining characters, which are not in common
 scs + u + v

end

u = "abcbdab" v = "bdcaba" puts "SCS(#{u}, #{v}) = #{scs(u, v)}"</lang>

Output:
SCS(abcbdab, bdcaba) = abcbdcaba

Sidef

Translation of: Perl

Uses the lcs function defined here. <lang ruby>func scs(u, v) {

   var ls = lcs(u, v).chars
   var pat = Regex('(.*)'+ls.join('(.*)')+'(.*)')
   u.scan!(pat)
   v.scan!(pat)
   var ss = (u.shift + v.shift)
   ls.each { |c| ss += (c + u.shift + v.shift) }
   return ss

}

say scs("abcbdab", "bdcaba")</lang>

Output:
abdcabdab

Tcl

This example uses either of the lcs implementations from here, assumed renamed to lcs… <lang tcl>proc scs {u v} {

   set lcs [lcs $u $v]
   set scs ""
   # Iterate over the characters until LCS processed
   for {set ui [set vi [set li 0]]} {$li<[string length $lcs]} {} {

set uc [string index $u $ui] set vc [string index $v $vi] set lc [string index $lcs $li] if {$uc eq $lc} { if {$vc eq $lc} { # Part of the LCS, so consume from all strings append scs $lc incr ui incr li } else { # char of u = char of LCS, but char of LCS v doesn't so consume just that append scs $vc } incr vi } else { # char of u != char of LCS, so consume just that append scs $uc incr ui }

   }
   # append remaining characters, which are not in common
   append scs [string range $u $ui end] [string range $v $vi end]
   return $scs

}</lang> Demonstrating: <lang tcl>set u "abcbdab" set v "bdcaba" puts "SCS($u,$v) = [scs $u $v]"</lang>

Output:
SCS(abcbdab,bdcaba) = abdcabdab

zkl

Translation of: C

<lang zkl>class Link{ var len,letter,next;

  fcn init(l=0,c="",lnk=Void){ len,letter,next=l,c,lnk; }

} fcn scs(x,y,out){

  lx,ly:=x.len(),y.len();
  lnk:=(ly+1).pump(List,'wrap(_){ (lx+1).pump(List(),Link.create) });
  foreach i in (ly){ lnk[i][lx]=Link(ly-i, y[i]) }
  foreach j in (lx){ lnk[ly][j]=Link(lx-j, x[j]) }

  foreach i,j in ([ly-1..0,-1],[lx-1..0,-1]){
     lp:=lnk[i][j];
     if (y[i]==x[j]){

lp.next =lnk[i+1][j+1]; lp.letter=x[j];

     }else if(lnk[i][j+1].len < lnk[i+1][j].len){

lp.next =lnk[i][j+1]; lp.letter=x[j];

     }else{

lp.next =lnk[i+1][j]; lp.letter=y[i];

     }
     lp.len=lp.next.len + 1;
  }
  lp:=lnk[0][0]; while(lp){ out.write(lp.letter); lp=lp.next; }
  out.close()

}</lang> <lang zkl>scs("abcbdab","bdcaba", Sink(String)).println();</lang>

Output:
abdcabdab