Find common directory path: Difference between revisions

Content added Content deleted

Inline

Revision as of 15:59, 3 July 2010

Create a routine that, given a set of strings representing directory paths and a single character directory separator, will return a string representing that part of the directory tree that is common to all the directories.

Test your routine using the forward slash '/' character as the directory separator and the following three strings as input paths:

 '/home/user1/tmp/coverage/test'
 '/home/user1/tmp/covert/operator'
 '/home/user1/tmp/coven/members'

Note: The resultant path should be the valid directory '/home/user1/tmp' and not the longest common string '/home/user1/tmp/cove'.
If your language has a routine that performs this function (even if it does not have a changeable separator character, then mention it as part of the task)

Ada

<lang ada> with Ada.Text_IO; use Ada.Text_IO;

procedure Test_Common_Path is

  function "rem" (A, B : String) return String is
     Slash : Integer := A'First; -- At the last slash seen in A
     At_A  : Integer := A'first;
     At_B  : Integer := B'first;
  begin
     loop
        if At_A > A'Last then
           if At_B > B'Last or else B (At_B) = '/' then
              return A;
           else
              return A (A'First..Slash - 1);
           end if;
        elsif At_B > B'Last then
           if A (At_A) = '/' then -- A cannot be shorter than B here
              return B;
           else
              return A (A'First..Slash - 1);
           end if;
        elsif A (At_A) /= B (At_B) then
           return A (A'First..Slash - 1);
        elsif A (At_A) = '/' then
           Slash := At_A;
        end if;
        At_A := At_A + 1;
        At_B := At_B + 1;
     end loop;      
  end "rem";

begin

  Put_Line
  (  "/home/user1/tmp/coverage/test" rem
     "/home/user1/tmp/covert/operator" rem
     "/home/user1/tmp/coven/members"
  );

end Test_Common_Path; </lang> Output:

/home/user1/tmp

ALGOL 68

Works with: ALGOL 68 version Revision 1 - no extensions to language used

Works with: ALGOL 68G version Any - tested with release 1.18.0-9h.tiny

Works with: ELLA ALGOL 68 version Any (with appropriate job cards) - tested with release 1.8-8d

<lang algol68># Utilities code #

CHAR dir sep = "/"; # Assume POSIX #

PROC dir name = (STRING dir)STRING: (

 STRING out;
 FOR pos FROM UPB dir BY -1 TO LWB dir DO
   IF dir[pos] = dir sep THEN
     out := dir[:pos-1];
     GO TO out exit
   FI
 OD;
 # else: # out:=""; 
 out exit: out

);

PROC shortest = ([]STRING string list)STRING: (

 INT min := max int;
 INT min key := LWB string list - 1;
 FOR key FROM LWB string list TO UPB string list DO
   IF UPB string list[key][@1] < min THEN
     min := UPB string list[key][@1];
     min key := key
   FI
 OD;
 string list[min key]

);

Actual code #

PROC common prefix = ([]STRING strings)STRING: (

 IF LWB strings EQ UPB strings THEN
   # exit: # strings[LWB strings]
 ELSE
   STRING prefix := shortest(strings);
   FOR pos FROM LWB prefix TO UPB prefix DO
     CHAR first = prefix[pos];
     FOR key FROM LWB strings+1 TO UPB strings DO
       IF strings[key][@1][pos] NE first THEN 
         prefix := prefix[:pos-1];
         GO TO prefix exit
       FI
     OD
   OD;
   prefix exit: prefix 
 FI

);

Test code #

test:(

 []STRING dir list = (
   "/home/user1/tmp/coverage/test",
   "/home/user1/tmp/covert/operator",
   "/home/user1/tmp/coven/members"
 );
 print((dir name(common prefix(dir list)), new line))

)</lang> Output:

/home/user1/tmp

AutoHotkey

<lang autohotkey>Dir1 := "/home/user1/tmp/coverage/test" Dir2 := "/home/user1/tmp/covert/operator" Dir3 := "/home/user1/tmp/coven/members"

StringSplit, Dir1_, Dir1, / StringSplit, Dir2_, Dir2, / StringSplit, Dir3_, Dir3, /

Loop

   If  (Dir1_%A_Index% = Dir2_%A_Index%)
   And (Dir1_%A_Index% = Dir3_%A_Index%)
       Result .= (A_Index=1 ? "" : "/") Dir1_%A_Index%
   Else Break

MsgBox, % Result</lang> Message box shows:

/home/user1/tmp

C

Translation of: ALGOL 68

- This code specimen retains the original ALGOL 68 coding style.

note: This code uses the gcc block expression C language extension: ({ ~ }) <lang c>#include <stdlib.h>

include <stdio.h>
include <string.h>
include <limits.h>

/* Utilities code */

char dir_sep = '/'; /* Assume POSIX */

define LWB(s) 0 /* default C lower bound */
define STRUPB(s) (strlen(s)) /* NUL terminated char* upper bound */

/* find upper bound of a NULL terminated list - ({ ~ }) gcc extension */

define LISTUPB(list) ({ int i; for(i=0; list[i]!=NULL; i++); i-1; })

char *str_slice(char *s, int lwb, int upb){

 char *new = malloc(upb-lwb+2); 
 strncpy(new, s+lwb, upb-lwb+2); 
 new[upb+1] = '\0';
 return new;

}

char *dir_name(char *dir){

 char *out;
 int pos;
 for(pos = STRUPB(dir); pos>=LWB(dir); pos--){
   if(dir[pos] == dir_sep){
     out = str_slice(dir,LWB(dir),pos-1);
     return out;
   }
 }
 return "";

}

char *shortest (char *string_list[]){

 int min = INT_MAX;
 int min_key = LWB(string_list) - 1;
 int key;
 for(key = LWB(string_list); string_list[key]; key++){
   if(STRUPB(string_list[key]) < min){
     min = STRUPB(string_list[key]) - LWB(string_list[key]);
     min_key = key;
   }
 }
 return string_list[min_key];

}

/* Actual code */

char *common_prefix(char *strings[]){

 if(LWB(strings) == LISTUPB(strings)){
   /* exit: */ return strings[LWB(strings)];
 } else {
   char *prefix = shortest(strings);
   int pos;
   for(pos = LWB(prefix); prefix[pos]; pos++){
     char first = prefix[pos];
     int key;
     for(key = LWB(strings)+1; strings[key]; key++){
       if(strings[key][pos] != first){
         prefix = str_slice(prefix, LWB(prefix), pos-1);
         return prefix;
       }
     }
   }
   return prefix ;
 }

}

/* Test code */

main(){

 char *dir_list[] = {
   "/home/user1/tmp/coverage/test",
   "/home/user1/tmp/covert/operator",
   "/home/user1/tmp/coven/members",
   NULL
 };
 printf("%s\n",dir_name(common_prefix(dir_list)));

}</lang> Output:

/home/user1/tmp

Clojure

<lang clojure>(defn common-prefix [sep paths]

 (let [parts-per-path (map #(.split (re-pattern sep) %) paths)
       parts-per-position (apply map vector parts-per-path)]
   (apply str
     (interpose sep
       (for [parts parts-per-position :while (apply = parts)]
         (first parts))))))

(println

 (common-prefix "/"
   ["/home/user1/tmp/coverage/test"
    "/home/user1/tmp/covert/operator"
    "/home/user1/tmp/coven/members"]))</lang>

C#

<lang csharp> using System; using System.Collections.Generic; using System.Linq; using System.Text;

namespace RosettaCodeTasks {

class Program { static void Main ( string[ ] args ) { FindCommonDirectoryPath.Test ( ); }

}

class FindCommonDirectoryPath { public static void Test ( ) { Console.WriteLine ( "Find Common Directory Path" ); Console.WriteLine ( ); List<string> PathSet1 = new List<string> ( ); PathSet1.Add ( "/home/user1/tmp/coverage/test" ); PathSet1.Add ( "/home/user1/tmp/covert/operator" ); PathSet1.Add ( "/home/user1/tmp/coven/members" ); Console.WriteLine("Path Set 1 (All Absolute Paths):"); foreach ( string path in PathSet1 ) { Console.WriteLine ( path ); } Console.WriteLine ( "Path Set 1 Common Path: {0}", FindCommonPath ( "/", PathSet1 ) ); } public static string FindCommonPath ( string Separator, List<string> Paths ) { string CommonPath = String.Empty; List<string> SeparatedPath = Paths .First ( str => str.Length == Paths.Max ( st2 => st2.Length ) ) .Split ( new string[ ] { Separator }, StringSplitOptions.RemoveEmptyEntries ) .ToList ( );

foreach ( string PathSegment in SeparatedPath.AsEnumerable ( ) ) { if ( CommonPath.Length == 0 && Paths.All ( str => str.StartsWith ( PathSegment ) ) ) { CommonPath = PathSegment; } else if ( Paths.All ( str => str.StartsWith ( CommonPath + Separator + PathSegment ) ) ) { CommonPath += Separator + PathSegment; } else { break; } }

return CommonPath; } } }

</lang>

Factor

<lang factor>: take-shorter ( seq1 seq2 -- shorter )

   [ shorter? ] 2keep ? ;

common-head ( seq1 seq2 -- head )

   2dup mismatch [ nip head ] [ take-shorter ] if* ;

common-prefix-1 ( file1 file2 separator -- prefix )

   [ common-head ] dip '[ _ = not ] trim-tail ;

common-prefix ( seq separator -- prefix )

   [ ] swap '[ _ common-prefix-1 ] map-reduce ;</lang>

( scratchpad ) {
                   "/home/user1/tmp/coverage/test"
                   "/home/user1/tmp/covert/operator"
                   "/home/user1/tmp/coven/members" 
               } CHAR: / common-prefix .
"/home/user1/tmp/"

J

Solution: <lang j>parseDirs =: = <;.2 ] getCommonPrefix =: {. ;@{.~ 0 i.~ *./@(="1 {.)

getCommonDirPath=: [: getCommonPrefix parseDirs&></lang>

Example: <lang j> paths=: '/home/user1/tmp/coverage/test';'/home/user1/tmp/covert/operator';'/home/user1/tmp/coven/members'

  getCommonPrefix >paths

/home/user1/tmp/cove

  '/' getCommonDirPath paths

/home/user1/tmp/</lang>

Note: This alternative formulation of parseDirs provides cross-platform support, without the need to specify the path separator. <lang j>parseDirs =: (PATHSEP_j_&= <;.2 ])@jhostpath</lang>

OCaml

<lang ocaml>let rec aux acc paths =

 if List.mem [] paths
 then (List.rev acc) else
 let heads = List.map List.hd paths in
 let item = List.hd heads in
 let all_the_same =
   List.for_all ((=) item) (List.tl heads)
 in
 if all_the_same
 then aux (item::acc) (List.map List.tl paths)
 else (List.rev acc)

let common_prefix sep = function

 | [] -> invalid_arg "common_prefix"
 | dirs ->
     let paths = List.map (Str.split (Str.regexp_string sep)) dirs in
     let res = aux [] paths in
     (sep ^ (String.concat sep res))

let () =

 let dirs = [
   "/home/user1/tmp/coverage/test";
   "/home/user1/tmp/covert/operator";
   "/home/user1/tmp/coven/members";
 ] in
 print_endline (common_prefix "/" dirs);

</lang>

(uses the module Str, str.cma)

Oz

With a few helper functions, we can express the solution like this in Oz: <lang oz>declare

 fun {CommonPrefix Sep Paths}
    fun {GetParts P} {String.tokens P Sep} end
    Parts = {ZipN {Map Paths GetParts}}
    EqualParts = {List.takeWhile Parts fun {$ X|Xr} {All Xr {Equals X}} end}
 in
    {Join Sep {Map EqualParts Head}}
 end

 fun {ZipN Xs}
    if {Some Xs {Equals nil}} then nil
    else
       {Map Xs Head} | {ZipN {Map Xs Tail}}
    end
 end

 fun {Join Sep Xs}
    {FoldR Xs fun {$ X Z} {Append X Sep|Z} end nil}
 end

 fun {Equals C}
    fun {$ X} X == C end
 end

 fun {Head X|_} X end

 fun {Tail _|Xr} Xr end

in

 {System.showInfo {CommonPrefix &/
                   ["/home/user1/tmp/coverage/test"
                    "/home/user1/tmp/covert/operator"
                    "/home/user1/tmp/coven/members"]}}</lang>

PicoLisp

<lang PicoLisp>(de commonPath (Lst Chr)

  (glue Chr
     (make
        (apply find
           (mapcar '((L) (split (chop L) Chr)) Lst)
           '(@ (or (pass <>) (nil (link (next))))) ) ) ) )</lang>

Output:

(commonPath
   (quote
      "/home/user1/tmp/coverage/test"
      "/home/user1/tmp/covert/operator"
      "/home/user1/tmp/coven/members" )
   "/" )

-> "/home/user1/tmp"

PureBasic

PureBasic don't have a path comparator directly but instead have powerful string tools.

Simply by checking the catalog names until they mismatch and add up the correct parts, the task is accomplished. <lang PureBasic>Procedure.s CommonPath(Array InPaths.s(1),separator.s="/")

 Protected SOut$=""
 Protected i, j, toggle
 
 If ArraySize(InPaths())=0
   ProcedureReturn InPaths(0)  ; Special case, only one path
 EndIf
 
 Repeat
   i+1
   toggle=#False
   For j=1 To ArraySize(InPaths())
     If (StringField(InPaths(j-1),i,separator)=StringField(InPaths(j),i,separator))
       If Not toggle
         SOut$+StringField(InPaths(j-1),i,separator)+separator
         toggle=#True
       EndIf
     Else
       ProcedureReturn SOut$
     EndIf      
   Next
 ForEver

EndProcedure</lang>

Example of implementation <lang PureBasic>Dim t.s(2) t(0)="/home/user1/tmp/coverage/test" t(1)="/home/user1/tmp/covert/operator" t(2)="/home/user1/tmp/coven/members"

Debug CommonPath(t(),"/"))</lang>

Python

The Python os.path.commonprefix function is broken as it returns common characters that may not form a valid directory path: <lang python>>>> import os >>> os.path.commonprefix(['/home/user1/tmp/coverage/test',

                         '/home/user1/tmp/covert/operator', '/home/user1/tmp/coven/members'])

'/home/user1/tmp/cove'</lang>

This result can be fixed: <lang python>>>> def commonprefix(*args, sep='/'): return os.path.commonprefix(*args).rpartition(sep)[0]

>>> commonprefix(['/home/user1/tmp/coverage/test',

                 '/home/user1/tmp/covert/operator', '/home/user1/tmp/coven/members'])

'/home/user1/tmp'</lang>

But it may be better to not rely on the faulty implementation at all: <lang python>>>> from itertools import takewhile >>> def allnamesequal(name): return all(n==name[0] for n in name[1:])

>>> def commonprefix(paths, sep='/'): bydirectorylevels = zip(*[p.split(sep) for p in paths]) return sep.join(x[0] for x in takewhile(allnamesequal, bydirectorylevels))

>>> commonprefix(['/home/user1/tmp/coverage/test',

                 '/home/user1/tmp/covert/operator', '/home/user1/tmp/coven/members'])

'/home/user1/tmp'</lang>

R

<lang r> get_common_dir <- function(paths, delim = "/") {

 path_chunks <- strsplit(paths, delim)
 
 i <- 1
 repeat({
   current_chunk <- sapply(path_chunks, function(x) x[i])
   if(any(current_chunk != current_chunk[1])) break
   i <- i + 1
 })
 paste(path_chunks1[seq_len(i - 1)], collapse = delim)

}

Example Usage:

paths <- c(

 '/home/user1/tmp/coverage/test',
 '/home/user1/tmp/covert/operator',
 '/home/user1/tmp/coven/members')

get_common_dir(paths) # "/home/user1/tmp"

</lang>

Ruby

Uses the standard library abbrev module: Given a set of strings, calculate the set of unambiguous abbreviations for those strings, and return a hash where the keys are all the possible abbreviations and the values are the full strings.

<lang ruby>require 'abbrev'

dirs = %w( /home/user1/tmp/coverage/test /home/user1/tmp/covert/operator /home/user1/tmp/coven/members )

common_prefix = dirs.abbrev.keys.min_by {|key| key.length}.chop # => "/home/user1/tmp/cove" common_directory = common_prefix.sub(%r{/[^/]*$}, ) # => "/home/user1/tmp"</lang>

Implementing without that module: <lang ruby>separator = '/' paths = dirs.collect {|dir| dir.split(separator)} uncommon_idx = paths.transpose.each_with_index.find {|dirnames, idx| dirnames.uniq.length > 1}.last common_directory = paths[0][0 ... uncommon_idx].join(separator) # => "/home/user1/tmp"</lang>

Tcl

<lang tcl>package require Tcl 8.5 proc pop {varname} {

   upvar 1 $varname var
   set var [lassign $var head]
   return $head

}

proc common_prefix {dirs {separator "/"}} {

   set parts [split [pop dirs] $separator]
   while {[llength $dirs]} {
       set r {}
       foreach cmp $parts elt [split [pop dirs] $separator] {
           if {$cmp ne $elt} break
           lappend r $cmp
       }
       set parts $r
   }
   return [join $parts $separator]

}</lang>

% common_prefix {/home/user1/tmp/coverage/test /home/user1/tmp/covert/operator /home/user1/tmp/coven/members}
/home/user1/tmp

Ursala

The algorithm is to lex the paths into component directory names, and then find the greatest common prefix of those. <lang Ursala>#import std

comdir"s" "p" = mat"s" reduce(gcp,0) (map sep "s") "p"</lang> where "s" is a dummy variable representing the separator, "p" is a dummy variable representing the list of paths, and

sep is second order function in the standard library that takes a separator character and returns a lexer mapping a string containing the separator to a list of the substrings found between occurrences of it
map is the conventional mapping combinator, which takes a function operating on items of a list to a function operating pointwise on a whole list
gcp is a polymorphic greatest-common-prefix library function working on pairs of strings or lists of any type
reduce is the standard functional programming reduction combinator, which cumulatively applies a binary operator to a list of operands given the operator and the vacuous case result
mat is a second order function in the standard library that takes a separator character and returns a function that flattens a list of strings into a single string with copies of the separator inserted between them

Here is a version using operators instead of mnemonics for map and reduce. <lang Ursala>comdir"s" "p" = mat"s" gcp:-0 sep"s"* "p"</lang> Here is one in partly point-free form, using the composition operator (+). <lang Ursala>comdir"s" = mat"s"+ gcp:-0+ sep"s"*</lang> Here it is in point-free form. <lang Ursala>comdir = +^/mat gcp:-0++ *+ sep</lang> test program: <lang Ursala>#cast %s

test =

comdir`/ <

  '/home/user1/tmp/coverage/test',
  '/home/user1/tmp/covert/operator',
  '/home/user1/tmp/coven/members'></lang>

output:

'/home/user1/tmp'