Multisplit: Difference between revisions

6,765 bytes added ,  28 days ago
Added Easylang
No edit summary
(Added Easylang)
 
(15 intermediate revisions by 11 users not shown)
Line 28:
{{trans|Python}}
 
<langsyntaxhighlight lang="11l">F multisplit(text, sep)
V lastmatch = 0
V i = 0
Line 48:
R matches
 
print(multisplit(‘a!===b=!=c’, [‘==’, ‘!=’, ‘=’]))</langsyntaxhighlight>
 
{{out}}
Line 57:
=={{header|Ada}}==
multisplit.adb:
<langsyntaxhighlight Adalang="ada">with Ada.Containers.Indefinite_Doubly_Linked_Lists;
with Ada.Text_IO;
 
Line 137:
Pos := String_Lists.Next (Pos);
end loop;
end Multisplit;</langsyntaxhighlight>
 
{{out}}
Line 144:
 
=={{header|ALGOL 68}}==
<langsyntaxhighlight lang="algol68"># split a string based on a number of separators #
 
# MODE to hold the split results #
Line 209:
SPLITINFO token = test tokens[ t ];
print( ( "token: [", text OF token, "] at: ", whole( position OF token, 0 ), " delimiter: (", delimiter OF token, ")", newline ) )
OD</langsyntaxhighlight>
{{out}}
<pre>
Line 221:
=={{header|Arturo}}==
 
<langsyntaxhighlight lang="rebol">print split.by:["==" "!=" "="] "a!===b=!=c"</langsyntaxhighlight>
 
{{out}}
Line 228:
 
=={{header|AutoHotkey}}==
<langsyntaxhighlight AutoHotkeylang="autohotkey">Str := "a!===b=!=c"
Sep := ["==","!=", "="]
Res := StrSplit(Str, Sep)
Line 236:
for k, v in Sep
N .= (N?"|":"") "\Q" v "\E"
MsgBox % RegExReplace(str, "(.*?)(" N ")", "$1 {$2}")</langsyntaxhighlight>
{{out}}
<pre>a,,b,,c
Line 242:
 
=={{header|AWK}}==
<syntaxhighlight lang="awk">
<lang AWK>
# syntax: GAWK -f MULTISPLIT.AWK
BEGIN {
Line 266:
exit(0)
}
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 280:
 
=={{header|BBC BASIC}}==
<langsyntaxhighlight lang="bbcbasic"> DIM sep$(2)
sep$() = "==", "!=", "="
PRINT "String splits into:"
Line 303:
ENDIF
UNTIL m% = LEN(s$)
= o$ + """" + MID$(s$, p%) + """"</langsyntaxhighlight>
{{out}}
<pre>
Line 314:
=={{header|Bracmat}}==
This is a surprisingly difficult task to solve in Bracmat, because in a naive solution using an alternating pattern ("=="|"!="|"=") the shorter pattern <code>"="</code> would have precedence over <code>"=="</code>. In the solution below the function <code>oneOf</code> iterates (by recursion) over the operators, trying to match the start of the current subject string <code>sjt</code> with one operator at a time, until success or reaching the end of the list with operators, whichever comes first. If no operator is found at the start of the current subject string, the variable <code>nonOp</code> is extended with one byte, thereby shifting the start of the current subject string one byte to the right. Then a new attempt is made to find an operator. This is repeated until either an operator is found, in which case the unparsed string is restricted to the part of the input after the found operator, or no operator is found, in which case the <code>whl</code> loop terminates.
<langsyntaxhighlight lang="bracmat">( ( oneOf
= operator
. !arg:%?operator ?arg
Line 331:
& put$!unparsed
& put$\n
);</langsyntaxhighlight>
{{out}}
<pre>a {!=} {==} b {=} {!=} c</pre>
Line 337:
=={{header|C}}==
What kind of silly parsing is this?
<langsyntaxhighlight Clang="c">#include <stdio.h>
#include <string.h>
 
Line 360:
 
return 0;
}</langsyntaxhighlight>
{{out}}<syntaxhighlight lang="text">a{!=}{==}b{=}{!=}c</langsyntaxhighlight>
 
=={{header|C sharp}}==
Line 367:
'''Extra Credit Solution'''
 
<langsyntaxhighlight lang="csharp">using System;
using System.Collections.Generic;
using System.Linq;
Line 423:
}
}
}</langsyntaxhighlight>
 
{{out}}
Line 431:
=={{header|C++}}==
using the Boost library tokenizer!
<langsyntaxhighlight lang="cpp">#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
Line 449:
std::cout << '\n' ;
return 0 ;
}</langsyntaxhighlight>
{{out}}
<PRE>a b c</PRE>
 
===Without external libraries===
<syntaxhighlight lang="c++">
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
 
// One piece of a multisplit result.
struct Split_data {
    std::string segment;    // text collected before the separator was met
    int32_t index;          // offset in the input where the separator starts
                            // (the input length for the final piece)
    std::string separator;  // separator that ended the segment; empty for the final piece
};

// Splits `text` at every occurrence of any of `separators`.
//
// The text is scanned left to right; at each position the separators are
// tried in the order given and the first one that matches wins, so earlier
// entries take priority over later ones.
//
// Returns one Split_data per separator found, followed by a final entry
// holding the trailing segment with an empty separator and the text length
// as its index.
//
// Empty separators are ignored: they would match at every position without
// consuming any input, so the scan could never advance.
std::vector<Split_data> multi_split(const std::string& text, const std::vector<std::string>& separators) {
    std::vector<Split_data> result;
    std::string segment;
    std::string::size_type i = 0;

    while ( i < text.length() ) {
        bool found = false;
        for ( const std::string& separator : separators ) {
            if ( separator.empty() ) {
                continue;
            }
            // compare() looks at the characters in place, without building a temporary substring
            if ( text.compare(i, separator.length(), separator) == 0 ) {
                found = true;
                // brace initialisation works for the aggregate before C++20 as well;
                // the cast makes the narrowing to the field type explicit
                result.push_back({ segment, static_cast<int32_t>(i), separator });
                i += separator.length();
                segment.clear();
                break;
            }
        }

        if ( ! found ) {
            segment += text[i];
            i += 1;
        }
    }

    result.push_back({ segment, static_cast<int32_t>(i), "" });
    return result;
}
 
int main() {
for ( Split_data splits : multi_split("a!===b=!=c", { "==", "!=", "=" } ) ) {
std::cout << std::left << std::setw(3) << "\"" + splits.segment + "\""
<< std::setw(18) << " ( split with \"" + splits.separator + "\""
<< " at index " << splits.index << " )" << std::endl;
}
}
</syntaxhighlight>
{{ out }}
<pre>
"a" ( split with "!=" at index 1 )
"" ( split with "==" at index 3 )
"b" ( split with "=" at index 6 )
"" ( split with "!=" at index 7 )
"c" ( split with "" at index 10 )
</pre>
 
 
===C++23===
<syntaxhighlight lang="c++">
/* multisplit.cpp */
#include <features.h>
#include <iostream>
#include <string>
#include <vector>
#include <format>
 
/* C++23 example for Multisplit 6 Jan 2024
email:
spikeysnack@gmail.com
 
compile:
g++-13 -std=c++23 -Wall -o multisplit multisplit.cpp
*/
 
// extra info
#define _EXTRA
 
// aliases
using std::string;
using std::vector;
using str_vec = vector<string>;
using std::cout;
 
 
// constants
constexpr static const size_t npos = -1;
 
// function signatures
// (each prototype repeats the parameter list of the definition that follows below)
string replace_all(string& str, const string& remove, const string& insert );

str_vec split_on_delim(string& str, const string& delims);

str_vec Multisplit( string& input, const str_vec& seps);
 
// functions
 
// replace every occurrence of a substring in a string
// a = "dogs and cats and dogs and cats and birds"
// replace_all(a, "cats" , "fish");
// ==> "dogs and fish and dogs and fish and birds"
//
// str    : text to search; it is copied, never modified
// remove : substring to look for; when empty the copy is returned unchanged
// insert : text written in place of each occurrence
// returns the edited copy

string replace_all(string& str,
                   const string& remove,
                   const string& insert ){
    string s{str};

#ifdef _EXTRA
    const string rightarrow{"\u2B62"}; //unicode arrow
    auto ex = std::format("match: {}\t{} ", remove, rightarrow);
    std::cerr << ex;
#endif

    // an empty pattern matches at every position and would never be used up
    if (remove.empty()) {
        return s;
    }

    string::size_type pos = 0;
    while ((pos = s.find(remove, pos)) != string::npos){
        s.replace(pos, remove.size(), insert);
        // resume right after the inserted text: an empty insert must not skip
        // the next character, and inserted text must not be scanned again
        pos += insert.size();
    }

    return s;
}
 
 
// create a string vector from a string,
// split on a delimiter string
// x = "ab:cde:fgh:ijk"
// split_on_delim( x, ":");
// ==> { "ab", "cde", "fgh", "ijk" }
 
str_vec split_on_delim(string& str, const string& delims) {
string::size_type beg, pos = 0;
str_vec sv;
string tmp;
while ( (beg = str.find_first_not_of(delims, pos)) != npos ){
 
pos = str.find_first_of(delims, beg + 1);
 
tmp = { str.substr(beg, pos - beg) };
 
sv.push_back(tmp);
}
return sv;
}
 
 
str_vec Multisplit( string& input, const str_vec& seps) {
 
string s1{input};
str_vec sv;
 
for( auto sep : seps){
s1 = replace_all(s1, sep, "^"); // space sep
 
#ifdef _EXTRA
std::cerr << s1 << "\n";
#endif
sv = split_on_delim(s1, "^"); // split
}
return sv;
}
 
 
/* program entry point: run Multisplit on the sample text and print the fields */

int main(){
    string sample{"a!===b=!=c"};
    const str_vec seps {"!=", "==", "="};

    cout << std::format("sample: \t{}\n", sample);

    const str_vec fields = Multisplit(sample, seps);

    // one tab-terminated column per field
    for( const string& field : fields){
        cout << std::format( "{}\t" , field);
    }
    cout << "\n";
    return 0;
}
 
// end
</syntaxhighlight>
 
{{ out }}
<pre>
sample: a!===b=!=c
match: != ⭢ a^==b=^c
match: == ⭢ a^^b=^c
match: = ⭢ a^^b^^c
a b c
 
</pre>
 
=={{header|CoffeeScript}}==
<langsyntaxhighlight lang="coffeescript">
multi_split = (text, separators) ->
# Split text up, using separators to break up text and discarding
Line 485 ⟶ 683:
console.log multi_split 'a!===b=!=c', ['==', '!=', '='] # [ 'a', '', 'b', '', 'c' ]
console.log multi_split '', ['whatever'] # [ '' ]
</syntaxhighlight>
</lang>
 
=={{header|D}}==
<langsyntaxhighlight lang="d">import std.stdio, std.array, std.algorithm;
 
string[] multiSplit(in string s, in string[] divisors) pure nothrow {
Line 525 ⟶ 723:
.join(" {} ")
.writeln;
}</langsyntaxhighlight>
{{out}} (separator locations indicated by braces):
<pre>a {} {} b {} {} c</pre>
=={{header|Delphi}}==
{{libheader| System.SysUtils}}
<syntaxhighlight lang="delphi">
<lang Delphi>
program Multisplit;
 
Line 544 ⟶ 742:
write(']');
readln;
end.</langsyntaxhighlight>
{{out}}
<pre>["a" "" "b" "" "c" ]</pre>
=={{header|EasyLang}}==
<syntaxhighlight>
# Writes str$ with every separator taken from sep$[] wrapped in braces,
# followed by whatever text is left after the last separator.
proc multisplit str$ sep$[] . .
# each pass writes the text in front of the earliest separator and that separator
repeat
# 1 / 0 is the "nothing found yet" marker - presumably infinity, TODO confirm
min = 1 / 0
for sep$ in sep$[]
pos = strpos str$ sep$
# pos > 0 is taken as "found" - presumably strpos yields 0 otherwise, TODO confirm
# the strict "<" lets an earlier entry of sep$[] win a tie at the same position
if pos > 0 and pos < min
min = pos
msep$ = sep$
.
.
# min still holding the marker means no separator is left in str$
until min = 1 / 0
write substr str$ 1 (min - 1) & "{" & msep$ & "}"
# continue behind the separator; 9999 is the requested length, so this
# presumably means "the rest" for texts shorter than that - TODO confirm
str$ = substr str$ (min + len msep$) 9999
.
print str$
.
# demo call with the sample text and separators
multisplit "a!===b=!=c" [ "==" "!=" "=" ]
</syntaxhighlight>
{{out}}
<pre>
a{!=}{==}b{=}{!=}c
</pre>
 
=={{header|Elixir}}==
{{trans|Erlang}}
<langsyntaxhighlight lang="elixir">iex(1)> Regex.split(~r/==|!=|=/, "a!====b=!=c")
["a", "", "", "b", "", "c"]</langsyntaxhighlight>
 
=={{header|Erlang}}==
Line 562 ⟶ 785:
 
If we ignore the "Extra Credit" requirements and skip the 'ordered separators' condition (i.e. solving an entirely different task), this is exactly what one of the overloads of .NET's <code>String.Split</code> method does. Using F# Interactive:
<langsyntaxhighlight lang="fsharp">> "a!===b=!=c".Split([|"=="; "!="; "="|], System.StringSplitOptions.None);;
val it : string [] = [|"a"; ""; "b"; ""; "c"|]
 
> "a!===b=!=c".Split([|"="; "!="; "=="|], System.StringSplitOptions.None);;
val it : string [] = [|"a"; ""; ""; "b"; ""; "c"|]</langsyntaxhighlight>
 
<code>System.StringSplitOptions.None</code> specifies that empty strings should be included in the result.
 
=={{header|Factor}}==
<langsyntaxhighlight lang="factor">USING: arrays fry kernel make sequences ;
 
IN: rosetta-code.multisplit
 
: ?pairfirst-subseq ( ?seq xseparators -- {?,x}/fn separator )
tuck
over [ 2array ] [ 2drop f ] if ;
[ [ subseq-index ] dip 2array ] withd map-index sift-keys
 
[ drop f f ] [ [ first ] infimum-by first2 rot nth ] if-empty ;
: best-separator ( seq -- pos index )
dup [ first ] map infimum '[ first _ = ] find nip first2 ;
 
: first-subseq ( separators seq -- n separator )
dupd [ swap [ subseq-start ] dip ?pair ] curry map-index sift
[ drop f f ] [ best-separator rot nth ] if-empty ;
 
: multisplit ( string separators -- seq )
'[
[ _dup over_ first-subseq dup ] [
length -rot cut-slice swap[ , ] dip swap tail-slice
] while 2drop ,
] { } make ;</langsyntaxhighlight>
 
{{out}}
Line 599 ⟶ 817:
=={{header|FreeBASIC}}==
FreeBASIC does not have a built in 'split' function so we need to write one:
<langsyntaxhighlight lang="freebasic">' FB 1.05.0 Win64
 
Sub Split(s As String, sepList() As String, result() As String, removeEmpty As Boolean = False, showSepInfo As Boolean = False)
Line 672 ⟶ 890:
Print
Print "Press any key to quit"
Sleep</langsyntaxhighlight>
 
{{out}}
Line 693 ⟶ 911:
 
=={{header|Go}}==
<langsyntaxhighlight lang="go">package main
 
import (
Line 718 ⟶ 936:
func main() {
fmt.Printf("%q\n", ms("a!===b=!=c", []string{"==", "!=", "="}))
}</langsyntaxhighlight>
{{out}}
<pre>
Line 725 ⟶ 943:
 
=={{header|Haskell}}==
<langsyntaxhighlight Haskelllang="haskell">import Data.List
(isPrefixOf, stripPrefix, genericLength, intercalate)
intercalate,
isPrefixOf,
stripPrefix,
)
------------------------ MULTISPLIT ----------------------
 
trysplitmultisplit :: [String] -> [String] -> Maybe [(String, String, Int)]
trysplit smultisplit delims = go [] 0
where
go acc pos [] = [(acc, [], pos)]
go acc pos l@(s : sx) =
case trysplit delims l of
Nothing -> go (s : acc) (pos + 1) sx
Just (d, sxx) ->
(acc, d, pos) :
go [] (pos + genericLength d) sxx
 
trysplit :: [String] -> String -> Maybe (String, String)
trysplit delims s =
case filter (`isPrefixOf` s) delims of
[] -> Nothing
(d : _) -> Just (d, (\(Just x) -> x) $ stripPrefix d s)
 
multisplit :: String -> [String] -> [(String, String, Int)]
multisplit list delims =
let ms [] acc pos = [(acc, [], pos)]
ms l@(s:sx) acc pos =
case trysplit l delims of
Nothing -> ms sx (s : acc) (pos + 1)
Just (d, sxx) -> (acc, d, pos) : ms sxx [] (pos + genericLength d)
in ms list [] 0
 
--------------------------- TEST -------------------------
main :: IO ()
main = do
let parsed = multisplit "a!===b=!=c" ["==", "!=", "="] "a!===b=!=c"
mapM_
putStrLn
[ "split string:",
, intercalate "," $ map (\(a, _, _) -> a) parsed,
, "with [(string, delimiter, offset)]:",
, show parsed
]</langsyntaxhighlight>
{{out}}
<pre>split string:
Line 761 ⟶ 988:
Or as a fold:
 
<langsyntaxhighlight lang="haskell">import Data.List (find, isPrefixOf, foldl') --'
import Data.Bool (bool)
 
Line 778 ⟶ 1,005:
in reverse $ (ts, [], length s) : ps
main :: IO ()
main = print $ multiSplit ["==", "!=", "="] "a!===b=!=c"</langsyntaxhighlight>
{{Out}}
<pre>[("a","!=",1),("","==",3),("b","=",6),("","!=",7),("c","",10)]</pre>
 
=={{header|Icon}} and {{header|Unicon}}==
<langsyntaxhighlight Iconlang="icon">procedure main()
s := "a!===b=!=c"
# just list the tokens
Line 803 ⟶ 1,030:
procedure arb()
suspend .&subject[.&pos:&pos <- &pos to *&subject + 1]
end</langsyntaxhighlight>
 
{{out}}
Line 810 ⟶ 1,037:
 
=={{header|J}}==
<syntaxhighlight lang ="j">multisplit=: 4 :0{{
'sep begin sep'=. |: tbs=. y _,~/:~;(,.&.:(|."1)@;@(>i.@#@[) ,.L:0"0y I.@E.L:0) x NB.
endlen=. begin + sep { #@>y NB.
lastr=. next=i.3 0
rj=. 2k=. 0$0
while. nextj<#beginx do.
while. j>k{begin do. k=.k+1 end.
r=. r,.(last}.x{.~next{begin);next{t
'b s'=. k{bs NB. character index where separator appears, separator index
last=. next{end
if. _=b do. r,.(j}.x);'';'' return. end.
next=. 1 i.~(begin>next{begin)*.begin>:last
txt=. (j + i. b-j){x
j=. b+s{len
r=.r,.txt;(s{::y);b
end.
}}</syntaxhighlight>
r=. r,.'';~last}.x
)</lang>
 
Explanation:
 
First find all potentially relevant separator instances, and sort them in increasing order, by starting location and separator index. <code>sep</code> is separator index, and <code>begin</code> is starting location. <code>end</code> is ending location.
 
Then, loop through the possibilities, skipping over those separators which would overlap with previously used separators.
 
The result consists of three rows: The first row is the extracted substrings, the second and third rows are the "extra credit" part -- for each extracted substring: the following separator and the position in the original string where that separator started. Note that the very last substring does not have a separator following it, so the extra credit part is blank for that substring.
 
Example use:
 
<langsyntaxhighlight lang="j"> S=: multisplit 'a!===b=';'!=c';'='
┌──┬──┬─┬──┬─┐
S multisplit '==';'!=';'='
│a │ │b│ │c│
┌───┬───┬───┬───┬─┐
├──┼──┼─┼──┼─┤
│a │ │b │ │c│
│!=│==│=│!=│ │
├───┼───┼───┼───┼─┤
├──┼──┼─┼──┼─┤
│1 1│0 3│2 6│1 7│ │
│1 │3 │6│7 │ │
└───┴───┴───┴───┴─┘
└──┴──┴─┴──┴─┘
S multisplit '=';'!=';'=='
┌──┬─┬─┬─┬──┬─┐
┌───┬───┬───┬───┬───┬─┐
│a │ │b │b│ │c│
├──┼─┼─┼─┼──┼─┤
├───┼───┼───┼───┼───┼─┤
│!=│=│=│=│!=│ │
│1 1│0 3│0 4│0 6│1 7│ │
├──┼─┼─┼─┼──┼─┤
└───┴───┴───┴───┴───┴─┘
│1 │3│4│6│7 │ │
└──┴─┴─┴─┴──┴─┘
'X123Y' multisplit '1';'12';'123';'23';'3'
┌─┬──┬─┐
┌───┬───┬─┐
│X │ │X│ │Y│
├─┼──┼─┤
├───┼───┼─┤
│0 1│3 2││1│23│
├─┼──┼─┤
└───┴───┴─┘</lang>
│1│2 │ │
└─┴──┴─┘</syntaxhighlight>
 
=={{header|Java}}==
<langsyntaxhighlight lang="java">import java.util.*;
 
public class MultiSplit {
Line 886 ⟶ 1,120:
return result;
}
}</langsyntaxhighlight>
 
<pre>Regex split:
Line 898 ⟶ 1,132:
Based on Ruby example.
{{libheader|Underscore.js}}
<langsyntaxhighlight JavaScriptlang="javascript">RegExp.escape = function(text) {
return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
}
Line 905 ⟶ 1,139:
var sep_regex = RegExp(_.map(seps, function(sep) { return RegExp.escape(sep); }).join('|'));
return string.split(sep_regex);
}</langsyntaxhighlight>
 
===ES6===
Line 911 ⟶ 1,145:
 
{{Trans|Haskell}} (Multisplit by fold example)
<langsyntaxhighlight lang="javascript">(() => {
 
/// Delimiter list -> String -> list of parts, delimiters, offsets
Line 1,029 ⟶ 1,263:
multiSplit(delims, strTest)
);
})();</langsyntaxhighlight>
{{Out}}
<pre>[
Line 1,067 ⟶ 1,301:
Both helper functions could be made inner functions of the main function, but are kept separate here for clarity.
 
<langsyntaxhighlight lang="jq"># peeloff(delims) either peels off a delimiter or
# a single character from the input string.
# The input should be a nonempty string, and delims should be
Line 1,113 ⟶ 1,347:
then .[0:length-1] + [ .[length-1] + $x ]
else . + [$x]
end ) ;</langsyntaxhighlight>
'''Examples'''
("a!===b=!=c",
Line 1,124 ⟶ 1,358:
=={{header|Julia}}==
From REPL:
<langsyntaxhighlight lang="julia">
julia> split(s, r"==|!=|=")
5-element Array{SubString{String},1}:
Line 1,132 ⟶ 1,366:
""
"c"
</syntaxhighlight>
</lang>
 
=={{header|Kotlin}}==
<langsyntaxhighlight lang="scala">// version 1.0.6
 
fun main(args: Array<String>) {
Line 1,165 ⟶ 1,399:
println("\nThe delimiters matched and the indices at which they occur are:")
println(matches)
}</langsyntaxhighlight>
 
{{out}}
Line 1,178 ⟶ 1,412:
=={{header|Lua}}==
The function I've written here is really excessive for this task but it has historically been hard to find example code for a good Lua split function on the Internet. This one behaves the same way as Julia's Base.split and I've included a comment describing its precise operation.
<langsyntaxhighlight Lualang="lua">--[[
Returns a table of substrings by splitting the given string on
occurrences of the given character delimiters, which may be specified
Line 1,230 ⟶ 1,464:
for k, v in pairs(multisplit) do
print(k, v)
end</langsyntaxhighlight>
{{Out}}
<pre>Key Value
Line 1,243 ⟶ 1,477:
Code from BBC BASIC with little changes to fit in M2000.
 
<syntaxhighlight lang="m2000 interpreter">
<lang M2000 Interpreter>
Module CheckIt {
DIM sep$()
Line 1,273 ⟶ 1,507:
}
CheckIt
</syntaxhighlight>
</lang>
 
=={{header|Mathematica}}/{{header|Wolfram Language}}==
Just use the built-in function "StringSplit":
<langsyntaxhighlight lang="mathematica">StringSplit["a!===b=!=c", {"==", "!=", "="}]</langsyntaxhighlight>
{{Out}}
<pre>{a,,b,,c}</pre>
 
=={{header|MiniScript}}==
<langsyntaxhighlight MiniScriptlang="miniscript">parseSep = function(s, pats)
result = []
startPos = 0
Line 1,300 ⟶ 1,534:
end function
 
print parseSep("a!===b=!=c", ["==", "!=", "="])</langsyntaxhighlight>
{{Out}}
<pre>["a", "{!=}", "", "{==}", "b", "{=}", "", "{!=}"]</pre>
 
=={{header|Nim}}==
<langsyntaxhighlight lang="nim">import strutils
iterator tokenize(text: string; sep: openArray[string]): tuple[token: string, isSep: bool] =
Line 1,323 ⟶ 1,557:
if isSep: stdout.write '{',token,'}'
else: stdout.write token
echo ""</langsyntaxhighlight>
 
{{out}}
Line 1,330 ⟶ 1,564:
=={{header|Perl}}==
 
<langsyntaxhighlight Perllang="perl">sub multisplit {
my ($sep, $string, %opt) = @_ ;
$sep = join '|', map quotemeta($_), @$sep;
Line 1,340 ⟶ 1,574:
print "\n";
print "'$_' " for multisplit ['==','!=','='], "a!===b=!=c", keep_separators => 1;
print "\n";</langsyntaxhighlight>
 
{{Out}}
Line 1,349 ⟶ 1,583:
 
=={{header|Phix}}==
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">multisplit</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">delims</span><span style="color: #0000FF;">)</span>
Line 1,373 ⟶ 1,607:
<span style="color: #000000;">multisplit</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"a!===b=!=c"</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">"=="</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"!="</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"="</span><span style="color: #0000FF;">})</span>
<!--</langsyntaxhighlight>-->
{{out}}
<pre>
Line 1,384 ⟶ 1,618:
 
=={{header|PicoLisp}}==
<langsyntaxhighlight PicoLisplang="picolisp">(de multisplit (Str Sep)
(setq Sep (mapcar chop Sep))
(make
Line 1,402 ⟶ 1,636:
 
(println (multisplit "a!===b=!=c" '("==" "!=" "=")))
(println (multisplit "a!===b=!=c" '("=" "!=" "==")))</langsyntaxhighlight>
{{out}}
<pre>("a" (1 "!=") NIL (3 "==") "b" (6 "=") NIL (7 "!=") "c")
Line 1,408 ⟶ 1,642:
 
=={{header|Pike}}==
<langsyntaxhighlight Pikelang="pike">string input = "a!===b=!=c";
array sep = ({"==", "!=", "=" });
 
Line 1,424 ⟶ 1,658:
 
result;
Result: ({"a", ({"!=", 1}), "", ({"==", 3}), "b", ({"=", 6}), "", ({"!=", 7}), "c"})</langsyntaxhighlight>
 
=={{header|PowerShell}}==
<syntaxhighlight lang="powershell">
<lang PowerShell>
$string = "a!===b=!=c"
$separators = [regex]"(==|!=|=)"
Line 1,439 ⟶ 1,673:
 
$matchInfo
</syntaxhighlight>
</lang>
{{Out}}
<pre>
Line 1,451 ⟶ 1,685:
=={{header|Prolog}}==
Works with SWI-Prolog.
<langsyntaxhighlight Prologlang="prolog">multisplit(_LSep, '') -->
{!},
[].
Line 1,491 ⟶ 1,725:
my_sort(<, (N, N1, _), (N, N2, _)) :-
N1 > N2.
</syntaxhighlight>
</lang>
{{out}}
<pre>?- multisplit(['==', '!=', '='], 'ax!===b=!=c', Lst, []).
Line 1,500 ⟶ 1,734:
===Procedural===
Using regular expressions:
<langsyntaxhighlight lang="python">>>> import re
>>> def ms2(txt="a!===b=!=c", sep=["==", "!=", "="]):
if not txt or not sep:
Line 1,514 ⟶ 1,748:
['a', (1, 1), '', (0, 3), 'b', (2, 6), '', (1, 7), 'c']
>>> ms2(txt="a!===b=!=c", sep=["=", "!=", "=="])
['a', (1, 1), '', (0, 3), '', (0, 4), 'b', (0, 6), '', (1, 7), 'c']</langsyntaxhighlight>
 
Not using regular expressions:
'''Inspired by C-version'''
<langsyntaxhighlight lang="python">def multisplit(text, sep):
lastmatch = i = 0
matches = []
Line 1,540 ⟶ 1,774:
>>> multisplit('a!===b=!=c', ['!=', '==', '='])
['a', (0, 1), (1, 3), 'b', (2, 6), (0, 7), 'c']
</syntaxhighlight>
</lang>
 
'''Alternative version'''
<langsyntaxhighlight lang="python">def min_pos(List):
return List.index(min(List))
 
Line 1,609 ⟶ 1,843:
S = "a!===b=!=c"
multisplit(S, ["==", "!=", "="]) # output: ['a', [1, 1], '', [0, 3], 'b', [2, 6], '', [1, 7], 'c']
multisplit(S, ["=", "!=", "=="]) # output: ['a', [1, 1], '', [0, 3], '', [0, 4], 'b', [0, 6], '', [1, 7], 'c']</langsyntaxhighlight>
 
===Functional===
In terms of a fold (reduce), without use of regular expressions:
{{Works with|Python|3.7}}
<langsyntaxhighlight lang="python">'''Multisplit'''
 
 
Line 1,623 ⟶ 1,857:
def multiSplit(separators):
'''List of triples:
[(token, separator, start index of separator in string)].
'''
def go(s):
Line 1,631 ⟶ 1,865:
inDelim = offset > i
return maybe(
(tokens if inDelim else c + tokens, parts, offset)
tokens if inDelim
else c + tokens, parts, offset
)
)(
lambda x: ('', [(tokens, x, i)] + parts, i + len(x))
'',
[(tokens, x, i)] + parts,
i + len(x)
)
)(
Nothing()None if inDelim else find(
s[i:].startswith
)(separators)
Line 1,644 ⟶ 1,885:
 
 
# -------------------------- TEST-- -------------------------
# main :: IO ()
def main():
Line 1,655 ⟶ 1,896:
 
 
# -------------------- GENERIC FUNCTIONS- -------------------
 
# Justfind :: (a -> MaybeBool) -> [a] -> (a | None)
def Just(x):
'''Constructor for an inhabited Maybe (option type) value.
Wrapper containing the result of a computation.
'''
return {'type': 'Maybe', 'Nothing': False, 'Just': x}
 
 
# Nothing :: Maybe a
def Nothing():
'''Constructor for an empty Maybe (option type) value.
Empty wrapper returned where a computation is not possible.
'''
return {'type': 'Maybe', 'Nothing': True}
 
 
# find :: (a -> Bool) -> [a] -> Maybe a
def find(p):
'''Just the first element in the list that matches p,
or NothingNone if no elements match.
'''
def go(xs):
for x in xstry:
return next(x for x in xs if p(x):)
except return Just(x)StopIteration:
return Nothing()None
return lambda xs: go(xs)
 
 
# maybe :: b -> (a -> b) -> Maybe (a | None) -> b
def maybe(v):
'''Either the default value v, if m is NothingNone,
or the application of f to x,.
where m is Just(x).
'''
return lambda f: lambda m: v if (
None is m or m.get('Nothing')
) else f(m.get('Just'))
 
 
# MAIN ---
if __name__ == '__main__':
main()</langsyntaxhighlight>
{{Out}}
<pre>[('a', '!=', 1), ('', '==', 3), ('b', '=', 6), ('', '!=', 7), ('c', '', 10)]</pre>
Line 1,705 ⟶ 1,929:
=={{header|Racket}}==
 
<langsyntaxhighlight lang="racket">
#lang racket
(regexp-match* #rx"==|!=|=" "a!===b=!=c" #:gap-select? #t #:match-select values)
;; => '("a" ("!=") "" ("==") "b" ("=") "" ("!=") "c")
</syntaxhighlight>
</lang>
 
=={{header|Raku}}==
(formerly Perl 6)
<syntaxhighlight lang="raku" line>sub multisplit($str, @seps) { $str.split: / ||@seps /, :v }
{{works with|Rakudo|2020.08.1}}
<lang perl6>sub multisplit($str, @seps) { $str.split(/ ||@seps /, :v) }
 
my @chunks = multisplit( 'a!===b=!=c==d', < == != = > );
 
# Print the strings.
Line 1,723 ⟶ 1,946:
# Print the positions of the separators.
for grep Match, @chunks -> $s {
say " {$s.fmt: '%2s'} from {$s.from().fmt: '%2d'} to {$s.to().fmt: '%2d'}";
}</langsyntaxhighlight>
{{out}}
<pre>("a", "!=", "", "==", "b", "=", "", "!=", "c", "==", "d")
!= from 1 to 3
== from 3 to 5
= from 6 to 7
!= from 7 to 9
== from 10 to 12</pre>
Using the array <tt>@seps</tt> in a pattern automatically does alternation.
By default this would do longest-term matching (that is, <tt>|</tt> semantics), but we can force it to do left-to-right matching by embedding the array in a short-circuit alternation (that is, <tt>||</tt> semantics).
Line 1,739 ⟶ 1,962:
 
=={{header|REXX}}==
<langsyntaxhighlight lang="rexx">/*REXX program splits a (character) string based on different separator delimiters.*/
parse arg $ /*obtain optional string from the C.L. */
if $='' then $= "a!===b=!=c" /*None specified? Then use the default*/
Line 1,764 ⟶ 1,987:
$=changestr(null, $, showNull) /* ··· showing of "null" chars. */
say 'new string:' $ /*now, display the new string to term. */
/*stick a fork in it, we're all done. */</langsyntaxhighlight>
Some older REXXes don't have a &nbsp; '''changestr''' &nbsp; BIF, so one is included here &nbsp; ──► &nbsp; [[CHANGESTR.REX]].
<br><br>'''output''' &nbsp; when using the default input:
Line 1,773 ⟶ 1,996:
 
=={{header|Ring}}==
<langsyntaxhighlight lang="ring">
# Project : Multisplit
 
Line 1,783 ⟶ 2,006:
see "" + n + ": " + substr(str, 1, pos-1) + " Sep By: " + sep[n] + nl
next
</syntaxhighlight>
</lang>
Output:
<pre>
Line 1,796 ⟶ 2,019:
The simple method, using a regular expression to split the text.
 
<langsyntaxhighlight lang="ruby">text = 'a!===b=!=c'
separators = ['==', '!=', '=']
 
Line 1,804 ⟶ 2,027:
 
p multisplit_simple(text, separators) # => ["a", "", "b", "", "c"]
</syntaxhighlight>
</lang>
 
The version that also returns the information about the separations.
 
<langsyntaxhighlight lang="ruby">def multisplit(text, separators)
sep_regex = Regexp.union(separators)
separator_info = []
Line 1,825 ⟶ 2,048:
 
p multisplit(text, separators)
# => [["a", "", "b", "", "c"], [["!=", 1], ["==", 3], ["=", 6], ["!=", 7]]]</langsyntaxhighlight>
 
Also demonstrating a method to rejoin the string given the separator information.
 
<langsyntaxhighlight lang="ruby">def multisplit_rejoin(info)
str = info[0].zip(info[1])[0..-2].inject("") {|str, (piece, (sep, idx))| str << piece << sep}
str << info[0].last
Line 1,835 ⟶ 2,058:
 
p multisplit_rejoin(multisplit(text, separators)) == text
# => true</langsyntaxhighlight>
 
=={{header|Run BASIC}}==
<langsyntaxhighlight lang="runbasic">str$ = "a!===b=!=c"
sep$ = "=== != =! b =!="
 
Line 1,846 ⟶ 2,069:
split$ = word$(str$,1,theSep$)
print i;" ";split$;" Sep By: ";theSep$
wend</langsyntaxhighlight>
{{out}}
<pre>1 a! Sep By: ===
Line 1,855 ⟶ 2,078:
 
=={{header|Scala}}==
<langsyntaxhighlight lang="scala">import scala.annotation.tailrec
def multiSplit(str:String, sep:Seq[String])={
def findSep(index:Int)=sep find (str startsWith (_, index))
Line 1,870 ⟶ 2,093:
}
 
println(multiSplit("a!===b=!=c", Seq("!=", "==", "=")))</langsyntaxhighlight>
{{out}}
<pre>List(a, , b, , c)</pre>
Line 1,876 ⟶ 2,099:
=={{header|Scheme}}==
{{works with|Gauche Scheme}}
<langsyntaxhighlight Schemelang="scheme">(use srfi-13)
(use srfi-42)
 
Line 1,890 ⟶ 2,113:
(define (glean shards)
(list-ec (: x (index i) shards)
(if (even? i)) x))</langsyntaxhighlight>
<b>Testing:</b>
<pre>
Line 1,903 ⟶ 2,126:
First approach, using line delimiters. Lines are delimited by an array of separator strings, normally [CRLF, LF, CR, lineSeparator(0x2028), paragraphSeparator(0x2029)]. Supplying an alternate set of delimiters lets us split a string by a different (ordered) set of strings:
 
<langsyntaxhighlight lang="sensetalk">set source to "a!===b=!=c"
set separators to ["==", "!=", "="]
 
put each line delimited by separators of source</langsyntaxhighlight>
Output:
<syntaxhighlight lang ="sensetalk">(a,,b,,c)</langsyntaxhighlight>
 
Second approach, using a pattern. SenseTalk's pattern language lets us define a pattern (a regex) which can then be used to split the string and also to display the actual separators that were found.
<langsyntaxhighlight lang="sensetalk">set source to "a!===b=!=c"
set separatorPattern to <"==" or "!=" or "=">
 
Line 1,917 ⟶ 2,140:
 
put each occurrence of separatorPattern in source
</syntaxhighlight>
</lang>
Output:
<langsyntaxhighlight lang="sensetalk">(a,,b,,c)
(!=,==,=,!=)</langsyntaxhighlight>
 
=={{header|Sidef}}==
<langsyntaxhighlight lang="ruby">func multisplit(sep, str, keep_sep=false) {
sep = sep.map{.escape}.join('|');
var re = Regex.new(keep_sep ? "(#{sep})" : sep);
Line 1,931 ⟶ 2,154:
[false, true].each { |bool|
say multisplit(%w(== != =), 'a!===b=!=c', keep_sep: bool);
}</langsyntaxhighlight>
{{out}}
<pre>
Line 1,944 ⟶ 2,167:
{{trans|Python}}
 
<langsyntaxhighlight lang="swift">extension String {
func multiSplit(on seps: [String]) -> ([Substring], [(String, (start: String.Index, end: String.Index))]) {
var matches = [Substring]()
Line 1,979 ⟶ 2,202:
let (matches, matchedSeps) = "a!===b=!=c".multiSplit(on: ["==", "!=", "="])
 
print(matches, matchedSeps.map({ $0.0 }))</langsyntaxhighlight>
 
 
Line 1,988 ⟶ 2,211:
=={{header|Tcl}}==
This simple version does not retain information about what the separators were:
<langsyntaxhighlight lang="tcl">proc simplemultisplit {text sep} {
set map {}; foreach s $sep {lappend map $s "\uffff"}
return [split [string map $map $text] "\uffff"]
}
puts [simplemultisplit "a!===b=!=c" {"==" "!=" "="}]</langsyntaxhighlight>
{{out}}
<pre>a {} b {} c</pre>
Line 1,999 ⟶ 2,222:
to the match information (because the two collections of information
are of different lengths).
<langsyntaxhighlight lang="tcl">proc multisplit {text sep} {
foreach s $sep {lappend sr [regsub -all {\W} $s {\\&}]}
set sepRE [join $sr "|"]
Line 2,012 ⟶ 2,235:
}
return [list [lappend pieces [string range $text $start end]] $match]
}</langsyntaxhighlight>
Demonstration code:
<langsyntaxhighlight lang="tcl">set input "a!===b=!=c"
set matchers {"==" "!=" "="}
lassign [multisplit $input $matchers] substrings matchinfo
puts $substrings
puts $matchinfo</langsyntaxhighlight>
{{out}}
<pre>
Line 2,035 ⟶ 2,258:
The <code>:gap 0</code> makes the horizontal collect repetitions strictly adjacent. This means that <code>coll</code> will quit when faced with a nonmatching suffix portion of the data rather than scan forward (no gap allowed!). This creates an opportunity for the <code>tail</code> variable to grab the suffix which remains, which may be an empty string.
 
<langsyntaxhighlight lang="txr">@(next :args)
@(coll :gap 0)@(choose :shortest tok)@\
@tok@{sep /==/}@\
Line 2,045 ⟶ 2,268:
@(output)
@(rep)"@tok" {@sep} @(end)"@tail"
@(end)</langsyntaxhighlight>
 
Runs:
Line 2,070 ⟶ 2,293:
{{trans|Racket}}
 
<langsyntaxhighlight lang="sh">$ txr -p '(tok-str "a!===b=!=c" #/==|!=|=/ t)'
("a" "!=" "" "==" "b" "=" "" "!=" "c")</langsyntaxhighlight>
 
Here the third boolean argument means "keep the material between the tokens", which in the Racket version seems to be requested by the argument <code>#:gap-select? #t</code>.
Line 2,077 ⟶ 2,300:
=={{header|UNIX Shell}}==
{{works with|bash}}
<langsyntaxhighlight lang="bash">multisplit() {
local str=$1
shift
Line 2,105 ⟶ 2,328:
if [[ $original == $recreated ]]; then
echo "successfully able to recreate original string"
fi</langsyntaxhighlight>
 
{{out}}
Line 2,116 ⟶ 2,339:
 
=={{header|VBScript}}==
<syntaxhighlight lang="vb">
<lang vb>
Function multisplit(s,sep)
arr_sep = Split(sep,"|")
Line 2,143 ⟶ 2,366:
WScript.StdOut.WriteLine
WScript.StdOut.Write "Extra Credit: " & multisplit_extra("a!===b=!=c","!=|==|=")
WScript.StdOut.WriteLine</langsyntaxhighlight>
{{out}}
<pre>
Line 2,150 ⟶ 2,373:
</pre>
 
=={{header|V (Vlang)}}==
Without using additional libraries or regular expressions:
<syntaxhighlight lang="v (vlang)">fn main() {
str := "a!===b=!=c"
sep := ["==","!=","="]
Line 2,158 ⟶ 2,381:
}
 
fn ms(txt string, sep []string) (map[int]string, []string, []string) {
 
mut ans, mut extra := []string{}, []string{}
mut place := map[int]string{}
Line 2,193 ⟶ 2,415:
println('Extra: $extra')
return place, ans, extra
}</langsyntaxhighlight>
{{out}}
<pre>
Line 2,205 ⟶ 2,427:
{{libheader|Wren-pattern}}
{{libheader|Wren-fmt}}
<langsyntaxhighlight ecmascriptlang="wren">import "./pattern" for Pattern
import "./fmt" for Fmt
 
var input = "a!===b=!=c"
Line 2,217 ⟶ 2,439:
var parts = p.splitAll(input)
System.print("\nThe substrings between the separators are:")
System.print(parts.map { |p| (p != "") ? Fmt.q(p) : "empty string" }.toList)</langsyntaxhighlight>
 
{{out}}
Line 2,230 ⟶ 2,452:
["a", empty string, "b", empty string, "c"]
</pre>
 
=={{header|XPL0}}==
<syntaxhighlight lang="XPL0">include xpllib; \for StrLen, StrNCmp, and Print
 
\Outputs Str with every separator found in it shown in parentheses.
\Seps holds N separator strings, tried in order at each position of Str.
proc MultiSplit(Str, Seps, N);
char Str; int Seps, N;
int S, Ch, SepLen;
\scan until a zero byte is met - assumes Str is zero-terminated, TODO confirm
[while Str(0) # 0 do
[for S:= 0 to N-1 do
[SepLen:= StrLen(Seps(S));
if StrNCmp(Str, Seps(S), SepLen) = 0 then
[Print(" (%s) ", Seps(S));
\step over the separator that was just shown
Str:= Str + SepLen;
\100 is a sentinel: it pushes S past the loop limit and records the match
\NOTE(review): this presumes N is below 100 - confirm
S:= 100;
];
];
\S below the sentinel means no separator matched here: emit one plain character
if S < 100 then
[Ch:= Str(0); Str:= Str+1;
if Ch # 0 then ChOut(0, Ch);
];
];
];
 
MultiSplit("a!===b=!=c", ["==", "!=", "="], 3)</syntaxhighlight>
{{out}}
<pre>
a (!=) (==) b (=) (!=) c</pre>
 
=={{header|Yabasic}}==
<langsyntaxhighlight Yabasiclang="yabasic">t$ = "a!===b=!=c"
s$ = "==,!=,="
 
Line 2,250 ⟶ 2,499:
print left$(t$, l - 1), " with separator ", n$(j)
t$ = right$(t$, len(t$) - (l + len(n$(j))) + 1)
loop</langsyntaxhighlight>
 
=={{header|zkl}}==
{{trans|Python}}
<langsyntaxhighlight lang="zkl">fcn multisplit(text, sep){
lastmatch := i := 0; matches := List();
while(i < text.len()){
Line 2,270 ⟶ 2,519:
if(i > lastmatch) matches.append(text[lastmatch,i-lastmatch]);
return(matches);
}</langsyntaxhighlight>
<langsyntaxhighlight lang="zkl">multisplit("a!===b=!=c", T("==", "!=", "=")).println();
multisplit("a!===b=!=c", T("!=", "==", "=")).println();</langsyntaxhighlight>
{{out}}
<pre>
2,054

edits