Natural sorting: Difference between revisions

m
→‎{{header|AppleScript}}: New sort handler URL, tidy-up.
m (→‎{{header|AppleScript}}: New sort handler URL, tidy-up.)
Line 242:
 
=={{header|AppleScript}}==
 
AppleScript doesn't have a built-in sort facility, but its string comparisons are normalised and several attributes can be specifically either considered or ignored, making it fairly simple for a sort coded in the language to produce the results required here.
 
<syntaxhighlight lang="applescript">use AppleScript version "2.4" -- OS X 10.10 (Yosemite) or later
use framework "Foundation"
use sorter : script ¬
use sorter : script "Custom Iterative Ternary Merge Sort" -- <https://macscripter.net/viewtopic.php?pid=194430#p194430>
"Custom Iterative Ternary Merge Sort" -- <www.macscripter.net/t/timsort-and-nigsort/71383/3>
 
on naturalSort(listOfText)
-- Get doctored copies of the strings in order to get around situations that
-- AppleScript's text comparison attributes don't handle naturally. i.e. reduce any
-- run of white space to a single character, zap any leading/trailing space, move
-- any article word at the beginning to the end, and replace any "ſ" or "ʒ" with "s".
script o
property inputdoctored : listOfText's items
property doctored : {}
end script
set regex to current application's NSRegularExpressionSearch
set substitutions to {{"\\s++", space}, {"^ | $", ""}, ¬
repeat with i from 1 to (count listOfText)
{"^(?i)(The|An?) (.++)$", "$2 $1"}, {"[\\u0292\\u017f]", "s"}}
set thisString to (current application's class "NSMutableString"'s stringWithString:(item i of o's input))
repeat with i from 1 to (count o's doctored)
-- AppleScript's 'ignoring white space' setting ignores ALL white space. So, since the existence of
set mutableString to (current application's class "NSMutableString"'s ¬
-- white space between words has to be considered, physically remove or alter any to be ignored.
stringWithString:(o's doctored's item i))
-- Firstly reduce runs of any type of white space to single 'space' characters.
repeat with thisSub in substitutions
tell thisString to replaceOccurrencesOfString:("\\s++") withString:(space) options:(regex) range:({0, its |length|()})
-- Then remove any leadingset and/or{searchStr, trailingreplacement} spaces.to thisSub
tell mutableString to replaceOccurrencesOfString:(searchStr) ¬
tell thisString to replaceOccurrencesOfString:("^ | $") withString:("") options:(regex) range:({0, its |length|()})
withString:(replacement) options:(regex) range:({0, its |length|()})
-- Move any instance of "The ", "A ", or "An " at the front of a string to the end assuming the string to be a title.
end repeat
-- This allows the article to act as a tie-breaker if necessary.
set o's doctored's item i to mutableString as text
tell thisString to replaceOccurrencesOfString:("^(?i)(The|An?) (.++)$") withString:("$2 $1") options:(regex) ¬
range:({0, its |length|()})
-- For the sake of this task, replace any instances of "ſ" or "ʒ" with "s".
tell thisString to replaceOccurrencesOfString:("[\\u0292\\u017f]") withString:("s") options:(regex) ¬
range:({0, its |length|()})
set end of o's doctored to thisString as text
end repeat
-- Sort the doctored strings with the relevant AppleScript comparison attributes
-- Set AppleScript's string comparison attributes for the sort.
-- explicitly either set or not, echoing the moves in the original list.
-- Ligatures are always compared by their component characters and AppleScript has no setting to change this.
-- 'Numeric strings' are runs of digit characters only.
-- The white space, hyphens, and case settings here are the defaults,
-- but are set explicitly in case this handler's called from a different setting.
considering numeric strings, white space and hyphens but ignoring diacriticals, punctuation and case
-- Sort items 1 thru -1 of the doctored strings, rearranging the original list in parallel.
tell sorter to sort(o's doctored, 1, -1, {slave:{listOfText}})
end considering
Line 288 ⟶ 281:
end naturalSort
 
on join(lst, delim)
(* Tests: *)
set astid to AppleScript's text item delimiters
-- Leading, trailing, and multiple white spaces ignored:
set AppleScript's text item delimiters to delim
naturalSort({" ignore superfluous spaces: 1-3", "ignore superfluous spaces: 1-1", " ignore superfluous spaces: 1-2", ¬
set txt to lst as text
" ignore superfluous spaces: 1-4", "ignore superfluous spaces: 1-7", "ignore superfluous spaces: 1-5 ", ¬
set AppleScript's text item delimiters to astid
"ignore superfluous spaces: 1-6", " ignore superfluous spaces: 1-8"})
return txt
--> {"ignore superfluous spaces: 1-1", " ignore superfluous spaces: 1-2", " ignore superfluous spaces: 1-3", " ignore superfluous spaces: 1-4", "ignore superfluous spaces: 1-5 ", "ignore superfluous spaces: 1-6", "ignore superfluous spaces: 1-7", " ignore superfluous spaces: 1-8"}
end join
 
on tests()
	-- Demonstration driver: runs naturalSort over a series of sample lists and
	-- returns a single text in which each section heading is followed by
	-- whatever naturalSort returned for that section's samples.
	--
	-- Each entry in the table below is {section heading, list of strings to sort}.
	set testCases to {¬
		{"(* Leading, trailing, and multiple white spaces ignored *)", ¬
			{" ignore superfluous spaces: 1-3", "ignore superfluous spaces: 1-1", ¬
				" ignore superfluous spaces: 1-2", " ignore superfluous spaces: 1-4", ¬
				"ignore superfluous spaces: 1-7", "ignore superfluous spaces: 1-5 ", ¬
				"ignore superfluous spaces: 1-6", " ignore superfluous spaces: 1-8"}}, ¬
		{"(* All white space characters treated as equivalent *)", ¬
			{"Equiv. spaces: 2-6", "Equiv." & return & "spaces: 2-5", ¬
				"Equiv." & (character id 12) & "spaces: 2-4", ¬
				"Equiv." & (character id 11) & "spaces: 2-3", ¬
				"Equiv." & linefeed & "spaces: 2-2", "Equiv." & tab & "spaces: 2-1"}}, ¬
		{"(* Case ignored. (The sort order would actually be the same with case considered,
since case only decides the issue when the strings are otherwise identical.) *)", ¬
			{"cASE INDEPENDENT: 3-1", "caSE INDEPENDENT: 3-2", ¬
				"CASE independent: 3-3", "casE INDEPENDENT: 3-4", "case INDEPENDENT: 3-5"}}, ¬
		{"(* Numerics considered by number value *)", ¬
			{"foo1000bar99baz10.txt", "foo100bar99baz0.txt", ¬
				"foo100bar10baz0.txt", "foo1000bar99baz9.txt"}}, ¬
		{"(* Title sort *)", ¬
			{"The Wind in the Willows", "The 40th Step More", ¬
				"A Matter of Life and Death", "The 39 steps", ¬
				"An Inspector Calls", "Wanda"}}, ¬
		{"(* Diacriticals (and case) ignored *)", ¬
			{"Equiv. " & (character id 253) & " accents: 6-1", ¬
				"Equiv. " & (character id 221) & " accents: 6-3", ¬
				"Equiv. y accents: 6-4", "Equiv. Y accents: 6-2"}}, ¬
		{"(* Ligatures *)", ¬
			{(character id 306) & " ligatured", ¬
				"of", "ij no ligature", (character id 339), "od"}}, ¬
		{"(* Custom \"s\" equivalents and Esszet (NB. Esszet normalises to \"ss\") *)", ¬
			{"Start with an " & (character id 658) & ": 8-1", ¬
				"Start with an " & (character id 383) & ": 8-2", ¬
				"Start with an " & (character id 223) & ": 8-3", ¬
				"Start with an s: 8-4", "Start with an ss: 8-5"}}}
	
	set output to {}
	repeat with testCase in testCases
		set {heading, samples} to contents of testCase
		-- Every heading except the first gets a leading linefeed, which shows up
		-- as a blank separator line once the pieces are joined with linefeeds.
		if (count output) > 0 then set heading to linefeed & heading
		set end of output to heading
		set end of output to naturalSort(samples)
	end repeat
	
	-- Headings and results are joined with linefeed as the delimiter.
	return join(output, linefeed)
end tests
 
tests()</syntaxhighlight>
 
{{output}}
<syntaxhighlight lang="applescript">"(* Leading, trailing, and multiple white spaces ignored *)
ignore superfluous spaces: 1-1
ignore superfluous spaces: 1-2
ignore superfluous spaces: 1-3
ignore superfluous spaces: 1-4
ignore superfluous spaces: 1-5
ignore superfluous spaces: 1-6
ignore superfluous spaces: 1-7
ignore superfluous spaces: 1-8
 
(* All white space characters treated as equivalent *)
Equiv. spaces: 2-1
naturalSort({"Equiv. spaces: 2-6", "Equiv." & return & "spaces: 2-5", "Equiv." & (character id 12) & "spaces: 2-4", ¬
Equiv.
"Equiv." & (character id 11) & "spaces: 2-3", "Equiv." & linefeed & "spaces: 2-2", "Equiv." & tab & "spaces: 2-1"})
spaces: 2-2
(* -->
{"Equiv. spaces�spaces: 2-1", "Equiv.3
spaces: 2-2", "Equiv.�spaces: 2-3", "Equiv.�spaces: 2-4", "Equiv.
Equiv.
spaces: 2-5", "Equiv. spaces: 2-6"}
spaces: 2-5
*)
Equiv. spaces: 2-6
 
(* Case ignored. (The sort order would actually be the same with case considered,
since case only decides the issue when the strings are otherwise identical.) *)
cASE INDEPENDENT: 3-1
naturalSort({"cASE INDEPENDENT: 3-1", "caSE INDEPENDENT: 3-2", "CASE independent: 3-3", "casE INDEPENDENT: 3-4", ¬
"casecaSE INDEPENDENT: 3-5"})2
CASE independent: 3-3
--> {"cASE INDEPENDENT: 3-1", "caSE INDEPENDENT: 3-2", "CASE independent: 3-3", "casE INDEPENDENT: 3-4", "case INDEPENDENT: 3-5"}
casE INDEPENDENT: 3-4
case INDEPENDENT: 3-5
 
(* Numerics considered by number value *)
foo100bar10baz0.txt
naturalSort({"foo1000bar99baz10.txt", "foo100bar99baz0.txt", "foo100bar10baz0.txt", "foo1000bar99baz9.txt"})
foo100bar99baz0.txt
--> {"foo100bar10baz0.txt", "foo100bar99baz0.txt", "foo1000bar99baz9.txt", "foo1000bar99baz10.txt"}
foo1000bar99baz9.txt
foo1000bar99baz10.txt
 
(* Title sort *)
The 39 steps
naturalSort({"The Wind in the Willows", "The 40th Step More", "A Matter of Life and Death", "The 39 steps", ¬
The 40th Step More
"An Inspector Calls", "Wanda"})
An Inspector Calls
--> {"The 39 steps", "The 40th Step More", "An Inspector Calls", "A Matter of Life and Death", "Wanda", "The Wind in the Willows"}
A Matter of Life and Death
Wanda
The Wind in the Willows
 
(* Diacriticals (and case) ignored *)
Equiv. ý accents: 6-1
naturalSort({"Equiv. " & (character id 253) & " accents: 6-1", "Equiv. " & (character id 221) & " accents: 6-3", ¬
"Equiv. y accents: 6-4", "Equiv. Y accents: 6-2"})
--> {"Equiv. ý accents: 6-1", "Equiv. Y accents: 6-2", "Equiv. Ý accents: 6-3", "Equiv. y accents: 6-4"}
Equiv. y accents: 6-4
 
(* Ligatures *)
IJ ligatured
naturalSort({(character id 306) & " ligatured", "of", "ij no ligature", (character id 339), "od"})
--> {"IJ ligatured", "ij no ligature", "od", "œ", "of"}
od
œ
of
 
(* Custom \"s\" equivalents and Esszet (NB. Esszet normalises to \"ss\") *)
Start with an ʒ: 8-1
naturalSort({"Start with an " & (character id 658) & ": 8-1", "Start with an " & (character id 383) & ": 8-2", ¬
Start with an ſ: 8-2
"Start with an " & (character id 223) & ": 8-3", "Start with an s: 8-4", "Start with an ss: 8-5"})
Start with an s: 8-4
--> {"Start with an ʒ: 8-1", "Start with an ſ: 8-2", "Start with an s: 8-4", "Start with an ß: 8-3", "Start with an ss: 8-5"}</syntaxhighlight>
Start with an ß: 8-3
Start with an ss: 8-5"</syntaxhighlight>
 
=={{header|ATS}}==
557

edits