Natural sorting: Difference between revisions

Line 2,203:
]</lang>
</div>
 
 
=={{header|Julia}}==
The approach is functional: each "natural" sorting feature required by the task is implemented as a custom comparison function, which is passed to Julia's built-in sort() through its "lt" keyword option.
<lang julia>#1
# Comparator that ignores leading and trailing whitespace on both operands.
function natural1(x, y)
    lhs, rhs = strip(x), strip(y)
    return lhs < rhs
end
 
#2
# Comparator that treats every run of whitespace characters as a single space.
function natural2(x, y)
    squeeze(s) = replace(s, r"\s+" => " ")
    return squeeze(x) < squeeze(y)
end
 
#3
# Case-insensitive comparator: both operands are lowercased before comparing.
function natural3(x, y)
    return isless(lowercase(x), lowercase(y))
end
 
#4
# Split a string at every boundary between a digit and a non-digit, giving
# alternating runs such as ["foo", "100", "bar", "99", ".txt"].
splitbynum(x) = split(x, r"(?<=\D)(?=\d)|(?<=\d)(?=\D)")

# Convert each chunk made up solely of ASCII digits to a BigInt; every other
# chunk is left as text. Parsing as an arbitrary-precision integer (rather than
# a float) keeps large numbers exact and never mistakes words such as "inf" or
# "nan" for numeric values.
numstringtonum(arr) = [(!isempty(e) && all(isdigit, e)) ? parse(BigInt, e) : e for e in arr]

# Comparator that orders embedded runs of digits by numeric value.
# Chunks are compared pairwise: two numbers numerically, anything else as text.
# If one string's chunks are a prefix of the other's, the shorter sorts first.
function natural4(x, y)
    xarr = numstringtonum(splitbynum(x))
    yarr = numstringtonum(splitbynum(y))
    for (a, b) in zip(xarr, yarr)
        if !(a isa Integer && b isa Integer)
            # Mixed (or purely textual) chunks: fall back to text comparison.
            a, b = string(a), string(b)
        end
        a == b && continue
        return a < b
    end
    return length(xarr) < length(yarr)
end
 
#5
# Remove one leading English article ("The", "An" or "A", with either case for
# the first letter) together with the whitespace that follows it.
# The character classes deliberately contain only the two letter cases; a '|'
# inside [...] is a literal character, not an alternation.
deart(x) = replace(x, r"^(?:[Tt]he|[Aa]n?)\s+" => "")

# Comparator that ignores a leading article on both operands (title sort).
natural5(x, y) = deart(x) < deart(y)
 
#6
# Maps accented Latin letters to their unaccented base letter.
const accentdict = Dict(
    'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A',
    'Å' => 'A', 'Æ' => 'A', 'Ç' => 'C', 'È' => 'E', 'É' => 'E',
    'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I',
    'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O',
    'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ù' => 'U', 'Ú' => 'U',
    'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y', 'à' => 'a', 'á' => 'a',
    'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'ç' => 'c',
    'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ì' => 'i',
    'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd', 'ñ' => 'n',
    'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o',
    'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ý' => 'y',
    'ÿ' => 'y')

# Translate `str` character by character: every character that is a key of
# `dict` is replaced by its mapped value (a Char or a String); all other
# characters are kept unchanged. Returns a new String.
# The result is built in a single pass, so replacements that are longer than
# one character cannot shift the positions of later replacements.
function tr(str, dict=accentdict)
    return join(get(dict, ch, ch) for ch in str)
end

# Strip accents from `x` using `accentdict`.
deaccent(x) = tr(x)

# Comparator that treats accented letters as their unaccented equivalents.
natural6(x, y) = deaccent(x) < deaccent(y)
 
#7
# Maps single-character ligatures to their expanded multi-letter spelling.
# The IJ ligatures are written as code-point escapes: each key must be exactly
# one character, and the two ASCII letters "IJ" are not a valid Char literal.
const ligaturedict = Dict(
    'œ' => "oe", 'Œ' => "OE", 'æ' => "ae", 'Æ' => "AE",
    '\u0132' => "IJ",  # LATIN CAPITAL LIGATURE IJ
    '\u0133' => "ij",  # LATIN SMALL LIGATURE IJ
)
# Expand every ligature in `x` according to `ligaturedict`.
function expandlig(x)
    return tr(x, ligaturedict)
end

# Comparator that treats ligatures as their expanded letter sequences.
function natural7(x, y)
    left, right = expandlig(x), expandlig(y)
    return left < right
end
 
#8
# Alternative letter forms and the plain "s" spelling each one is replaced by.
const altsdict = Dict(
    'ß' => "ss",
    'ſ' => 's',
    'ʒ' => 's',
)

# Rewrite the alternative letter forms in `x` using `altsdict`.
function altstos(x)
    return tr(x, altsdict)
end

# Comparator that treats the alternative letter forms as their replacements.
function natural8(x, y)
    left, right = altstos(x), altstos(y)
    return left < right
end
 
# Comparison functions, listed in the same order as the data sets in
# `testarrays`. `natural2` appears twice on purpose: it is applied to both the
# second and the third data set.
preprocessors = [
    natural1,
    natural2,
    natural2,
    natural3,
    natural4,
    natural5,
    natural6,
    natural7,
    natural8,
]
 
# One data set per entry of `preprocessors`, in the same order.
const testarrays = Vector{String}[
    ["ignore leading spaces: 2-2", " ignore leading spaces: 2-1", " ignore leading spaces: 2+0", " ignore leading spaces: 2+1"],
    ["ignore m.a.s spaces: 2-2", "ignore m.a.s spaces: 2-1", "ignore m.a.s spaces: 2+0", "ignore m.a.s spaces: 2+1"],
    ["Equiv. spaces: 3-3", "Equiv.\rspaces: 3-2", "Equiv.\x0cspaces: 3-1", "Equiv.\x0bspaces: 3+0", "Equiv.\nspaces: 3+1", "Equiv.\tspaces: 3+2"],
    ["cASE INDEPENENT: 3-2", "caSE INDEPENENT: 3-1", "casE INDEPENENT: 3+0", "case INDEPENENT: 3+1"],
    ["foo100bar99baz0.txt", "foo100bar10baz0.txt", "foo1000bar99baz10.txt", "foo1000bar99baz9.txt"],
    ["The Wind in the Willows", "The 40th step more", "The 39 steps", "Wanda"],
    ["Equiv. ý accents: 2-2", "Equiv. Ý accents: 2-1", "Equiv. y accents: 2+0", "Equiv. Y accents: 2+1"],
    ["IJ ligatured ij", "no ligature"],
    ["Start with an ʒ: 2-2", "Start with an ſ: 2-1", "Start with an ß: 2+0", "Start with an s: 2+1"],
]
 
# Sort each data set with the comparator at the same position and print the result.
for (i, ltfunction) in pairs(preprocessors)
    sorted = sort(testarrays[i], lt=ltfunction)
    println("Testing sorting mod number $i. Sorted is: $sorted.")
end
</lang>{{output}}<pre>
Testing sorting mod number 1. Sorted is: [" ignore leading spaces: 2+0", " ignore leading spaces: 2+1", " ignore leading spaces: 2-1", "ignore leading spaces: 2-2"].
Testing sorting mod number 2. Sorted is: ["ignore m.a.s spaces: 2+0", "ignore m.a.s spaces: 2+1", "ignore m.a.s spaces: 2-1", "ignore m.a.s spaces: 2-2"].
Testing sorting mod number 3. Sorted is: ["Equiv.\vspaces: 3+0", "Equiv.\nspaces: 3+1", "Equiv.\tspaces: 3+2", "Equiv.\fspaces: 3-1", "Equiv.\rspaces: 3-2", "Equiv. spaces: 3-3"].
Testing sorting mod number 4. Sorted is: ["casE INDEPENENT: 3+0", "case INDEPENENT: 3+1", "caSE INDEPENENT: 3-1", "cASE INDEPENENT: 3-2"].
Testing sorting mod number 5. Sorted is: ["foo100bar10baz0.txt", "foo100bar99baz0.txt", "foo1000bar99baz9.txt", "foo1000bar99baz10.txt"].
Testing sorting mod number 6. Sorted is: ["The 39 steps", "The 40th step more", "Wanda", "The Wind in the Willows"].
Testing sorting mod number 7. Sorted is: ["Equiv. Y accents: 2+1", "Equiv. Ý accents: 2-1", "Equiv. y accents: 2+0", "Equiv. ý accents: 2-2"].
Testing sorting mod number 8. Sorted is: ["IJ ligatured ij", "no ligature"].
Testing sorting mod number 9. Sorted is: ["Start with an s: 2+1", "Start with an ſ: 2-1", "Start with an ʒ: 2-2", "Start with an ß: 2+0"].
</pre>
 
=={{header|Kotlin}}==
4,102

edits