Category talk:Wren-str

Source code

/* Module "str.wren" */

/*
   Char contains routines to perform various operations on characters.
   A 'character' for this purpose is a single Unicode codepoint.
   Categorization and casing is supported for characters < 256 (Latin-1) but no higher.
   The 'symbol' category includes 'other letter', 'other number' and soft hyphen (ªº¹²³¼½¾¯).
   For convenience a string containing more than one character can be passed
   as an argument but the methods will only operate on the first character.
*/
class Char {
    // Returns the codepoint of the first character of the string 'c'.
    // Aborts the fiber if 'c' is not a string or is empty.
    static code(c) { (c is String && !c.isEmpty) ? c.codePoints[0] :
                      Fiber.abort("Argument must be a non-empty string.") }

    // Convenience method to return a character from its codepoint 'c'.
    static fromCode(c) { String.fromCodePoint(c) }

    // All the predicates below examine only the first character of 'c' and
    // abort (via 'code') if 'c' is not a non-empty string.

    // Checks whether the character lies in the ASCII or Latin-1 range.
    static isAscii(c)       { code(c) < 128 }
    static isLatin1(c)      { code(c) < 256 }

    // ASCII categories.
    // 'code' always yields a number and every number (even 0) is truthy in Wren,
    // so the leading assignment never short-circuits the range test that follows.
    static isDigit(c)         { (c = code(c)) && c >= 48 && c <= 57 }
    static isAsciiLower(c)    { (c = code(c)) && c >= 97 && c <= 122 }
    static isAsciiUpper(c)    { (c = code(c)) && c >= 65 && c <= 90 }
    static isAsciiLetter(c)   { isAsciiLower(c) || isAsciiUpper(c) }
    static isAsciiAlphaNum(c) { isAsciiLower(c) || isAsciiUpper(c) || isDigit(c) }
    static isSpace(c)         { (c = code(c)) && (c == 32 || c == 9 || c == 10 || c == 13) }

    // Latin-1 categories.

    // Lower case letters: a-z, the micro sign (181) and 223-255 except the division sign (247).
    static isLower(c) {
        var d = code(c)
        return (d >= 97 && d <= 122) || (d == 181) || (d >= 223 && d <= 246) ||
               (d >= 248 && d <= 255)
    }

    // Upper case letters: A-Z and 192-222 except the multiplication sign (215).
    static isUpper(c) {
        var d = code(c)
        return (d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)
    }

    static isLetter(c)       { isLower(c) || isUpper(c) }
    static isAlphaNumeric(c) { isLower(c) || isUpper(c) || isDigit(c) }

    // Control characters: 0-31 and 127-159.
    static isControl(c) {
        var d = code(c)
        return d < 32 || (d >= 127 && d < 160)
    }

    // Printable characters: every Latin-1 character which is not a control character.
    static isPrintable(c) {
        var d = code(c)
        return (d >= 32 && d < 127) || (d >= 160 && d < 256)
    }

    // Graphic characters: printable characters other than space (32) and
    // non-breaking space (160).
    static isGraphic(c) {
        var d = code(c)
        return (d >= 33 && d < 127) || (d >= 161 && d < 256)
    }

    // Whitespace: space, the range tab..carriage return (9-13) and non-breaking space.
    static isWhitespace(c) {
        var d = code(c)
        return d == 32 || (d >= 9 && d <= 13) || d == 160
    }
    
    // Punctuation is defined by membership of a fixed list of characters.
    // The call to 'code' is made purely to validate the argument.
    static isPunctuation(c) { code(c) && "!\"#\%&'()*,-./:;?@[\\]_{}¡§«¶·»¿".contains(c[0]) }

    // A symbol is any graphic character which is neither alphanumeric nor punctuation.
    static isSymbol(c) { isGraphic(c) && !isAlphaNumeric(c) && !isPunctuation(c) }

    // Returns the name of the category to which the first character of 'c' belongs:
    // "control", "space", "digit", "upper", "lower", "punctuation", "symbol"
    // or "non-latin1" for codepoints of 256 or more.
    static category(c) {
        var d = code(c)
        return (d  <  32)             ? "control"     :
               (d ==  32)             ? "space"       :
               (d >=  48 && d <= 57)  ? "digit"       :
               (d >=  65 && d <= 90)  ? "upper"       :
               (d >=  97 && d <= 122) ? "lower"       :
               (d >= 127 && d <= 159) ? "control"     :
               (d == 160)             ? "space"       :
               (d == 181)             ? "lower"       :
               (d >= 192 && d <= 214) ? "upper"       :
               (d >= 216 && d <= 222) ? "upper"       :
               (d >= 223 && d <= 246) ? "lower"       :
               (d >= 248 && d <= 255) ? "lower"       :
               (d >= 256)             ? "non-latin1"  :
               isPunctuation(c)       ? "punctuation" : "symbol"
    }

    // Returns the first character of a string converted to lower case.
    // Characters without a Latin-1 lower case form are returned unchanged.
    static lower(c) {
        var d = code(c)
        if ((d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)) {
            return fromCode(d+32)
        }
        return c[0]
    }

    // Returns the first character of a string converted to upper case.
    // Characters without a Latin-1 upper case form (including 181, 223 and 255)
    // are returned unchanged.
    static upper(c) {
        var d = code(c)
        if ((d >= 97 && d <= 122) || (d >= 224 && d <= 246) || (d >= 248 && d <= 254)) {
            return fromCode(d-32)
        }
        return c[0]
    }

    // Swaps the case of the first character in a string.
    static swapCase(c) {
        var d = code(c)
        if ((d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)) {
            return fromCode(d+32)
        }
        if ((d >= 97 && d <= 122) || (d >= 224 && d <= 246) || (d >= 248 && d <= 254)) {
            return fromCode(d-32)
        }
        return c[0]
    }
}

/* Str supplements the String class with various other operations on strings. */
class Str {
    // Stand-ins for the ordering operators <, <=, >, >= which the String class
    // does not provide. Each one delegates to 'compare'.
    static lt(s1, s2) {
        return compare(s1, s2) < 0
    }

    static le(s1, s2) {
        return compare(s1, s2) <= 0
    }

    static gt(s1, s2) {
        return compare(s1, s2) > 0
    }

    static ge(s1, s2) {
        return compare(s1, s2) >= 0
    }

    // Lexicographic comparison of two strings, codepoint by codepoint.
    // Yields -1 if s1 sorts before s2, 0 if they are equal and +1 otherwise.
    static compare(s1, s2)  {
        if (s1 == s2) return 0
        var a = s1.codePoints.toList
        var b = s2.codePoints.toList
        var shared = (b.count < a.count) ? b.count : a.count
        var i = 0
        while (i < shared) {
            // The first differing codepoint decides the ordering.
            if (a[i] != b[i]) return (a[i] < b[i]) ? -1 : 1
            i = i + 1
        }
        // One string is a proper prefix of the other: the shorter sorts first.
        return (a.count < b.count) ? -1 : 1
    }

    // Checks if every character of a string falls into a particular category.
    // All of these return false for the empty string.
    // The first five test codepoints directly; the rest defer to the matching Char method.
    static allAscii(s)         { s != "" && s.codePoints.all { |c| c < 128             } }
    static allLatin1(s)        { s != "" && s.codePoints.all { |c| c < 256             } }
    static allDigits(s)        { s != "" && s.codePoints.all { |c| c >= 48 && c <= 57  } }
    static allAsciiLower(s)    { s != "" && s.codePoints.all { |c| c >= 97 && c <= 122 } }
    static allAsciiUpper(s)    { s != "" && s.codePoints.all { |c| c >= 65 && c <= 90  } }
    static allAsciiLetters(s)  { s != "" && s.toList.all { |c| Char.isAsciiLetter(c)   } }
    static allAsciiAlphaNum(s) { s != "" && s.toList.all { |c| Char.isAsciiAlphaNum(c) } }
    static allSpace(s)         { s != "" && s.toList.all { |c| Char.isSpace(c)         } }
    static allLower(s)         { s != "" && s.toList.all { |c| Char.isLower(c)         } }
    static allUpper(s)         { s != "" && s.toList.all { |c| Char.isUpper(c)         } }
    static allLetters(s)       { s != "" && s.toList.all { |c| Char.isLetter(c)        } }
    static allAlphaNumeric(s)  { s != "" && s.toList.all { |c| Char.isAlphaNumeric(c)  } }
    static allPrintable(s)     { s != "" && s.toList.all { |c| Char.isPrintable(c)     } }
    static allGraphic(s)       { s != "" && s.toList.all { |c| Char.isGraphic(c)       } }
    static allWhitespace(s)    { s != "" && s.toList.all { |c| Char.isWhitespace(c)    } }

    // Tests whether a string parses as a number, an integer or a non-integer (float).
    // 'isNumeric' hands back whatever Num.fromString produces, i.e. the parsed number
    // itself (always truthy) or null. The other two yield null when 's' is not numeric.
    static isNumeric(s) {
        return Num.fromString(s)
    }

    static isIntegral(s) {
        var n = isNumeric(s)
        return n && n.isInteger
    }

    static isFloat(s) {
        var n = isNumeric(s)
        return n && !n.isInteger
    }

    // Returns 's' with every upper case Latin-1 letter converted to lower case.
    // A non-string argument is first converted to its string representation.
    static lower(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var chars = s.toList
        var count = chars.count
        for (i in 0...count) {
            var cp = chars[i].codePoints[0]
            var isUpper = (cp >= 65 && cp <= 90) || (cp >= 192 && cp <= 214) ||
                          (cp >= 216 && cp <= 222)
            // Each upper case letter sits exactly 32 codepoints below its lower case form.
            if (isUpper) chars[i] = String.fromCodePoint(cp + 32)
        }
        if (count >= 1000) return Strs.concat(chars, 1000)
        return Strs.concat_(chars)
    }

    // Returns 's' with every lower case Latin-1 letter that has a Latin-1 upper case
    // form converted to upper case.
    // A non-string argument is first converted to its string representation.
    static upper(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var chars = s.toList
        var count = chars.count
        for (i in 0...count) {
            var cp = chars[i].codePoints[0]
            var isLower = (cp >= 97 && cp <= 122) || (cp >= 224 && cp <= 246) ||
                          (cp >= 248 && cp <= 254)
            // Each such lower case letter sits exactly 32 codepoints above its upper case form.
            if (isLower) chars[i] = String.fromCodePoint(cp - 32)
        }
        if (count >= 1000) return Strs.concat(chars, 1000)
        return Strs.concat_(chars)
    }

    // Returns 's' with upper case Latin-1 letters lowered and lower case ones raised.
    // A non-string argument is first converted to its string representation.
    static swapCase(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var chars = s.toList
        var count = chars.count
        for (i in 0...count) {
            var cp = chars[i].codePoints[0]
            // Offset to apply to the codepoint: +32 lowers, -32 raises, 0 leaves it alone.
            var delta = 0
            if ((cp >= 65 && cp <= 90) || (cp >= 192 && cp <= 214) || (cp >= 216 && cp <= 222)) {
                delta = 32
            } else if ((cp >= 97 && cp <= 122) || (cp >= 224 && cp <= 246) ||
                       (cp >= 248 && cp <= 254)) {
                delta = -32
            }
            if (delta != 0) chars[i] = String.fromCodePoint(cp + delta)
        }
        if (count >= 1000) return Strs.concat(chars, 1000)
        return Strs.concat_(chars)
    }

    // Returns 's' with its first character in upper case. If 's' starts with '['
    // and has more than one character, the character after the bracket is the one raised.
    // A non-string argument is first converted to its string representation.
    static capitalize(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var bracketed = s.startsWith("[") && s.count > 1
        var start = bracketed ? 1 : 0
        var cp = s[start].codePoints[0]
        var isLower = (cp >= 97 && cp <= 122) || (cp >= 224 && cp <= 246) ||
                      (cp >= 248 && cp <= 254)
        if (!isLower) return s
        // Everything after the (possibly multi-byte) character being raised.
        var rest = s[start+1..-1]
        var result = String.fromCodePoint(cp - 32) + rest
        return bracketed ? "[" + result : result
    }

    // Returns 's' with each of its space-separated words capitalized.
    // A non-string argument is first converted to its string representation.
    static title(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var words = []
        for (w in s.split(" ")) words.add(capitalize(w))
        return Strs.join(words, " ")
    }

    // Removes accents and cedillas from all Latin-1 supplement characters in a string
    // and also expands digraphs (æ, Æ, þ, Þ, ß) before returning the result.
    // A non-string argument is first converted to its string representation.
    static unaccent(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        // accented[j] lists every accented form of the plain letter unaccented[j].
        var accented = [
            "àáâãäå", "ÀÁÂÃÄÅ", "ç", "Ç", "ð", "Ð", "èéêë", "ÈÉÊË", "ìíîï", "ÌÍÎÏ",
            "ñ", "Ñ", "òóôõöø", "ÒÓÔÕÖØ", "ùúûü", "ÙÚÛÜ", "ýÿ", "Ý"
        ]
        var unaccented = "aAcCdDeEiInNoOuUyY"
        var digraphs = { "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss" }
        var chars = s.toList
        var count = chars.count
        var i = 0
        for (c in s.codePoints) {
            // Only codepoints 0xc0 to 0xff can be accented letters or digraphs.
            if (c >= 0xc0 && c <= 0xff) {
                var ch = chars[i]
                var found = false
                for (j in 0...accented.count) {
                    if (accented[j].contains(ch)) {
                        chars[i] = unaccented[j]
                        found = true
                        break
                    }
                }
                if (!found) {
                    // A missing key yields null, leaving characters such as × and ÷ untouched.
                    var expansion = digraphs[ch]
                    if (expansion) chars[i] = expansion
                }
            }
            i = i + 1
        }
        return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
    }

    // Returns 's' with its characters (whole codepoints, not bytes) in reverse order.
    // A non-string argument is first converted to its string representation.
    static reverse(s) {
        if (!(s is String)) s = "%(s)"
        // The empty string is returned as it stands rather than being subscripted.
        if (s == "") return s
        return s[-1..0]
    }

    // Performs a circular shift of the characters of 's' 'n' places to the left.
    // If 'n' is negative performs a circular right shift by '-n' places instead.
    // A non-string 's' is first converted to its string representation.
    // Aborts if 'n' is not an integer.
    static lshift(s, n) {
        if (!(s is String)) s = "%(s)"
        if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
        var chars = s.toList
        var count = chars.count
        if (count < 2) return s
        if (n < 0) return rshift(s, -n)
        n = n % count
        if (n == 0) return s
        // Rotate in a single step: the characters from index 'n' onwards come first,
        // followed by the first 'n' characters.
        chars = chars[n..-1] + chars[0...n]
        return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
    }

    // Performs a circular shift of the characters of 's' 'n' places to the right.
    // If 'n' is negative performs a circular left shift by '-n' places instead.
    // A non-string 's' is first converted to its string representation.
    // Aborts if 'n' is not an integer.
    static rshift(s, n) {
        if (!(s is String)) s = "%(s)"
        if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
        var chars = s.toList
        var count = chars.count
        if (count < 2) return s
        if (n < 0) return lshift(s, -n)
        n = n % count
        if (n == 0) return s
        // Rotate in a single step: the last 'n' characters come first,
        // followed by everything which preceded them.
        var split = count - n
        chars = chars[split..-1] + chars[0...split]
        return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
    }

    // Convenience versions of the above methods which shift by just 1 place.
    static lshift(s) { lshift(s, 1) }
    static rshift(s) { rshift(s, 1) }

    /* The indices (or ranges thereof) for all the following functions are measured in codepoints
       (not bytes). Negative indices count backwards from the end of the string. As with core
       library methods, the indices must be within bounds or errors will be generated. */

    // Returns the part of 's' covered by the range 'r', whose bounds are codepoint indices.
    // Aborts if 'r' is not a Range; a non-string 's' is converted to a string first.
    static sub(s, r) {
        if (!(r is Range)) Fiber.abort("Second argument must be a range.")
        if (!(s is String)) s = "%(s)"
        var chars = s.toList
        return Strs.concat(chars[r])
    }

    // Private helper which aborts unless 'index' is an integer satisfying
    // -(s.count + inc) <= index < s.count + inc.
    static checkIndex_(s, index, inc) {
        var isInt = index.type == Num && index.isInteger
        if (!isInt) Fiber.abort("Index must be an integer.")
        var limit = s.count + inc
        var inBounds = index >= -limit && index < limit
        if (!inBounds) Fiber.abort("Index is out of bounds.")
    }

    // Returns the character of 's' at codepoint index 'i'.
    // Aborts if 'i' is not an integer or is out of bounds.
    static get(s, i) {
        if (!(s is String)) s = "%(s)"
        checkIndex_(s, i, 0)
        var chars = s.toList
        var at = (i < 0) ? chars.count + i : i
        return chars[at]
    }

    // Returns the character of 's' at codepoint index 'i', or null if 'i' is out of bounds.
    // Aborts if 'i' is not an integer.
    static getOrNull(s, i) {
        if (!(s is String)) s = "%(s)"
        if (!(i is Num && i.isInteger)) Fiber.abort("Index must be an integer.")
        var n = s.count
        if (i < 0) i = n + i
        if (i < 0 || i >= n) return null
        return s.toList[i]
    }

    // Returns the codepoint index (not the byte index) of the first occurrence of
    // 'search' within 's', or -1 if there is none.
    // Aborts if 'search' is not a string; a non-string 's' is converted to a string first.
    static indexOf(s, search) {
        if (!(search is String)) Fiber.abort("Search argument must be a string.")
        if (!(s is String)) s = "%(s)"
        var byteIx = s.indexOf(search)
        // -1 (not found) and 0 (found at the very start) need no conversion.
        if (byteIx <= 0) return byteIx
        // Walk the codepoints, totalling their byte lengths, until the byte offset
        // of the match is reached.
        var bytesSeen = 0
        var cpIx = 0
        for (cp in s.codePoints) {
            bytesSeen = bytesSeen + Utf8.byteCount(cp)
            cpIx = cpIx + 1
            if (bytesSeen == byteIx) return cpIx
        }
    }

    // Returns the codepoint index (not byte index) at which 'search' first occurs in 's'
    // or -1 if 'search' is not found, starting from codepoint offset 'start'.
    // A negative 'start' counts backwards from the end of 's'; the index returned is
    // always measured from the beginning of 's'.
    // Aborts if a negative 'start' reaches back beyond the beginning of 's'.
    static indexOf(s, search, start) {
        if (!(s is String)) s = "%(s)"
        if (start < 0) {
            // Convert to the equivalent non-negative offset so that it can be
            // added to the index found within the sub-string.
            start = s.count + start
            if (start < 0) Fiber.abort("Index is out of bounds.")
        }
        var ss = (start > 0) ? Str.sub(s, start..-1) : s
        var ix = Str.indexOf(ss, search)
        return (ix >= 0) ? start + ix : -1
    }

    // Returns the codepoint index (not byte index) at which 'search' last occurs in 's'
    // or -1 if 'search' is not found. 'search' may be any number of characters long.
    // An empty 'search' is never found, so -1 is returned for it.
    // Aborts if 'search' is not a string; a non-string 's' is converted to a string first.
    static lastIndexOf(s, search) {
        if (!(search is String)) Fiber.abort("Search argument must be a string.")
        if (!(s is String)) s = "%(s)"
        var chars = s.toList
        var target = search.toList
        var n = target.count
        if (n == 0) return -1
        // Try each possible starting position, right-most first. When 's' is shorter
        // than 'search' (or empty) the loop body never runs.
        var i = chars.count - n
        while (i >= 0) {
            var j = 0
            while (j < n && chars[i + j] == target[j]) j = j + 1
            if (j == n) return i
            i = i - 1
        }
        return -1
    }

    // Counts the non-overlapping occurrences of the string 't' within the string 's'.
    static occurs(s, t) {
        // Splitting on 't' yields one more piece than there are occurrences.
        var pieces = s.split(t)
        return pieces.count - 1
    }

    // Counts the non-overlapping occurrences of the string 't' within the string 's',
    // looking only at the part of 's' from codepoint offset 'start' onwards.
    static occurs(s, t, start) {
        var tail = (start == 0) ? s : Str.sub(s, start..-1)
        return occurs(tail, t)
    }

    // Returns 's' with the character at codepoint index 'i' replaced by the string 't'.
    // Aborts if 't' is not a string or 'i' is not a valid index.
    static change(s, i, t) {
        if (!(t is String)) Fiber.abort("Replacement must be a string.")
        if (!(s is String)) s = "%(s)"
        checkIndex_(s, i, 0)
        var chars = s.toList
        var at = (i < 0) ? chars.count + i : i
        chars[at] = t
        return Strs.concat(chars)
    }

    // Returns 's' with the string 't' inserted at codepoint index 'i'.
    // 'i' may equal the length of 's', and -1 likewise denotes the position after
    // the last character. Aborts if 't' is not a string or 'i' is not a valid index.
    static insert(s, i, t) {
        if (!(t is String)) Fiber.abort("Insertion must be a string.")
        if (!(s is String)) s = "%(s)"
        checkIndex_(s, i, 1)
        var chars = s.toList
        var at = (i < 0) ? chars.count + i + 1 : i
        chars.insert(at, t)
        return Strs.concat(chars)
    }

    // Returns 's' with the character at codepoint index 'i' removed.
    // Aborts if 'i' is not a valid index.
    static delete(s, i) {
        if (!(s is String)) s = "%(s)"
        checkIndex_(s, i, 0)
        var chars = s.toList
        var at = (i < 0) ? chars.count + i : i
        chars.removeAt(at)
        return Strs.concat(chars)
    }

    // Returns 's' with the characters at codepoint indices 'i' and 'j' swapped.
    // Aborts if either index is invalid.
    static exchange(s, i, j) {
        if (!(s is String)) s = "%(s)"
        var n = s.count
        checkIndex_(s, i, 0)
        checkIndex_(s, j, 0)
        if (i < 0) i = n + i
        if (j < 0) j = n + j
        if (i == j) return s
        var chars = s.toList
        var held = chars[i]
        chars[i] = chars[j]
        chars[j] = held
        return Strs.concat(chars)
    }

    // Returns 's' with occurrences of 'from' replaced by 'to'.
    // Occurrences are numbered from 1 in order of appearance and only those numbered
    // above 'skip' and up to and including 'n' are replaced. Consequently nothing is
    // replaced when 'n' is zero or 'skip' >= 'n'. A negative 'n' means there is no
    // upper limit, so every occurrence after the first 'skip' is replaced.
    // Aborts if 'from' or 'to' is not a string, if 'n' is not an integer or if 'skip'
    // is not a non-negative integer. A non-string 's' is converted to a string first.
    static replace(s, from, to, n, skip) {
        if (!(from is String)) Fiber.abort("'from' must be a string.")
        if (!(to is String)) Fiber.abort("'to' must be a string.")
        if (!(n is Num && n.isInteger)) Fiber.abort("'n' must be an integer.")
        if (!(skip is Num && skip.isInteger && skip >= 0)) {
            Fiber.abort("'skip' must be a non-negative integer.")
        }
        if (!(s is String)) s = "%(s)"
        if (n < 0) {
            // Replacing everything needs no bookkeeping: defer to the core method.
            if (skip == 0) return s.replace(from, to)
            n = Num.maxSafeInteger
        }
        if (n == 0 || skip >= n) return s
        var count = 0
        var split = s.split(from)
        var res = ""
        // Re-assemble the pieces, choosing for each gap between them whether to put
        // back the original 'from' or its replacement 'to'.
        for (i in 0...split.count-1) {
            count = count + 1
            res = res + split[i] + ((count <= skip || count > n) ? from : to)
        }
        return res + split[-1]
    }

    // Convenience version of 'replace' where 'skip' is always zero.
    static replace(s, from, to, n) { replace(s, from, to, n, 0) }

    // Returns 's' with the string 'by' prepended to each of its lines.
    // Aborts if either argument is not a string.
    static indent(s, by) {
        if (!(s  is String)) Fiber.abort("First argument must be a string.")
        if (!(by is String)) Fiber.abort("Second argument must be a string.")
        var out = []
        for (line in s.split("\n")) out.add(by + line)
        return out.join("\n")
    }

    // Returns 's' with the string 'by' stripped from the start of each line which
    // begins with it; other lines are left alone.
    // Aborts if either argument is not a string.
    static dedent(s, by) {
        if (!(s  is String)) Fiber.abort("First argument must be a string.")
        if (!(by is String)) Fiber.abort("Second argument must be a string.")
        // String ranges are measured in bytes, so the prefix is measured in bytes too.
        var width = by.bytes.count
        var out = []
        for (line in s.split("\n")) {
            out.add(line.startsWith(by) ? line[width..-1] : line)
        }
        return out.join("\n")
    }

    // Returns 's' with trailing spaces and tabs removed from each of its lines.
    // Aborts if the argument is not a string.
    static tidy(s) {
        if (!(s is String)) Fiber.abort("Argument must be a string.")
        var out = []
        for (line in s.split("\n")) out.add(line.trimEnd(" \t"))
        return out.join("\n")
    }

    // Returns 'reps' copies of 's' joined end to end.
    // Aborts if 'reps' is not a non-negative integer; a non-string 's' is converted first.
    static repeat(s, reps) {
        if (!(s is String)) s = "%(s)"
        if (!(reps is Num && reps.isInteger && reps >= 0)) {
            Fiber.abort("Repetitions must be a non-negative integer.")
        }
        var out = ""
        if (reps < 10) {
            // For a handful of copies plain appending is used.
            var done = 0
            while (done < reps) {
                out = out + s
                done = done + 1
            }
            return out
        }
        // Otherwise build the result by repeated doubling: 'unit' holds 's' repeated
        // 1, 2, 4, ... times and is appended whenever the matching bit of 'reps' is set.
        var unit = s
        while (reps > 0) {
            if (reps % 2 == 1) out = out + unit
            reps = reps >> 1
            if (reps > 0) unit = unit + unit
        }
        return out
    }

    // Splits a string 's' into chunks of not more than 'size' characters.
    // Returns a list of these chunks, preserving order. Only the final chunk can be
    // shorter than 'size'. If 's' has no more than 'size' characters the list just holds 's'.
    // Aborts if 'size' is not a positive integer; a non-string 's' is converted first.
    static chunks(s, size) {
        if (!(size is Num && size.isInteger && size > 0)) {
            Fiber.abort("Size must be a positive integer.")
        }
        if (!(s is String)) s = "%(s)"
        // Split into characters once only rather than once per chunk.
        var chars = s.toList
        var c = chars.count
        if (size >= c) return [s]
        var res = []
        var first = 0
        while (first < c) {
            var last = first + size - 1
            // The final chunk may be cut short by the end of the string.
            if (last >= c) last = c - 1
            res.add(Strs.concat(chars[first..last]))
            first = last + 1
        }
        return res
    }

    // Splits 's' on the separator 'sep' and returns the resulting pieces as a list,
    // leaving out any which are empty.
    // Aborts if 'sep' is not a non-empty string; a non-string 's' is converted first.
    static splitNoEmpty(s, sep) {
        if (!(s is String)) s = "%(s)"
        if (!(sep is String) || sep.isEmpty) Fiber.abort("Separator must be a non-empty string.")
        var kept = []
        for (piece in s.split(sep)) {
            if (!piece.isEmpty) kept.add(piece)
        }
        return kept
    }

    // Splits a CSV 'line' into a list of one or more strings separated by 'sep' which must be
    // a single character (except \v). Deals properly with embedded separators in quoted fields.
    // Removes leading and trailing quotes from quoted fields if 'dequote' is true.
    // A quoted field opens where a piece begins with a double quote and closes at the next
    // piece which ends with one. A quoted field which is never closed runs to the end of the line.
    // Aborts if 'sep' is not a single character string or 'dequote' is not a boolean.
    static splitCsv(line, sep, dequote) {
        if (!(line is String)) line = "%(line)"
        if (!(sep is String) || sep.count != 1) {
           Fiber.abort("Separator must be a single character string.")
        }
        if (!(dequote is Bool)) Fiber.abort("Dequote must be a boolean.")
        var fields = line.split(sep)
        var count = 0       // codepoint index within 'line' of the separator after field 'i'
        var quoted = false  // whether we're currently inside a quoted field
        var chars = line.toList
        var total = chars.count
        // First pass: replace each separator lying inside a quoted field with "\v"
        // so that it survives the second split.
        for (i in 0...fields.count) {
            var f = fields[i]
            var fc = f.count
            var mask = false
            if (fc > 0) {
                count = count + fc
                if (!quoted && f[0] == "\"") {
                    // A lone quote can only be an opening quote, not a complete field.
                    if (fc == 1 || f[-1] != "\"") {
                        quoted = true
                        mask = true
                    }
                } else if (quoted && f[-1] == "\"") {
                    quoted = false
                } else if (quoted) {
                    mask = true
                }
            } else if (quoted) {
                mask = true
            }
            // No separator follows the last field, so there is nothing to mask there
            // even if a quoted field was left open.
            if (mask && count < total) chars[count] = "\v"
            count = count + 1
        }
        // Second pass: split on the surviving separators and restore the masked ones.
        fields = chars.join("").split(sep)
        for (i in 0...fields.count) fields[i] = fields[i].replace("\v", sep)
        if (dequote) {
            for (i in 0...fields.count) {
                var f = fields[i]
                var fc = f.count
                if (fc < 2) continue
                if (f[0] == "\"" && f[-1] == "\"") fields[i] = f[1...-1]
            }
        }
        return fields
    }

    // Convenience versions of the above method which use default parameters.
    static splitCsv(line, sep) {  splitCsv(line, sep, true) }
    static splitCsv(line)      {  splitCsv(line, ",", true) }

    // Cuts 's' in two at the first occurrence of 'delim' and returns both parts in a list.
    // When 'include' is true the second part begins with 'delim'; otherwise 'delim'
    // is dropped. If 'delim' is absent from 's' the result is [s, ""].
    // Aborts if 'delim' is not a string or 'include' is not a boolean.
    static bisect(s, delim, include) {
        if (!(delim is String)) Fiber.abort("Delimiter must be a string.")
        if (!(include is Bool)) Fiber.abort("Include must be true or false.")
        if (!(s is String)) s = "%(s)"
        var at = s.indexOf(delim)
        if (at == -1) return [s, ""]
        var head = s[0...at]
        // 'at' is a byte offset so the delimiter is stepped over in bytes as well.
        var tailStart = include ? at : at + delim.bytes.count
        return [head, s[tailStart..-1]]
    }

    // Convenience version of 'bisect' which always drops the delimiter.
    static bisect(s, delim) {
        return bisect(s, delim, false)
    }

    // Builds a string from the list of byte values 'ba'.
    // Aborts if 'ba' is not a list.
    static fromBytes(ba) {
        if (!(ba is List)) Fiber.abort("Argument must be list of bytes.")
        var count = ba.count
        if (count == 0) return ""
        var chars = []
        for (b in ba) chars.add(String.fromByte(b))
        if (count >= 1000) return Strs.concat(chars, 1000)
        return Strs.concat_(chars)
    }

    // Builds a string from the list of codepoints 'ca'.
    // Aborts if 'ca' is not a list.
    static fromCodePoints(ca) {
        if (!(ca is List)) Fiber.abort("Argument must be list of code points.")
        var count = ca.count
        if (count == 0) return ""
        var chars = []
        for (c in ca) chars.add(String.fromCodePoint(c))
        if (count >= 1000) return Strs.concat(chars, 1000)
        return Strs.concat_(chars)
    }

    // Converts 's' to a number using as long a leading run of its characters as possible.
    // A Num argument is returned as it is; anything else which is not a string is first
    // converted to one. Whitespace is trimmed from both ends before conversion, which is
    // done by Num.fromString. Returns null if no leading part of 's' forms a valid number.
    static toNum(s) {
        if (s is Num) return s
        if (!(s is String)) s = "%(s)"
        s = s.trim()
        var n = Num.fromString(s)
        if (n) return n
        if (s.count < 2) return null
        // Drop characters from the end, one at a time, until what remains parses
        // or only a single character is left.
        var chars = s.toList
        while (chars.count > 1) {
            chars.removeAt(-1)
            n = Num.fromString(chars.join())
            if (n) return n
        }
        return null
    }

    // Converts a pattern into a list of tokens for processing by the 'isMatch' method.
    // Characters within the pattern are represented as follows:
    //   Non-wildcard characters as themselves (i.e. single character strings);
    //   * (or **) by the number 0;
    //   ? (or *?) by the number 1;
    //   [set] by a list of the tokens within the set:
    //     single characters by themselves;
    //     a range of characters, a-b, by a Range of codepoints from 'a' to 'b'.
    //   If the first character of the set is '!' then the number -1 is inserted
    //   as a separate token immediately before the list.
    // A '[' which has no closing ']' is treated as an ordinary character.
    // Aborts if a set contains no tokens.
    static tokenize(pattern) {
        // Work on a list of characters so that every index used below is a codepoint
        // index, which keeps patterns containing non-ASCII characters intact.
        var chars = pattern.toList
        var count = chars.count
        var tokens = []
        var i = 0
        while (i < count) {
            var c = chars[i]
            if (c == "*") {
                // Consecutive stars collapse into a single token.
                if (i == 0 || tokens[-1] != 0) tokens.add(0)
            } else if (c == "?") {
                // A '?' directly after a star replaces the star.
                if (i > 0 && tokens[-1] == 0) tokens[-1] = 1 else tokens.add(1)
            } else if (c == "[") {
                // Look for the bracket which closes the set.
                var j = i + 1
                while (j < count && chars[j] != "]") j = j + 1
                if (j >= count) {
                    tokens.add(c)
                } else {
                    var l = []
                    var members = chars[i+1...j]
                    var k = 0
                    while (k < members.count) {
                        var d = members[k]
                        if (d == "!") {
                            // Only a leading '!' negates the set.
                            if (k == 0) tokens.add(-1) else l.add(d)
                        } else if (k < members.count - 2 && members[k+1] == "-") {
                            l.add(d.codePoints[0]..members[k+2].codePoints[0])
                            k = k + 2
                        } else {
                            l.add(d)
                        }
                        k = k + 1
                    }
                    if (l.count == 0) Fiber.abort("set cannot be empty.")
                    tokens.add(l)
                    // Resume after the closing bracket (the increment below steps over it).
                    i = j
                }
            } else {
                tokens.add(c)
            }
            i = i + 1
        }
        return tokens
    }

    // Returns whether a string 's' matches a 'pattern' which may already be tokenized
    // if many strings are to be matched. Matching is case sensitive.
    // Patterns may contain the following wildcards:
    //   * (or **) matches zero or more characters until the next token (if any) matches
    //   and doesn't backtrack in the event of subsequent failure;
    //   ? (or *?) matches exactly one character;
    //   [set] matches a single character from the set within the brackets e.g. [aeiou].
    //   The set can  also contain ranges of characters separated by '-' e.g. [a-zA-Z].
    //   If the first character of the set is '!' then only characters NOT within the rest
    //   of the set are matched e.g. [!0-9] matches any character other than a digit.
    // Aborts if 'pattern' is neither a non-empty string nor a non-empty list of tokens,
    // or if the list contains a token which is not recognized.
    static isMatch(s, pattern) {
        var tokens = pattern
        if (tokens is String) tokens = tokenize(tokens)
        if (!((tokens is List) && tokens.count > 0)) {
            Fiber.abort("'pattern' must be a non-empty string or list of tokens.")
        }
        // Examine 's' character by character (not byte by byte) so that strings
        // containing non-ASCII characters are matched correctly.
        var chars = s.toList
        var count = chars.count
        var i = 0          // index of the current character
        var j = 0          // index of the current token
        var star = false   // whether a '*' is waiting for the following token to match
        var neg = false    // whether the next set is negated
        while (i < count && j < tokens.count) {
            var c = chars[i]
            var t = tokens[j]
            if (t is Num) {
                if (t == 0) {
                    star = true
                } else if (t == 1) {
                    i = i + 1
                    star = false
                } else if (t == -1) {
                    neg = true
                } else {
                    Fiber.abort("'%(t)' is not a recognized token.")
                }
                j = j + 1
            } else if (t is String) {
                if (!star && c != t) return false
                if (star && c == t) star = false
                i = i + 1
                // While the star is still active the same token is tried again.
                if (!star) j = j + 1
            } else if (t is List) {
                var matched = false
                for (e in t) {
                    if (e is String) {
                        if (e == c) {
                            matched = true
                            break
                        }
                    } else if (e is Range){
                        var cp = c.codePoints[0]
                        if (cp >= e.from && cp <= e.to) {
                            matched = true
                            break
                        }
                    } else {
                        Fiber.abort("'%(e)' is not a recognized token within a set.")
                    }
                }
                if (!star && !neg && !matched) return false
                if (!star && neg && matched) return false
                if (star && matched) star = false
                i = i + 1
                neg = false
                if (!star) j = j + 1
            } else {
                Fiber.abort("'%(t)' is not a recognized token.")
            }
        }
        if (i == count && j == tokens.count) return true
        // A trailing '*' matches whatever is left, including nothing at all.
        if (j == tokens.count && tokens[-1] == 0) return true
        if (j == tokens.count - 1 && tokens[-1] == 0) return true
        return false
    }
}

/*
    Strs contains routines applicable to lists of strings.
*/
class Strs {
    // Private helper method for 'concat'.
    // Appends every element of 'ls', in order, to an initially empty string and returns it.
    static concat_(ls) {
        var s = ""
        for (e in ls) {
            s = s + e
        }
        return s
    }

    // Returns the strings in the list 'ls' concatenated together.
    // The list is processed in slices of at most 'chunkSize' elements: each slice is
    // concatenated on its own and the partial results are then appended to the final string.
    // If 'chunkSize' is chosen appropriately, this should be much faster than Sequence.join()
    // for a large list of strings. For extra speed, only minimal type checks are made
    // (only the first element of 'ls' is checked to be a String).
    // Aborts the fiber if 'ls' is not a List or 'chunkSize' is not a positive integer.
    static concat(ls, chunkSize) {
        if (!(ls is List)) Fiber.abort("First argument must be a list of strings.")
        if (chunkSize.type != Num || !chunkSize.isInteger || chunkSize < 1) {
            Fiber.abort("Second argument must be a positive integer.")
        }
        var count = ls.count
        if (count == 0) return ""
        if (ls[0].type != String) Fiber.abort("First argument must be a list of strings.")
        // A list that fits in a single chunk gains nothing from slicing.
        if (count <= chunkSize) return concat_(ls)
        var s = ""
        var start = 0
        while (start < count) {
            // The final slice may be shorter than 'chunkSize'.
            var stop = start + chunkSize
            if (stop > count) stop = count
            s = s + concat_(ls[start...stop])
            start = stop
        }
        return s
    }

    // Convenience version of the above which uses a 'chunkSize' of 1000. This usually gives a good result.
    static concat(ls) { concat(ls, 1000) }

    // Private helper method for 'join'.
    // Appends every element of 'ls', in order, placing 'sep' between consecutive elements.
    static join_(ls, sep) {
        var first = true
        var s = ""
        for (e in ls) {
            if (!first) s = s + sep
            first = false
            s = s + e
        }
        return s
    }

    // Returns the strings in the list 'ls' joined together using the separator 'sep'.
    // The list is processed in slices of at most 'chunkSize' elements, as in 'concat'.
    // If 'chunkSize' is chosen appropriately, this should be much faster than Sequence.join(sep)
    // for a large list of strings. For extra speed, only minimal type checks are made
    // (only the first element of 'ls' is checked to be a String).
    // Aborts the fiber if 'ls' is not a List, 'sep' is not a String or 'chunkSize' is not
    // a positive integer.
    static join(ls, sep, chunkSize) {
        if (!(ls is List)) Fiber.abort("First argument must be a list of strings.")
        if (sep.type != String) Fiber.abort("Second argument must be a string")
        // Validate 'chunkSize' before delegating to 'concat' so that a bad value is always
        // reported against the correct (third) argument position.
        if (chunkSize.type != Num || !chunkSize.isInteger || chunkSize < 1) {
            Fiber.abort("Third argument must be a positive integer.")
        }
        if (sep == "") return concat(ls, chunkSize)
        var count = ls.count
        if (count == 0) return ""
        if (ls[0].type != String) Fiber.abort("First argument must be a list of strings.")
        // A list that fits in a single chunk gains nothing from slicing.
        if (count <= chunkSize) return join_(ls, sep)
        var s = ""
        var start = 0
        while (start < count) {
            // Adjacent slices also need a separator between them.
            if (start > 0) s = s + sep
            var stop = start + chunkSize
            if (stop > count) stop = count
            s = s + join_(ls[start...stop], sep)
            start = stop
        }
        return s
    }

    // Convenience version of the above which uses a 'chunkSize' of 1000. This usually gives a good result.
    static join(ls, sep) { join(ls, sep, 1000) }
}
 
/* 
    Utf8 contains routines which are specific to the UTF-8 encoding of a string's bytes or codepoints.
*/
class Utf8 {
    // Returns the number of bytes in the UTF-8 encoding of its codepoint argument.
    static byteCount(cp) {
        if (cp < 0 || cp > 0x10ffff) Fiber.abort("Codepoint is out of range.")
        // Exclusive upper limits of the codepoints that fit in 1, 2 and 3 bytes respectively.
        var limits = [0x80, 0x800, 0x10000]
        for (n in 0...limits.count) {
            if (cp < limits[n]) return n + 1
        }
        // Everything else up to 0x10ffff needs 4 bytes.
        return 4
    }

    // Converts a Unicode codepoint into its constituent UTF-8 bytes.
    static encode(cp) {
        // Build the one-character string for 'cp', then list the bytes of that string.
        var ch = String.fromCodePoint(cp)
        return ch.bytes.toList
    }

    // Converts a list of UTF-8 encoded bytes into the equivalent Unicode codepoint.
    static decode(b) {
        if (!((b is List) && b.count >= 1 && b.count <= 4 && (b[0] is Num) && b[0].isInteger)) {
            Fiber.abort("Argument must be a byte list of length 1 to 4.")
        }
        var b0 = b[0]
        // The leading byte determines how many bytes the encoding occupies:
        // below 0x80 -> 1, below 0xe0 -> 2, below 0xf0 -> 3, otherwise 4.
        var needed = 4
        if (b0 < 0x80) {
            needed = 1
        } else if (b0 < 0xe0) {
            needed = 2
        } else if (b0 < 0xf0) {
            needed = 3
        }
        // Report a truncated sequence explicitly instead of failing on a list subscript.
        // Any bytes beyond 'needed' are ignored.
        if (b.count < needed) {
            Fiber.abort("Byte list is too short: leading byte requires %(needed) bytes.")
        }
        if (needed == 1) return b0
        // Data bits carried by the first byte of a 2-, 3- and 4-byte encoding
        // (5, 4 and 3 bits respectively).
        var leadMasks = [0x1f, 0x0f, 0x07]
        var mbMask = 0x3f // non-first bytes start 10 and carry 6 bits of data
        var cp = b0 & leadMasks[needed - 2]
        // Shift in 6 bits of data from each of the following bytes.
        for (k in 1...needed) {
            cp = (cp << 6) | (b[k] & mbMask)
        }
        return cp
    }

    /* The next four methods extend the casing performed by the corresponding 'Str' methods to include
       Latin Extended-A, parts of Latin Extended-B, Latin Extended Additional, Greek, Cyrillic,
       Armenian and Georgian. */

    // Converts a UTF-8 string to lower case.
    static lower(s) {
        // Non-string arguments are converted to their string representation first.
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        // 'chars' holds one string per character of 's'; entries are replaced in place
        // and re-joined at the end. 'i' is the index in 'chars' of the current codepoint.
        var chars = s.toList
        var count = chars.count
        var i = 0
        for (c in s.codePoints) {
            if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) {
                chars[i] = String.fromCodePoint(c + 32)
            } else if (c < 256) {
                // catch other Latin-1 characters quickly.
            } else if ((c >= 0x0100 && c <= 0x0136) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if ((c >= 0x0139 && c <= 0x0147) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if ((c >= 0x014A && c <= 0x0176) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if (c == 0x0178) {
                chars[i] = "ÿ"
            } else if (c == 0x0179 || c == 0x017B || c == 0x017D ||
                       c == 0x01A0 || c == 0x01AF || c == 0x01F4) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if (c == 0x01C4 || c == 0x01C7 || c == 0x01CA || c == 0x01F1) {
                // Upper case digraph: the lower case form is two codepoints further on.
                chars[i] = String.fromCodePoint(c + 2)
            } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
                // Title case digraph: the lower case form is the next codepoint.
                chars[i] = String.fromCodePoint(c + 1)
            } else if ((c >= 0x01DE && c <= 0x01EE) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if ((c >= 0x01F8 && c <= 0x021E) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if ((c >= 0x1E00 && c <= 0x1E94) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if (c == 0x1E9E) {
                chars[i] = "ß"
            } else if ((c >= 0x1EA0 && c <= 0x1EFE) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if (c == 0x0386) {
                chars[i] = "ά"
            } else if (c == 0x0388 || c == 0x0389 || c == 0x038A) {
                chars[i] = String.fromCodePoint(c + 37)
            } else if (c == 0x038C) {
                chars[i] = "ό"
            } else if (c == 0x038E || c == 0x038F) {
                chars[i] = String.fromCodePoint(c + 63)
            } else if (c >= 0x0391 && c <= 0x03A1) {
                chars[i] = String.fromCodePoint(c + 32)
            } else if (c == 0x03A3) {
                // Capital sigma becomes final sigma only as the last character of the string.
                chars[i] = (i == count - 1) ? "ς" : "σ"
            } else if (c >= 0x03A4 && c <= 0x03AB) {
                chars[i] = String.fromCodePoint(c + 32)
            } else if (c >= 0x0400 && c <= 0x040F) {
                // Cyrillic U+0400..U+040F map to U+0450..U+045F. The range must stop at
                // U+040F so that it does not swallow U+0410..U+041F, handled next.
                chars[i] = String.fromCodePoint(c + 80)
            } else if (c >= 0x0410 && c <= 0x042F) {
                // Cyrillic U+0410..U+042F map to U+0430..U+044F.
                chars[i] = String.fromCodePoint(c + 32)
            } else if ((c >= 0x048A && c <= 0x04BE) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if ((c >= 0x04C1 && c <= 0x04CD) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if ((c >= 0x04D0 && c <= 0x052E) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c + 1)
            } else if (c >= 0x0531 && c <= 0x0556) {
                chars[i] = String.fromCodePoint(c + 48)
            } else if (c >= 0x10A0 && c <= 0x10C5) {
                chars[i] = String.fromCodePoint(c + 48)
            }
            i = i + 1
        }
        // Short results are concatenated directly, longer ones in chunks of 1000.
        return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
    }

    // Converts a UTF-8 string to upper case.
    static upper(s) {
        // Non-string arguments are converted to their string representation first.
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        // 'chars' holds one string per character of 's'; entries are replaced in place
        // and re-joined at the end. 'i' is the index in 'chars' of the current codepoint.
        var chars = s.toList
        var count = chars.count
        var i = 0
        for (c in s.codePoints) {
            if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) {
                chars[i] = String.fromCodePoint(c - 32)
            } else if (c == 223) {
                chars[i] = "ẞ"
            } else if (c == 255) {
                // U+00FF (y with diaeresis) pairs with U+0178, mirroring 'lower'.
                chars[i] = "Ÿ"
            } else if (c < 255) {
                // catch other Latin-1 characters quickly.
            } else if ((c >= 0x0101 && c <= 0x0137) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x013A && c <= 0x0148) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x014B && c <= 0x0177) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c == 0x017A || c == 0x017C || c == 0x017E ||
                       c == 0x01A1 || c == 0x01B0 || c == 0x01F5) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
                // Title case digraph: the upper case form is the previous codepoint.
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c == 0x01C6 || c == 0x01C9 || c == 0x01CC || c == 0x01F3) {
                // Lower case digraph: the upper case form is two codepoints back.
                chars[i] = String.fromCodePoint(c - 2)
            } else if ((c >= 0x01DF && c <= 0x01EF) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x01F9 && c <= 0x021F) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x1E01 && c <= 0x1E95) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x1EA1 && c <= 0x1EFF) && (c % 2 == 1)) {
                // U+1E9E (capital sharp s) is deliberately not handled anywhere in this
                // chain: it is already upper case and must be left unchanged.
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c == 0x03AC) {
                chars[i] = "Ά"
            } else if (c == 0x03AD || c == 0x03AE || c == 0x03AF) {
                chars[i] = String.fromCodePoint(c - 37)
            } else if (c >= 0x03B1 && c <= 0x03C1) {
                chars[i] = String.fromCodePoint(c - 32)
            } else if (c == 0x03C2) {
                // Final sigma shares its capital with ordinary sigma.
                chars[i] = "Σ"
            } else if (c >= 0x03C3 && c <= 0x03CB) {
                chars[i] = String.fromCodePoint(c - 32)
            } else if (c == 0x03CC) {
                chars[i] = "Ό"
            } else if (c == 0x03CD || c == 0x03CE) {
                chars[i] = String.fromCodePoint(c - 63)
            } else if (c >= 0x0430 && c <= 0x044F) {
                chars[i] = String.fromCodePoint(c - 32)
            } else if (c >= 0x0450 && c <= 0x045F) {
                chars[i] = String.fromCodePoint(c - 80)
            } else if ((c >= 0x048B && c <= 0x04BF) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x04C2 && c <= 0x04CE) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x04D1 && c <= 0x052F) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c >= 0x0561 && c <= 0x0586) {
                chars[i] = String.fromCodePoint(c - 48)
            } else if (c >= 0x10D0 && c <= 0x10F5) {
                chars[i] = String.fromCodePoint(c - 48)
            }
            i = i + 1
        }
        // Short results are concatenated directly, longer ones in chunks of 1000.
        return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
    }

    // Capitalizes the first character of a UTF-8 string.
    // Uses title rather than upper case variant if it's one of 4 supported digraphs.
    static capitalize(s) {
        // Non-string arguments are converted to their string representation first.
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        // A leading '[' (when followed by something) is skipped, so that the
        // character after it is the one which gets capitalized.
        var bracketed = s.startsWith("[") && s.count > 1
        var start = bracketed ? 1 : 0
        var head = upper(s[start])
        var cp = head.codePoints[0]
        // An upper case digraph (U+01C4, U+01C7, U+01CA, U+01F1) is swapped for its
        // title case variant, which is the next codepoint.
        var isDigraph = cp == 0x01C4 || cp == 0x01C7 || cp == 0x01CA || cp == 0x01F1
        if (isDigraph) head = String.fromCodePoint(cp + 1)
        // Everything after the capitalized character is carried over unchanged.
        var tail = (s.count > start + 1) ? s[start+1..-1] : ""
        var result = head + tail
        return bracketed ? "[" + result : result
    }

    // Capitalizes the first character of each word of a UTF-8 string.
    // Uses title rather than upper case variant if it's one of 4 supported digraphs.
    static title(s) {
        // Non-string arguments are converted to their string representation first.
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        // Words are delimited by single spaces; each one is capitalized independently
        // and the pieces are then re-joined with single spaces.
        var capitalized = []
        for (word in s.split(" ")) {
            capitalized.add(capitalize(word))
        }
        return Strs.join(capitalized, " ")
    }

    // Removes accents and other diacritical marks from all characters in a string,
    // expands digraphs and removes all combining characters before returning the result.
    // As well as Latin-1 Supplement, coverage includes Latin Extended-A and various
    // other characters found in modern European languages which use the Latin alphabet.
    static unaccent(s) {
        // Non-string arguments are converted to their string representation first.
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        // accented[j] lists every accented form which is replaced by the plain
        // letter unaccented[j]; lower and upper case entries alternate.
        var accented = [
            "àáâãäåāăą", "ÀÁÂÃÄÅĀĂĄ", "ḃ", "Ḃ", "çćĉċč", "ÇĆĈĊČ", "ðďđḋ", "ÐĎĐḊ",
            "èéêëēĕėęě", "ÈÉÊËĒĔĖĘĚ", "ḟ", "Ḟ", "ĝğġģ", "ĜĞĠĢ", "ĥħ", "ĤĦ",
            "ìíîïĩīĭįı", "ÌÍÎÏĨĪĬĮİ", "ĵ", "Ĵ", "ķĸ", "Ķ", "ĺļľŀł", "ĹĻĽĿŁ",
            "ṁ", "Ṁ", "ñńņňŉ", "ÑŃŅŇ", "òóôõöøōŏő", "ÒÓÔÕÖØŌŎŐ", "ṗ", "Ṗ",
            "ŕŗř", "ŔŖŘ", "śŝşšșſ", "ŚŜŞŠȘ", "ţťŧṱț", "ŢŤŦṰȚ", "ùúûüũūŭůűų",
            "ÙÚÛÜŨŪŬŮŰŲ", "ŵẁẃẅ", "ŴẀẂẄ", "ýÿỳŷ", "ÝŸỲŶ", "źżž", "ŹŻŽ"
        ]
        var unaccented = "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPrRsStTuUwWyYzZ"
        // Single characters which expand to two plain letters of the same case.
        var digraphs = {
            "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss", "ẞ": "SS",
            "ĳ": "ij", "Ĳ": "IJ", "ŋ": "ng", "Ŋ": "NG", "œ": "oe", "Œ": "OE"
        }
        // 'chars' holds one string per character of 's'; entries are replaced in place
        // and re-joined at the end. 'i' is the index in 'chars' of the current codepoint.
        var chars = s.toList
        var count = chars.count
        var i = 0
        for (c in s.codePoints) {
            // Only codepoints inside the two ranges spanned by the tables are looked up:
            // U+00C0..U+021B (up to t with comma below) and U+1E02..U+1EF3 (up to y with grave).
            if ((c >= 0x00c0 && c <= 0x021B) || (c >= 0x1e02 && c <= 0x1ef3)) {
                var found = false
                for (j in 0...accented.count) {
                    if (accented[j].indexOf(chars[i]) >= 0) {
                        chars[i] = unaccented[j]
                        found = true
                        break
                    }
                }
                if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
            } else if (c >= 0x0300 && c <= 0x036F) {
                // Combining diacritical marks are removed altogether.
                chars[i] = ""
            }
            i = i + 1
        }
        // Short results are concatenated directly, longer ones in chunks of 1000.
        return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
    }

    // Converts a Windows-1252 encoded byte string to a UTF-8 encoded string.
    static fromWin1252(win1252) {
        // A wrong argument type is a usage error: abort rather than print and carry on.
        if (!(win1252 is String)) Fiber.abort("Argument must be a byte string.")
        if (win1252.count == 0) return ""
        // mapping for Windows 1252 bytes 128-159.
        // Unused bytes are mapped to the corresponding ISO-8859-1 control codes.
        var bm = [
            0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
            0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
            0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
            0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
        ]
        var bytes = win1252.bytes
        // One output character per input byte.
        var utf8 = List.filled(bytes.count, 0)
        for (i in 0...bytes.count) {
            var b = bytes[i]
            if (b < 128 || b > 159) {
                // Outside 128-159 the byte value is used directly as the codepoint.
                utf8[i] = String.fromCodePoint(b)
            } else {
                utf8[i] = String.fromCodePoint(bm[b-128])
            }
        }
        return utf8.join()
    }
}

/*
   'Greek' enables characters from the Greek alphabet to be found from their name.
   These characters are often used as mathematical or scientific symbols.
*/
class Greek {
    // Returns the Greek alphabet as a single string: the 25 lower case characters
    // (final sigma included) followed by 25 upper case entries. The upper case half
    // holds the escape \u03a2 in the position which lines up with final sigma.
    static alphabet {
        return "αβγδεζηθικλμνξοπρςστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\u03a2ΣΤΥΦΧΨΩ"
    }

    // Returns a list of the names of all Greek letters in alphabetical order.
    // The entries line up one-to-one with each half of 'alphabet'.
    static names {
        return [
            "alpha", "beta", "gamma", "delta", "epsilon",
            "zeta", "eta", "theta", "iota", "kappa",
            "lambda", "mu", "nu", "xi", "omicron",
            "pi", "rho", "sigma final", "sigma", "tau",
            "upsilon", "phi", "chi", "psi", "omega"
        ]
    }

    // Returns the name of a Greek character or null if not found
    // (or if 'char' is not exactly one character long).
    // Upper case characters are returned with the initial letter capitalized.
    static name(char) {
        if (char.count != 1) return null
        var pos = alphabet.toList.indexOf(char)
        if (pos == -1) return null
        // Positions 0..24 are lower case; 25 onwards are the upper case half.
        return (pos >= 25) ? Str.capitalize(names[pos - 25]) : names[pos]
    }

    // Finds and returns a Greek lower case character from its name.
    // The name is matched case-insensitively; aborts the fiber if it is unknown.
    static lower(name) {
        var pos = names.indexOf(Str.lower(name))
        if (pos == -1) Fiber.abort("Name not found.")
        // The lower case letters occupy consecutive codepoints starting at U+03B1.
        return String.fromCodePoint(0x03b1 + pos)
    }

    // Finds and returns a Greek upper case character from its name.
    // The name is matched case-insensitively; aborts the fiber if it is unknown.
    static upper(name) {
        var key = Str.lower(name)
        var pos = names.indexOf(key)
        if (pos == -1) Fiber.abort("Name not found.")
        // "sigma final" is shifted by one so that it yields the same capital as "sigma".
        var offset = (key == "sigma final") ? pos + 1 : pos
        return String.fromCodePoint(0x0391 + offset)
    }
}