Category talk:Wren-str

From Rosetta Code
Revision as of 10:36, 17 July 2020 by PureFox (talk | contribs) (→‎Source code: Added a Str.indexOf method and a Utf8 class.)

Source code

<lang ecmascript>/* Module "str.wren" */

/*

  Char contains routines to perform various operations on characters.
  For convenience a string containing more than one character can be passed 
  as an argument but the methods will only operate on the first character.
*/

class Char {

    // Returns the codepoint of the first character of a string.
    // Aborts the fiber if 'c' is not a string or is empty.
    static code(c) {
        if (!(c is String) || c.isEmpty) Fiber.abort("Argument must be a non-empty string.")
        return c.codePoints[0]
    }

    // Convenience method to return a character from its codepoint.
    static fromCode(c) { String.fromCodePoint(c) }

    // Checks if the first character of a string falls into a particular category.
    // Each check calls 'code' first, so an invalid argument aborts the fiber.
    // Note that every number (including 0) is truthy in Wren, so the
    // '(c = code(c)) && ...' form simply validates, converts and then tests.
    static isAscii(c)       { code(c) < 128 }
    static isSymbol(c)      { code(c) && "$+<=>^`|~".contains(c[0])  }
    static isControl(c)     { (c = code(c)) && (c <  32 || c == 127) }
    static isDigit(c)       { (c = code(c)) &&  c >= 48 && c <= 57   }
    static isLower(c)       { (c = code(c)) &&  c >= 97 && c <= 122  }
    static isUpper(c)       { (c = code(c)) &&  c >= 65 && c <= 90   }
    static isPrintable(c)   { (c = code(c)) &&  c >= 32 && c < 127   }
    static isSpace(c)       { (c = code(c)) && (c == 32 || c == 9 || c == 10 || c == 13) }
    static isWhitespace(c)  { (c = code(c)) && (c == 32 || (c >= 9 && c <= 13)) }

    /* Rather than use combinations of the above, these only call the 'code' method once. */

    // True if the first character is an ASCII letter (A-Z or a-z).
    static isLetter(c) {
        var d = code(c)
        return (d >= 65 && d <= 90) || (d >= 97 && d <= 122)
    }

    // True if the first character is an ASCII letter or digit.
    static isAlphanumeric(c) {
        var d = code(c)
        return (d >= 65 && d <= 90) || (d >= 97 && d <= 122) || (d >= 48 && d <= 57)
    }

    // True if the first character is a visible ASCII character (33..126)
    // which is neither alphanumeric nor one of the symbols $+<=>^`|~
    static isPunctuation(c) {
        var d = code(c)
        if (d < 33 || d > 126) return false
        if ((d >= 65 && d <= 90) || (d >= 97 && d <= 122) || (d >= 48 && d <= 57)) return false
        if ("$+<=>^`|~".contains(c[0])) return false
        return true
    }

    // Returns the name of the category to which the first character belongs:
    // "control", "space", "digit", "upper", "lower", "non-ascii", "symbol" or "punctuation".
    static category(c) {
        var d = code(c)
        if (d < 32 || d == 127)   return "control"
        if (d == 32)              return "space"
        if (d >= 48 && d <= 57)   return "digit"
        // Upper case starts at 'A' (65); codepoint 64 is '@', which is punctuation.
        if (d >= 65 && d <= 90)   return "upper"
        if (d >= 97 && d <= 122)  return "lower"
        if (d >= 128)             return "non-ascii"
        return "$+<=>^`|~".contains(c[0]) ? "symbol" : "punctuation"
    }

    // Return the first character of a string converted to the appropriate case.
    // Only ASCII letters are converted; any other character is returned unchanged.
    static upper(c) { ((c = code(c)) && c >= 97 && c <= 122) ? fromCode(c-32) : fromCode(c) }
    static lower(c) { ((c = code(c)) && c >= 65 && c <=  90) ? fromCode(c+32) : fromCode(c) }

    // Returns the first character with its case swapped if it is an ASCII letter,
    // otherwise returns the first character unchanged.
    static swapCase(c) {
        var d = code(c)
        if (d >= 65 && d <=  90) return fromCode(d+32)
        if (d >= 97 && d <= 122) return fromCode(d-32)
        return c[0]
    }

}

/* Str supplements the String class with various other operations on strings. */
class Str {

    // Mimics the comparison operators <, <=, >, >=
    // not supported by the String class.
    static lt(s1, s2) { compare(s1, s2) <  0 }
    static le(s1, s2) { compare(s1, s2) <= 0 }
    static gt(s1, s2) { compare(s1, s2) >  0 }
    static ge(s1, s2) { compare(s1, s2) >= 0 }

    // Compares two strings lexicographically by codepoint.
    // Returns -1, 0 or +1 depending on whether
    // s1 < s2, s1 == s2 or s1 > s2 respectively.
    static compare(s1, s2)  {
        if (s1 == s2) return 0
        var cp1 = s1.codePoints
        var cp2 = s2.codePoints
        var len = (cp1.count <= cp2.count) ? cp1.count : cp2.count
        for (i in 0...len) {
            if (cp1[i] < cp2[i]) return -1
            if (cp1[i] > cp2[i]) return 1
        }
        // One string is a proper prefix of the other: the shorter one sorts first.
        return (cp1.count < cp2.count) ? -1 : 1
    }

    // Checks if every codepoint of a string falls into a particular category.
    static allAscii(s)      { s.codePoints.all { |c| c < 128                        } }
    static allDigits(s)     { s.codePoints.all { |c| c >= 48 && c <= 57             } }
    static allLower(s)      { s.codePoints.all { |c| c >= 97 && c <= 122            } }
    static allUpper(s)      { s.codePoints.all { |c| c >= 65 && c <= 90             } }
    static allPrintable(s)  { s.codePoints.all { |c| c >= 32 && c < 127             } }
    static allWhitespace(s) { s.codePoints.all { |c| c == 32 || (c >= 9 && c <= 13) } }
    static allLetters(s) { s.codePoints.all { |c|
        return (c >= 65 && c <= 90) || (c >= 97 && c <= 122)
    } }
    // The String property is 'codePoints' (capital P).
    static allAlphanumeric(s) { s.codePoints.all { |c|
        return (c >= 65 && c <=  90) || (c >= 97 && c <= 122) || (c >= 48 && c <= 57)
    } }

    // Checks whether a string can be parsed to a number, an integer or a non-integer (float).
    // 'isNumeric' returns the parsed number itself (truthy) or null (falsy).
    static isNumeric(s)  { Num.fromString(s)                  }
    static isIntegral(s) { (s = isNumeric(s)) && s.isInteger  }
    static isFloat(s)    { (s = isNumeric(s)) && !s.isInteger }

    // Converts a string to lower case (ASCII letters only).
    // A non-string argument is first converted to its string representation.
    static lower(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var cps = s.codePoints.toList
        for (i in 0...cps.count) {
            var c = cps[i]
            if (c >= 65 && c <= 90) cps[i] = c + 32
        }
        return cps.reduce("") { |acc, c| acc + String.fromCodePoint(c) }
    }

    // Converts a string to upper case (ASCII letters only).
    // A non-string argument is first converted to its string representation.
    static upper(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var cps = s.codePoints.toList
        for (i in 0...cps.count) {
            var c = cps[i]
            if (c >= 97 && c <= 122) cps[i] = c - 32
        }
        return cps.reduce("") { |acc, c| acc + String.fromCodePoint(c) }
    }

    // Swaps the case of each ASCII letter in a string.
    static swapCase(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var cps = s.codePoints.toList
        for (i in 0...cps.count) {
            var c = cps[i]
            if (c >= 65 && c <= 90) {
                cps[i] = c + 32
            } else if (c >= 97 && c <= 122) {
                cps[i] = c - 32
            }
        }
        return cps.reduce("") { |acc, c| acc + String.fromCodePoint(c) }
    }

    // Capitalizes the first character of a string.
    // If the string starts with '[' and has more than one character,
    // the character following the bracket is capitalized instead.
    static capitalize(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var cps = s.codePoints.toList
        var start = (s.startsWith("[") && cps.count > 1) ? 1 : 0
        var c = cps[start]
        if (c >= 97 && c <= 122) {
            cps[start] = c - 32
            return cps.reduce("") { |acc, c| acc + String.fromCodePoint(c) }
        }
        return s
    }

    // Capitalizes the first character of each word of a string,
    // where words are separated by single spaces.
    static title(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var words = s.split(" ")
        return words.map { |w| capitalize(w) }.join(" ")
    }

    // Reverses the characters (not necessarily single bytes) of a string.
    static reverse(s) {
        if (!(s is String)) s = "%(s)"
        return (s != "") ? s[-1..0] : s
    }

    // Performs a circular shift of the characters of 's' one place to the left.
    static lshift(s) {
        if (!(s is String)) s = "%(s)"
        var chars = s.toList
        var count = chars.count
        if (count < 2) return s
        var t = chars[0]
        for (i in 0..count-2) chars[i] = chars[i+1]
        chars[-1] = t
        return chars.join()
    }

    // Performs a circular shift of the characters of 's' one place to the right.
    static rshift(s) {
        if (!(s is String)) s = "%(s)"
        var chars = s.toList
        var count = chars.count
        if (count < 2) return s
        var t = chars[-1]
        // Walk backwards so each character is copied before it is overwritten.
        for (i in count-2..0) chars[i+1] = chars[i]
        chars[0] = t
        return chars.join()
    }

    /* The indices (or ranges thereof) for all the following functions are measured in codepoints (not bytes).
       As with core library methods, the indices must be within bounds or errors will be generated. */

    // Extracts the sub-string of 's' over the range 'r'.
    static sub(s, r) {
        if (!(r is Range)) Fiber.abort("Second argument must be a range.")
        if (!(s is String)) s = "%(s)"
        return s.toList[r].join()
    }

    // Gets the character of 's' at index 'i'. Throws an error if 'i' is out of bounds.
    static get(s, i) {
        if (!(i is Num && i.isInteger && i >= 0)) Fiber.abort("Index must be a non-negative integer.")
        if (!(s is String)) s = "%(s)"
        return s.toList[i]
    }

    // Gets the character of 's' at index 'i'. Returns null if 'i' is out of bounds.
    static getOrNull(s, i) {
        if (!(i is Num && i.isInteger)) Fiber.abort("Index must be an integer.")
        if (!(s is String)) s = "%(s)"
        return (i >= 0 && i < s.count) ? s.toList[i] : null
    }

    // Returns the codepoint index (not byte index) at which 'search' first occurs in 's'
    // or -1 if 'search' is not found.
    static indexOf(s, search) {
        if (!(search is String)) Fiber.abort("Search argument must be a string.")
        if (!(s is String)) s = "%(s)"
        var ix = s.indexOf(search)
        // -1 (not found) and 0 (found at the start) are the same in bytes and codepoints.
        if (ix <= 0) return ix
        // 'ix' is a byte offset: the number of codepoints in the bytes
        // preceding the match is the codepoint index of the match.
        return s[0...ix].count
    }

    // Changes the character of 's' at index 'i' to the string 't'.
    static change(s, i, t) {
        if (!(i is Num && i.isInteger && i >= 0)) Fiber.abort("Index must be a non-negative integer.")
        if (!(t is String)) Fiber.abort("Replacement must be a string.")
        if (!(s is String)) s = "%(s)"
        var chars = s.toList
        chars[i] = t
        return chars.join()
    }

    // Inserts at index 'i' of 's' the string 't'.
    static insert(s, i, t) {
        if (!(i is Num && i.isInteger && i >= 0)) Fiber.abort("Index must be a non-negative integer.")
        if (!(t is String)) Fiber.abort("Insertion must be a string.")
        if (!(s is String)) s = "%(s)"
        var chars = s.toList
        chars.insert(i, t)
        return chars.join()
    }

    // Deletes the character of 's' at index 'i'.
    static delete(s, i) {
        if (!(i is Num && i.isInteger && i >= 0)) Fiber.abort("Index must be a non-negative integer.")
        if (!(s is String)) s = "%(s)"
        var chars = s.toList
        chars.removeAt(i)
        return chars.join()
    }

    // Exchanges the characters of 's' at indices 'i' and 'j'
    static exchange(s, i, j) {
        if (!(i is Num && i.isInteger && i >= 0)) Fiber.abort("First index must be a non-negative integer.")
        if (!(j is Num && j.isInteger && j >= 0)) Fiber.abort("Second index must be a non-negative integer.")
        if (!(s is String)) s = "%(s)"
        if (i == j) return s
        var chars = s.toList
        var t = chars[i]
        chars[i] = chars[j]
        chars[j] = t
        return chars.join()
    }

    // Splits a string 's' into chunks of not more than 'size' characters.
    // Returns a list of these chunks, preserving order.
    static chunks(s, size) {
        if (!(size is Num && size.isInteger && size > 0)) {
            Fiber.abort("Size must be a positive integer.")
        }
        if (!(s is String)) s = "%(s)"
        var c = s.count
        if (size >= c) return [s]
        var res = []
        var n = (c/size).floor   // number of full-sized chunks
        var final = c % size     // length of the trailing partial chunk, if any
        var first = 0
        var last  = first + size - 1
        for (i in 0...n) {
            res.add(sub(s, first..last))
            first = last + 1
            last  = first + size - 1
        }
        if (final > 0) res.add(sub(s, first..-1))
        return res
    }

}

/*

   Utf8 contains routines which are specific to the UTF-8 encoding of a string's bytes or codepoints.
*/

class Utf8 {

    // Returns the number of bytes in the UTF-8 encoding of its codepoint argument.
    // Aborts the fiber if the codepoint lies outside the range 0..0x10ffff.
    static byteCount(cp) {
        if (cp < 0 || cp > 0x10ffff) Fiber.abort("Codepoint is out of range.")
        if (cp < 0x80) return 1
        if (cp < 0x800) return 2
        if (cp < 0x10000) return 3
        return 4
    }

    // Converts a Unicode codepoint into its constituent UTF-8 bytes.
    static encode(cp) { String.fromCodePoint(cp).bytes.toList }

    // Converts a list of UTF-8 encoded bytes into the equivalent Unicode codepoint.
    // The number of bytes consumed is determined by the leading byte; any surplus
    // bytes in the list are ignored. Aborts the fiber if the argument is not a list
    // of 1 to 4 elements starting with an integer, or if the list holds fewer bytes
    // than its leading byte requires.
    static decode(b) {
        if (!((b is List) && b.count >= 1 && b.count <= 4 && (b[0] is Num) && b[0].isInteger)) {
            Fiber.abort("Argument must be a byte list of length 1 to 4.")
        }
        var b0 = b[0]
        if (b0 < 0x80) return b0  // single byte (ASCII) encoding

        // Work out how many bytes the leading byte announces and make sure they are
        // present, rather than failing later with an unhelpful subscript error.
        var needed = 4
        if (b0 < 0xe0) {
            needed = 2
        } else if (b0 < 0xf0) {
            needed = 3
        }
        if (b.count < needed) Fiber.abort("Byte list is too short for its leading byte.")

        var mbMask = 0x3f // non-first bytes start 10 and carry 6 bits of data
        if (needed == 2) {
            var b2Mask = 0x1f // first byte of a 2-byte encoding starts 110 and carries 5 bits of data
            return (b0 & b2Mask) <<  6 | (b[1] & mbMask)
        }
        if (needed == 3) {
            var b3Mask = 0x0f // first byte of a 3-byte encoding starts 1110 and carries 4 bits of data
            return (b0 & b3Mask) << 12 | (b[1] & mbMask) <<  6 | (b[2] & mbMask)
        }
        var b4Mask = 0x07 // first byte of a 4-byte encoding starts 11110 and carries 3 bits of data
        return (b0 & b4Mask) << 18 | (b[1] & mbMask) << 12 | (b[2] & mbMask) << 6 | (b[3] & mbMask)
    }

}

// Type aliases for classes in case of any name clashes with other modules.
var Str_Char = Char
var Str_Str = Str
var Str_Utf8 = Utf8</lang>