Category talk:Wren-str: Difference between revisions

Content added Content deleted
(→‎Source code: Added a Str.indexOf method and a Utf8 class.)
(→‎Source code: Added new Strs class & partially rewrote Str class to improve performance for large strings.)
Line 119: Line 119:
if (!(s is String)) s = "%(s)"
if (!(s is String)) s = "%(s)"
if (s == "") return s
if (s == "") return s
var cps = s.codePoints.toList
var chars = s.toList
for (i in 0...cps.count) {
var count = chars.count
var c = cps[i]
var i = 0
if (c >= 65 && c <= 90) cps[i] = c + 32
for (c in s.codePoints) {
if (c >= 65 && c <= 90) chars[i] = String.fromCodePoint(c + 32)
i = i + 1
}
}
return cps.reduce("") { |acc, c| acc + String.fromCodePoint(c) }
return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
}


Line 131: Line 133:
if (!(s is String)) s = "%(s)"
if (!(s is String)) s = "%(s)"
if (s == "") return s
if (s == "") return s
var cps = s.codePoints.toList
var chars = s.toList
for (i in 0...cps.count) {
var count = chars.count
var c = cps[i]
var i = 0
if (c >= 97 && c <= 122) cps[i] = c - 32
for (c in s.codePoints) {
if (c >= 97 && c <= 122) chars[i] = String.fromCodePoint(c - 32)
i = i + 1
}
}
return cps.reduce("") { |acc, c| acc + String.fromCodePoint(c) }
return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
}


Line 143: Line 147:
if (!(s is String)) s = "%(s)"
if (!(s is String)) s = "%(s)"
if (s == "") return s
if (s == "") return s
var cps = s.codePoints.toList
var chars = s.toList
for (i in 0...cps.count) {
var count = chars.count
var c = cps[i]
var i = 0
for (c in s.codePoints) {
if (c >= 65 && c <= 90) {
if (c >= 65 && c <= 90) {
cps[i] = c + 32
chars[i] = String.fromCodePoint(c + 32)
} else if (c >= 97 && c <= 122) {
} else if (c >= 97 && c <= 122) {
cps[i] = c - 32
chars[i] = String.fromCodePoint(c - 32)
}
}
i = i + 1
}
}
return cps.reduce("") { |acc, c| acc + String.fromCodePoint(c) }
return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
}


Line 159: Line 165:
if (!(s is String)) s = "%(s)"
if (!(s is String)) s = "%(s)"
if (s == "") return s
if (s == "") return s
var cps = s.codePoints.toList
var start = (s.startsWith("[") && s.count > 1) ? 1 : 0
var start = (s.startsWith("[") && cps.count > 1) ? 1 : 0
var c = s[start].codePoints[0]
var c = cps[start]
if (c >= 97 && c <= 122) {
if (c >= 97 && c <= 122) {
cps[start] = c - 32
var cs = String.fromCodePoint(c - 32) + s[start+1..-1]
return cps.reduce("") { |acc, c| acc + String.fromCodePoint(c) }
if (start == 1) cs = "[" + cs
return cs
}
}
return s
return s
Line 174: Line 180:
if (s == "") return s
if (s == "") return s
var words = s.split(" ")
var words = s.split(" ")
return words.map { |w| capitalize(w) }.join(" ")
return Strs.join(words.map { |w| capitalize(w) }.toList, " ")
}
}


Line 192: Line 198:
for (i in 0..count-2) chars[i] = chars[i+1]
for (i in 0..count-2) chars[i] = chars[i+1]
chars[-1] = t
chars[-1] = t
return chars.join()
return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
}


Line 204: Line 210:
for (i in count-2..0) chars[i+1] = chars[i]
for (i in count-2..0) chars[i+1] = chars[i]
chars[0] = t
chars[0] = t
return chars.join()
return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
}


Line 214: Line 220:
if (!(r is Range)) Fiber.abort("Second argument must be a range.")
if (!(r is Range)) Fiber.abort("Second argument must be a range.")
if (!(s is String)) s = "%(s)"
if (!(s is String)) s = "%(s)"
return s.toList[r].join()
return Strs.concat(s.toList[r])
}
}


Line 255: Line 261:
var chars = s.toList
var chars = s.toList
chars[i] = t
chars[i] = t
return chars.join()
return Strs.concat(chars)
}
}


Line 265: Line 271:
var chars = s.toList
var chars = s.toList
chars.insert(i, t)
chars.insert(i, t)
return chars.join()
return Strs.concat(chars)
}
}


Line 274: Line 280:
var chars = s.toList
var chars = s.toList
chars.removeAt(i)
chars.removeAt(i)
return chars.join()
return Strs.concat(chars)
}
}


Line 287: Line 293:
chars[i] = chars[j]
chars[i] = chars[j]
chars[j] = t
chars[j] = t
return chars.join()
return Strs.concat(chars)
}
}

// Private helper method for 'repeat'.
// Builds the result by appending 's' to an accumulator once for each value in the range 0...reps.
// No argument checks are made here; the public 'repeat' methods validate before calling.
static repeat_(s, reps) {
    var built = ""
    for (n in 0...reps) {
        built = built + s
    }
    return built
}

// Returns 's' repeated 'reps' times. Zero repetitions return the empty string.
// If 'chunkSize' is chosen appropriately, this should be much faster than String's * operator
// for a large number of repetitions.
// 's' is converted to a string by interpolation if it isn't one already.
// Aborts the fiber if 'reps' is not a non-negative integer or 'chunkSize' is not a positive integer.
static repeat(s, reps, chunkSize) {
    if (!(s is String)) s = "%(s)"
    // Zero is accepted: the explicit zero case below was previously unreachable because
    // the validation rejected it first.
    if (!(reps is Num && reps.isInteger && reps >= 0)) {
        Fiber.abort("Repetitions must be a non-negative integer.")
    }
    if (!(chunkSize is Num && chunkSize.isInteger && chunkSize > 0)) {
        Fiber.abort("Chunk size must be a positive integer.")
    }
    if (reps == 0) return ""
    var fullChunks = (reps/chunkSize).floor
    // Fewer repetitions than one chunk: plain appending is all that's needed.
    if (fullChunks == 0) return repeat_(s, reps)
    var remainder = reps % chunkSize
    // Build one full-size chunk once and append it as a unit, so the growing result
    // is extended 'fullChunks' times rather than 'reps' times.
    var chunk = repeat_(s, chunkSize)
    var rs = ""
    for (i in 0...fullChunks) rs = rs + chunk
    // A final partial chunk is only needed when 'reps' isn't a multiple of 'chunkSize'.
    if (remainder > 0) rs = rs + repeat_(s, remainder)
    return rs
}

// Convenience overload: repeats 's' 'reps' times by delegating to the three-argument
// version with a 'chunkSize' of 8000. This usually gives a good result.
static repeat(s, reps) {
    return repeat(s, reps, 8000)
}


// Splits a string 's' into chunks of not more than 'size' characters.
// Splits a string 's' into chunks of not more than 'size' characters.
Line 314: Line 359:
}
}


/*
    Strs contains routines applicable to lists of strings.
    The public methods build their result chunk by chunk: each slice of 'chunkSize'
    elements is combined into its own string first, and those strings are then appended
    to the final result.
*/
class Strs {
    // Private helper method for 'concat'.
    // Appends every element of 'ls' in order to an initially empty string. No checks are made.
    static concat_(ls) {
        var s = ""
        for (e in ls) s = s + e
        return s
    }

    // Returns the strings in the list 'ls' concatenated together.
    // If 'chunkSize' is chosen appropriately, this should be much faster than Sequence.join()
    // for a large list of strings. For extra speed, only minimal type checks are made:
    // 'ls' must be a List and only its first element is checked to be a String.
    // Aborts the fiber if 'ls' is not a list, if 'chunkSize' is not a positive integer
    // or if the first element of a non-empty 'ls' is not a string.
    static concat(ls, chunkSize) {
        if (!(ls is List)) Fiber.abort("First argument must be a list of strings.")
        if (chunkSize.type != Num || !chunkSize.isInteger || chunkSize < 1) {
            Fiber.abort("Second argument must be a positive integer.")
        }
        var count = ls.count
        if (count == 0) return ""
        if (ls[0].type != String) Fiber.abort("First argument must be a list of strings.")
        // Less than one full chunk: concatenate directly without slicing.
        if (count < chunkSize) return concat_(ls)
        var s = ""
        var start = 0
        while (start < count) {
            // The final slice is clamped to the end of the list and may be shorter.
            var stop = start + chunkSize
            if (stop > count) stop = count
            s = s + concat_(ls[start...stop])
            start = stop
        }
        return s
    }

    // Convenience version of the above which uses a 'chunkSize' of 1000. This usually gives a good result.
    static concat(ls) { concat(ls, 1000) }

    // Private helper method for 'join'.
    // Appends the elements of 'ls' in order, placing 'sep' between consecutive elements.
    // No checks are made.
    static join_(ls, sep) {
        var first = true
        var s = ""
        for (e in ls) {
            if (!first) s = s + sep
            first = false
            s = s + e
        }
        return s
    }

    // Returns the strings in the list 'ls' joined together using the separator 'sep'.
    // If 'chunkSize' is chosen appropriately, this should be much faster than Sequence.join(sep)
    // for a large list of strings. For extra speed, only minimal type checks are made:
    // 'ls' must be a List and only its first element is checked to be a String.
    // Aborts the fiber if 'ls' is not a list, if 'sep' is not a string, if 'chunkSize' is
    // not a positive integer or if the first element of a non-empty 'ls' is not a string.
    static join(ls, sep, chunkSize) {
        if (!(ls is List)) Fiber.abort("First argument must be a list of strings.")
        if (sep.type != String) Fiber.abort("Second argument must be a string")
        // Validate 'chunkSize' before delegating to 'concat' so that a bad value is always
        // reported as this method's third argument, whatever the separator is.
        if (chunkSize.type != Num || !chunkSize.isInteger || chunkSize < 1) {
            Fiber.abort("Third argument must be a positive integer.")
        }
        // With an empty separator joining is the same as concatenating.
        if (sep == "") return concat(ls, chunkSize)
        var count = ls.count
        if (count == 0) return ""
        if (ls[0].type != String) Fiber.abort("First argument must be a list of strings.")
        // Less than one full chunk: join directly without slicing.
        if (count < chunkSize) return join_(ls, sep)
        var s = ""
        var start = 0
        while (start < count) {
            // Chunks are themselves separated by 'sep', as their elements are.
            if (start > 0) s = s + sep
            // The final slice is clamped to the end of the list and may be shorter.
            var stop = start + chunkSize
            if (stop > count) stop = count
            s = s + join_(ls[start...stop], sep)
            start = stop
        }
        return s
    }

    // Convenience version of the above which uses a 'chunkSize' of 1000. This usually gives a good result.
    static join(ls, sep) { join(ls, sep, 1000) }
}
/*
/*
Utf8 contains routines which are specific to the UTF-8 encoding of a string's bytes or codepoints.
Utf8 contains routines which are specific to the UTF-8 encoding of a string's bytes or codepoints.
Line 355: Line 489:
var Str_Char = Char
var Str_Char = Char
var Str_Str = Str
var Str_Str = Str
var Str_Strs = Strs
var Str_Utf8 = Utf8</lang>
var Str_Utf8 = Utf8</lang>