Strip whitespace from a string/Top and tail: Difference between revisions

Content added Content deleted

Inline

@@ Line 1,038: / Line 1,038: @@
     public static String ltrim(String s) {
         int i = 0;
-        while (i < s.length() && Character.isWhitespace(s.charAt(i))){
+        while (i < s.length() && Character.isWhitespace(s.charAt(i))) {
             i++;
         }
@@ Line 1,046: / Line 1,046: @@
     public static String rtrim(String s) {
         int i = s.length() - 1;
-        while (i > 0 && Character.isWhitespace(s.charAt(i))){
+        while (i > 0 && Character.isWhitespace(s.charAt(i))) {
             i--;
         }
@@ Line 1,056: / Line 1,056: @@
     }
-    public static void main(String[] args){
+    public static void main(String[] args) {
         String s = " \t \r \n String with spaces \u2009 \t  \r  \n  ";
         System.out.printf("[%s]\n", ltrim(s));
@@ Line 1,063: / Line 1,063: @@
     }
 }</lang>
+===Supplementary-correct version===
+Unicode ''happens'' not to have any whitespace characters outside the Basic Multilingual Plane (i.e., they all fit inside a single <code>char</code>). However, this is not something you should ''generally'' rely on; you should assume your strings may contain characters from the Supplementary Planes. As such, instead of iterating with <code>String.charAt</code>, prefer <code>String.codePointAt</code> and <code>String.codePointBefore</code>, which iterate over actual Unicode code points:
+<lang java>    /** Returns {@code s} with leading whitespace removed, scanning by Unicode code point. */
+    public static String ltrim(String s) {
+        final int length = s.length();
+        int start = 0;
+        // Advance over whole code points so a surrogate pair is never split.
+        while (start < length && Character.isWhitespace(s.codePointAt(start))) {
+            start += Character.charCount(s.codePointAt(start));
+        }
+        return s.substring(start);
+    }
+    /** Returns {@code s} with trailing whitespace removed, scanning by Unicode code point. */
+    public static String rtrim(String s) {
+        int end = s.length();
+        // Step backwards over whole code points so a surrogate pair is never split.
+        while (end > 0 && Character.isWhitespace(s.codePointBefore(end))) {
+            end -= Character.charCount(s.codePointBefore(end));
+        }
+        return s.substring(0, end);
+    }
+</lang>
 =={{header|Javascript}}==