Character codes: Difference between revisions

← Older edit

Character codes (view source)

Revision as of 17:21, 18 April 2024

6,143 bytes added , 29 days ago

m

Added Plain English

Elenderg

18

edits

Revision as of 15:00, 20 January 2023 (view source) Ijo Nanpa (talk \| contribs) m (→‎{{header\|Kotlin}}) ← Older edit		Latest revision as of 17:21, 18 April 2024 (view source) Elenderg (talk \| contribs) m (Added Plain English)
(23 intermediate revisions by 17 users not shown)
Line 493: babel =={{header\|BASIC}}== {{works with\|QBasic\|1.1}} {{works with\|QuickBasic\|4.5}} <syntaxhighlight lang="qbasic">charCode = 97 Line 527 ⟶ 528: $: 36 €: 8364, 36</pre> ==={{header\|Chipmunk Basic}}=== <syntaxhighlight lang="qbasic">10 print "a - > ";asc("a") 20 print "98 -> ";chr$(98)</syntaxhighlight> ==={{header\|Commodore BASIC}}=== Line 536 ⟶ 541: {{Out}}<pre>A 66</pre> ==={{header\|GW-BASIC}}=== {{works with\|PC-BASIC\|any}} {{works with\|BASICA}} <syntaxhighlight lang="qbasic">10 PRINT "a - > "; ASC("a") 20 PRINT "98 -> "; CHR$(98)</syntaxhighlight> ==={{header\|IS-BASIC}}=== <syntaxhighlight lang="is-basic">100 PRINT ORD("A") 110 PRINT CHR$(65)</syntaxhighlight> ==={{header\|MSX Basic}}=== {{works with\|MSX BASIC\|any}} <syntaxhighlight lang="qbasic">10 PRINT "a - > "; ASC("a") 20 PRINT "98 -> "; CHR$(98)</syntaxhighlight> ==={{header\|QBasic}}=== {{works with\|BASICA}} {{works with\|Chipmunk Basic}} {{works with\|FreeBASIC}} {{works with\|GW-BASIC}} {{works with\|MSX BASIC}} {{works with\|PC-BASIC}} {{works with\|Run BASIC}} {{works with\|Yabasic}} <syntaxhighlight lang="qbasic">PRINT "a - > "; ASC("a") PRINT "98 -> "; CHR$(98)</syntaxhighlight> Line 555 ⟶ 577: <pre>38 A</pre> ==={{header\|SmallBASIC}}=== <syntaxhighlight lang="qbasic"> Print "a -> "; Asc("a") Print "98 -> "; Chr(98) </syntaxhighlight> ==={{header\|True BASIC}}=== Line 560 ⟶ 588: PRINT "98 -> "; chr$(98) END</syntaxhighlight> ==={{header\|XBasic}}=== {{works with\|Windows XBasic}} {{works with\|Linux XBasic}} <syntaxhighlight lang="qbasic">PROGRAM "Character codes" VERSION "0.0000" DECLARE FUNCTION Entry () FUNCTION Entry () PRINT "a - >"; ASC("a") PRINT "98 -> "; CHR$(98) END FUNCTION END PROGRAM</syntaxhighlight> ==={{header\|Yabasic}}=== Line 839 ⟶ 881: =={{header\|Ecstasy}}== <syntaxhighlight lang="java"> module CharacterCodes { { @Inject Console console; void run() { for (Char char : ['\0', '\d', 'A', '$', '¢', '~', '˜']) { { ~~for (Char char : ['\0', '\d', 'A', '$', 
'¢', '~', '˜'])~~ { // character to its integer value UInt32 codepoint = char.codepoint; Line 858 ⟶ 897: \| char from codepoint={fromCodePoint.quoted()} ); } } } } </syntaxhighlight> Line 909 ⟶ 948: Limitations: There is no "put_character_32" feature for standard io (FILE class), so there appears to be no way to print Unicode characters. =={{header\|Elena}}== ELENA 46.x : <syntaxhighlight lang="elena">import extensions; Line 916 ⟶ 955: var ch := $97; console.printLine:(ch); console.printLine(ch.toInt()) }</syntaxhighlight> Line 924 ⟶ 963: 97 </pre> =={{header\|Elixir}}== A String in Elixir is a UTF-8 encoded binary. Line 933 ⟶ 973: <syntaxhighlight lang="lisp">(string-to-char "a") ;=> 97 (format "%c" 97) ;=> "a"</syntaxhighlight> =={{header\|EMal}}== <syntaxhighlight lang="emal"> ^\|ord and chr work with Unicode code points\|^ writeLine(ord("a")) # prints "97" writeLine(chr(97)) # prints "a" writeLine(ord("π")) # prints "960" writeLine(chr(960)) # prints "π" writeLine() var cps = int[] for each var c in text["a", "π", "字", "🐘"] var cp = ord(c) cps.append(cp) writeLine(c + " = " + cp) end writeLine() for each int i in cps var c = chr(i) writeLine(i + " = " + c) end </syntaxhighlight> {{out}} <pre> 97 a 960 π a = 97 π = 960 字 = 23383 🐘 = 128024 97 = a 960 = π 23383 = 字 128024 = 🐘 </pre> =={{header\|Erlang}}== In Erlang, lists and strings are the same, only the representation changes. Thus: Line 967 ⟶ 1,046: fansh> 'a'.toInt 97</syntaxhighlight> =={{header\|Fennel}}== <syntaxhighlight lang="fennel"> (string.byte :A) ; 65 (string.char 65) ; "A" </syntaxhighlight> =={{header\|Forth}}== As with C, characters are just integers on the stack which are treated as ASCII. Line 1,169 ⟶ 1,255: 97 98 99 9786</syntaxhighlight> <code>7 u:</code> converts ~~from~~to utf-16 (<code>8 u:</code> would convert to utf-8, and <code>9 u:</code> would convert to utf-32), and <code>3 u:</code> converts what the uncode consortium calls "code points" to numeric form. 
Since J character literals are utf-8 (primarily because that's how OS interfaces work), by itself <code>3 u:</code> would give us: <syntaxhighlight lang="j"> 3 u: 'abc☺' Line 1,180 ⟶ 1,266: a.i.'abc' 97 98 99</syntaxhighlight> =={{header\|Java}}== In Java, a <code>char</code> is a 2-byte unsigned value, so it will fit within a 4-byte <code>int</code>.<br /> <tt>char</tt> is already an integer type in Java, and it gets automatically promoted to <tt>int</tt>. So you can use a character where you would otherwise use an integer. Conversely, you can use an integer where you would normally use a character, except you may need to cast it, as <tt>char</tt> is smaller. <br /> To convert a character to its ASCII code, cast the <code>char</code> to an <code>int</code>.<br /> The following will yield <kbd>97</kbd>. <syntaxhighlight lang="java"> (int) 'a' </syntaxhighlight> You could also specify a Unicode hexadecimal value, using the <kbd>\u</kbd> escape sequence. <syntaxhighlight lang="java"> (int) '\u0061' </syntaxhighlight> To convert an ASCII code to its ASCII representation, cast the <code>int</code> value to a <code>char</code>. <syntaxhighlight lang="java"> (char) 97 </syntaxhighlight> <br /> Java also offers the <code>Character</code> class, comprised of several utilities for Unicode-based operations.<br /> Here are a few examples.<br /><br /> Get the integer value represented by the ASCII character.<br /> The second parameter here is the radix. This will return an <code>int</code> with the value of <kbd>1</kbd>. <syntaxhighlight lang="java"> Character.digit('1', 10) </syntaxhighlight> Inversely, get the ASCII representation of the integer.<br /> Again, the second parameter is the radix. This will return a <code>char</code> with the value of '<kbd>1</kbd>'. 
<syntaxhighlight lang="java"> Character.forDigit(1, 10) </syntaxhighlight> In this case, the <tt>println</tt> method is overloaded to handle integer (outputs the decimal representation) and character (outputs just the character) types differently, so we need to cast it in both cases. ~~<syntaxhighlight lang="java">public class Foo {~~ ~~public static void main(String[] args) {~~ ~~System.out.println((int)'a'); // prints "97"~~ ~~System.out.println((char)97); // prints "a"~~ } ~~}</syntaxhighlight>~~ ~~Java characters support Unicode:~~ ~~<syntaxhighlight lang="java">public class Bar {~~ ~~public static void main(String[] args) {~~ ~~System.out.println((int)'π'); // prints "960"~~ ~~System.out.println((char)960); // prints "π"~~ } ~~}</syntaxhighlight>~~ =={{header\|JavaScript}}== Here character is just a string of length 1 Line 1,265 ⟶ 1,367: {{VI snippet}}<br/> [[File:LabVIEW_Character_codes.png]] =={{header\|Lang}}== {{trans\|Python}} <syntaxhighlight lang="lang"> fn.println(fn.toValue(a)) # Prints "97" fn.println(fn.toChar(97)) # Prints "a" # Unicode fn.println(fn.toValue(π)) # Prints "960" fn.println(fn.toChar(960)) # Prints "π" </syntaxhighlight> =={{header\|Lang5}}== <syntaxhighlight lang="lang5">: CHAR "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[" comb Line 1,278 ⟶ 1,392: Langur has code point literals (enclosed in straight single quotes), which may use escape codes. They are integers. The s2cp(), cp2s(), and ~~cp2s~~s2gc() functions convert between code point integers, grapheme clusters and strings. Also, string indexing is by code point. 
<syntaxhighlight lang="langur">val .a1 = 'a' Line 1,290 ⟶ 1,404: writeln .a3 == .a4 writeln "numbers: ", join ", ", [.a1, .a2, .a3, .a4, .a5] writeln "letters: ", join ", ", map cp2s, [~~cp2s(~~.a1), ~~cp2s(~~.a2), ~~cp2s(~~.a3), ~~cp2s(~~.a4), ~~cp2s(~~.a5)]</syntaxhighlight> {{out}} Line 1,299 ⟶ 1,413: letters: a, a, a, a, aaaa </pre> =={{header\|Lasso}}== <syntaxhighlight lang="lasso">'a'->integer Line 1,471 ⟶ 1,586: The character for '65' is: A. Press any key to continue...</syntaxhighlight> =={{header\|MiniScript}}== {{trans\|Wren}} MiniScript does not have a ''character'' type as such but one can use single character strings instead. Strings can contain any Unicode code point. <syntaxhighlight lang="miniscript">cps = [] for c in ["a", "π", "字", "🐘"] cp = c.code cps.push cp print c + " = " + cp end for print for i in cps print i + " = " + char(i) end for</syntaxhighlight> {{out}} <pre>a = 97 π = 960 字 = 23383 🐘 = 128024 97 = a 960 = π 23383 = 字 128024 = 🐘 </pre> =={{header\|Modula-2}}== <syntaxhighlight lang="modula2">MODULE asc; Line 1,493 ⟶ 1,634: <syntaxhighlight lang="modula-2">jan@Beryllium:~/modula/rosetta$ ./asc a 97 1</syntaxhighlight> =={{header\|Modula-3}}== The built in functions <code>ORD</code> and <code>VAL</code> work on characters, among other things. Line 1,695 ⟶ 1,837: <syntaxhighlight lang="pascal">writeln(ord('a')); writeln(chr(97));</syntaxhighlight> =={{header\|Plain English}}== <syntaxhighlight> \ Obs: The little-a byte is a byte equal to 97. Write the little-a byte's whereabouts on the console. Put 97 into a number. Write the number's target on the console. 
</syntaxhighlight> =={{header\|Perl}}== ===Narrow=== Line 1,947 ⟶ 2,096: 'Unicode script', 'Unicode block', 'Added in Unicode version', 'Ordinal(s)', 'Hex ordinal(s)', Line 1,954 ⟶ 2,104: 'Round trip by name', 'Round trip by ordinal' ]».fmt('%~~21s~~25s:') Z [ $_, Line 1,961 ⟶ 2,111: .uniprops('Script').join(', '), .uniprops('Block').join(', '), .uniprops('Age').join(', '), .ords, .ords.fmt('0x%X'), Line 1,972 ⟶ 2,123: }</syntaxhighlight> {{out}} <pre> Character: A Character name: LATIN CAPITAL LETTER A Unicode property: Lu Unicode script: Latin Unicode block: Basic Latin Added in Unicode version: 1.1 ~~Ordinal(s): 65~~ ~~Hex~~ ~~ordinal~~ Ordinal(s): ~~0x41~~65 Hex ~~UTF-8~~ordinal(s): 410x41 UTF-~~16LE~~8: ~~4100~~41 UTF-~~16BE~~16LE: ~~0041~~4100 UTF-16BE: 0041 ~~Round trip by name: A~~ Round trip by ~~ordinal~~name: A Round trip by ordinal: A Character: Α Character name: GREEK CAPITAL LETTER ALPHA Unicode property: Lu Unicode script: Greek Unicode block: Greek and Coptic Added in Unicode version: 1.1 ~~Ordinal(s): 913~~ ~~Hex~~ ~~ordinal~~ Ordinal(s): ~~0x391~~913 Hex ~~UTF-8~~ordinal(s): ~~CE 91~~0x391 UTF-~~16LE~~8: ~~9103~~CE 91 UTF-~~16BE~~16LE: ~~0391~~9103 UTF-16BE: 0391 ~~Round trip by name: Α~~ Round trip by ~~ordinal~~name: Α Round trip by ordinal: Α Character: А Character name: CYRILLIC CAPITAL LETTER A Unicode property: Lu Unicode script: Cyrillic Unicode block: Cyrillic Added in Unicode version: 1.1 ~~Ordinal(s): 1040~~ ~~Hex~~ ~~ordinal~~ Ordinal(s): ~~0x410~~1040 Hex ~~UTF-8~~ordinal(s): ~~D0 90~~0x410 UTF-~~16LE~~8: ~~1004~~D0 90 UTF-~~16BE~~16LE: ~~0410~~1004 UTF-16BE: 0410 ~~Round trip by name: А~~ Round trip by ~~ordinal~~name: А Round trip by ordinal: А Character: 𪚥 Character name: CJK UNIFIED IDEOGRAPH-2A6A5 Unicode property: Lo Unicode script: Han Unicode block: CJK Unified Ideographs Extension B Added in Unicode version: 3.1 ~~Ordinal(s): 173733~~ ~~Hex~~ ~~ordinal~~ Ordinal(s): ~~0x2A6A5~~173733 Hex ~~UTF-8~~ordinal(s): ~~F0 AA 9A 
A5~~0x2A6A5 UTF-~~16LE~~8: ~~69D8~~F0 AA 9A ~~A5DE~~A5 UTF-~~16BE~~16LE: ~~D869~~69D8 ~~DEA5~~A5DE UTF-16BE: D869 DEA5 ~~Round trip by name: 𪚥~~ Round trip by ~~ordinal~~name: 𪚥 Round trip by ordinal: 𪚥 Character: 🇺🇸 Character name: REGIONAL INDICATOR SYMBOL LETTER U, REGIONAL INDICATOR SYMBOL LETTER S Unicode property: So, So Unicode script: Common, Common Unicode block: Enclosed Alphanumeric Supplement, Enclosed Alphanumeric Supplement Added in Unicode version: 6.0, 6.0 ~~Ordinal(s): 127482 127480~~ ~~Hex~~ ~~ordinal~~ Ordinal(s): ~~0x1F1FA~~127482 ~~0x1F1F8~~127480 Hex ~~UTF-8~~ordinal(s): ~~F0 9F 87 BA F0 9F 87~~0x1F1FA B80x1F1F8 UTF-~~16LE~~8: ~~3CD8~~F0 9F 87 BA F0 ~~FADD~~9F ~~3CD8~~87 ~~F8DD~~B8 UTF-~~16BE~~16LE: ~~D83C~~3CD8 ~~DDFA~~FADD ~~D83C~~3CD8 ~~DDF8~~F8DD UTF-16BE: D83C DDFA D83C DDF8 ~~Round trip by name: 🇺🇸~~ Round trip by ~~ordinal~~name: 🇺🇸 Round trip by ordinal: 🇺🇸 Character: 👨‍👩‍👧‍👦 Character name: MAN, ZERO WIDTH JOINER, WOMAN, ZERO WIDTH JOINER, GIRL, ZERO WIDTH JOINER, BOY Unicode property: So, Cf, So, Cf, So, Cf, So Unicode script: Common, Inherited, Common, Inherited, Common, Inherited, Common Unicode block: Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs Added in Unicode version: 6.0, 1.1, 6.0, 1.1, 6.0, 1.1, 6.0 Ordinal(s): 128104 8205 128105 8205 128103 8205 128102 Hex ordinal(s): 0x1F468 0x200D 0x1F469 0x200D 0x1F467 0x200D 0x1F466 UTF-8: F0 9F 91 A8 E2 80 8D F0 9F 91 A9 E2 80 8D F0 9F 91 A7 E2 80 8D F0 9F 91 A6 UTF-16LE: 3DD8 68DC 0D20 3DD8 69DC 0D20 3DD8 67DC 0D20 3DD8 66DC UTF-16BE: D83D DC68 200D D83D DC69 200D D83D DC67 200D D83D DC66 Round trip by name: 👨‍👩‍👧‍👦 Round trip by ordinal: 👨‍👩‍👧‍👦</pre> ~~Character: 👨‍👩‍👧‍👦~~ ~~Character name: MAN, ZERO WIDTH JOINER, WOMAN, ZERO WIDTH JOINER, GIRL, ZERO WIDTH JOINER, BOY~~ ~~Unicode property: So, Cf, So, Cf, So, Cf, 
So~~ ~~Unicode script: Common, Inherited, Common, Inherited, Common, Inherited, Common~~ Unicode block: Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs ~~Ordinal(s): 128104 8205 128105 8205 128103 8205 128102~~ ~~Hex ordinal(s): 0x1F468 0x200D 0x1F469 0x200D 0x1F467 0x200D 0x1F466~~ ~~UTF-8: F0 9F 91 A8 E2 80 8D F0 9F 91 A9 E2 80 8D F0 9F 91 A7 E2 80 8D F0 9F 91 A6~~ ~~UTF-16LE: 3DD8 68DC 0D20 3DD8 69DC 0D20 3DD8 67DC 0D20 3DD8 66DC~~ ~~UTF-16BE: D83D DC68 200D D83D DC69 200D D83D DC67 200D D83D DC66~~ ~~Round trip by name: 👨‍👩‍👧‍👦~~ ~~Round trip by ordinal: 👨‍👩‍👧‍👦</pre>~~ =={{header\|RapidQ}}== <syntaxhighlight lang="vb"> Line 2,131 ⟶ 2,289: see char(97) + nl </syntaxhighlight> =={{header\|RPL}}== {{in}} <pre> "a" NUM 97 CHR </pre> {{out}} <pre> 2: 97 1: "a" </pre> =={{header\|Ruby}}== In Ruby 1.9 characters are represented as length-1 strings; same as in Python. The previous "character literal" syntax <tt>?a</tt> is now the same as <tt>"a"</tt>. Subscripting a string also gives a length-1 string. There is now an "ord" method of strings to convert a character into its integer code. Line 2,138 ⟶ 2,308: > 97.chr => "a"</syntaxhighlight> =={{header\|Run BASIC}}== <syntaxhighlight lang="runbasic">print chr$(97) 'prints a Line 2,377 ⟶ 2,548: A 65</pre> =={{header\|SparForte}}== As a structured script. <syntaxhighlight lang="ada">#!/usr/local/bin/spar pragma annotate( summary, "charcode" ) @( description, "Given a character value in your language, print its code (could be" ) @( description, "ASCII code, Unicode code, or whatever your language uses). For example," ) @( description, "the character 'a' (lowercase letter A) has a code of 97 in ASCII (as" ) @( description, "well as Unicode, as ASCII forms the beginning of Unicode). 
Conversely," ) @( description, "given a code, print out the corresponding character. " ) @( category, "tutorials" ) @( see_also, "http://rosettacode.org/wiki/Character_codes" ) @( author, "Ken O. Burtch"); pragma license( unrestricted ); pragma restriction( no_external_commands ); procedure charcode is code : constant natural := 97; ch : constant character := 'a'; begin put_line( "character code" & strings.image( code ) & " = character " & strings.val( code ) ); put_line( "character " & ch & " = character code" & strings.image( numerics.pos( ch ) ) ); end charcode;</syntaxhighlight> =={{header\|SPL}}== In SPL all characters are used in UTF-16LE encoding. Line 2,532 ⟶ 2,728: test = (chr97,asc`a)</syntaxhighlight> {{Out}}<pre>(`a,97)</pre> =={{header\|Uxntal}}== <syntaxhighlight lang="Uxntal"> ( uxnasm char-codes.tal char-codes.rom && uxncli char-codes.rom ) \|00 @System &vector $2 &expansion $2 &wst $1 &rst $1 &metadata $2 &r $2 &g $2 &b $2 &debug $1 &state $1 \|10 @Console &vector $2 &read $1 &pad $4 &type $1 &write $1 &error $1 \|0100 [ LIT "a ] print-hex newline #61 .Console/write DEO newline ( exit ) #80 .System/state DEO BRK @print-hex DUP #04 SFT print-digit #0f AND print-digit JMP2r @print-digit DUP #09 GTH #27 MUL ADD #30 ADD .Console/write DEO JMP2r @newline #0a .Console/write DEO JMP2r</syntaxhighlight> Output: <pre>61 a</pre> =={{header\|VBA}}== <syntaxhighlight lang="vba">Debug.Print Chr(97) 'Prints a Line 2,576 ⟶ 2,806: =={{header\|Wren}}== Wren does not have a ''character'' type as such but one can use single character strings instead. Strings can contain any Unicode code point. 
<syntaxhighlight lang="~~ecmascript~~wren">var cps = [] for (c in ["a", "π", "字", "🐘"]) { var cp = c.codePoints[0] Line 2,600 ⟶ 2,830: 128024 = 🐘 </pre> =={{header\|XLISP}}== In a REPL: Line 2,642 ⟶ 2,873: =={{header\|Zig}}== <syntaxhighlight lang="zig">const std = @import("std"); ~~const debug = std.debug;~~ const unicode = std.unicode; pub fn main() !void { ~~test "character codes" {~~ const stdout = std.io.getStdOut().writer(); ~~debug.warn("\n", .{});~~ try characterAsciiCodes(stdout); try characterUnicodeCodes(stdout); } fn characterAsciiCodes(writer: anytype) !void { try writer.writeAll("Sample ASCII characters and codes:\n"); // Zig's string is just an array of bytes (u8). const message: []const u8 = "ABCabc"; for (message) \|val\| { ~~debug~~try writer.~~warn~~print(" '{c}' code: {d} [hexa: 0x{x}]\n", .{ val, val, val }); } try writer.writeByte('\n'); } fn characterUnicodeCodes(writer: anytype) !void { ~~test "character (uni)codes" {~~ try writer.writeAll("Sample Unicode characters and codes:\n"); ~~debug.warn("\n", .{});~~ const message: []const u8 = "あいうえお"; const utf8_view = unicode.Utf8View.initUnchecked(message); Line 2,667 ⟶ 2,904: while (iter.nextCodepoint()) \|val\| { var array: [4]u8 = undefined; ~~var~~const slice = array[0..try unicode.utf8Encode(val, &array)]; ~~debug~~try writer.~~warn~~print(" '{s}' code: {d} [hexa: U+{x}]\n", .{ slice, val, val }); } try writer.writeByte('\n'); }</syntaxhighlight> {{out}} <pre>~~Test~~Sample ~~[1/2]~~ASCII ~~test~~characters ~~"character~~and codes~~"...~~: 'A' code: 65 [hexa: 0x41] 'B' code: 66 [hexa: 0x42] Line 2,681 ⟶ 2,919: 'b' code: 98 [hexa: 0x62] 'c' code: 99 [hexa: 0x63] ~~Test [2/2] test "character (uni)codes"...~~ Sample Unicode characters and codes: 'あ' code: 12354 [hexa: U+3042] 'い' code: 12356 [hexa: U+3044] 'う' code: 12358 [hexa: U+3046] 'え' code: 12360 [hexa: U+3048] 'お' code: 12362 [hexa: U+304a]</pre> ~~All 2 tests passed.</pre>~~ =={{header\|zkl}}== The character set is 8 bit ASCII (but 
doesn't care if you use UTF-8 or Unicode characters).