Character codes: Difference between revisions

← Older edit

Character codes (view source)

Revision as of 17:21, 18 April 2024

4,058 bytes added , 1 month ago

m

Added Plain English

Elenderg

18

edits

Revision as of 16:45, 14 February 2023 (view source) Aerobar (talk \| contribs) (→‎{{header\|RPL}}) ← Older edit		Latest revision as of 17:21, 18 April 2024 (view source) Elenderg (talk \| contribs) m (Added Plain English)
(17 intermediate revisions by 13 users not shown)
Line 493: babel =={{header\|BASIC}}== {{works with\|QBasic\|1.1}} {{works with\|QuickBasic\|4.5}} <syntaxhighlight lang="qbasic">charCode = 97 Line 527 ⟶ 528: $: 36 €: 8364, 36</pre> ==={{header\|Chipmunk Basic}}=== <syntaxhighlight lang="qbasic">10 print "a - > ";asc("a") 20 print "98 -> ";chr$(98)</syntaxhighlight> ==={{header\|Commodore BASIC}}=== Line 536 ⟶ 541: {{Out}}<pre>A 66</pre> ==={{header\|GW-BASIC}}=== {{works with\|PC-BASIC\|any}} {{works with\|BASICA}} <syntaxhighlight lang="qbasic">10 PRINT "a - > "; ASC("a") 20 PRINT "98 -> "; CHR$(98)</syntaxhighlight> ==={{header\|IS-BASIC}}=== <syntaxhighlight lang="is-basic">100 PRINT ORD("A") 110 PRINT CHR$(65)</syntaxhighlight> ==={{header\|MSX Basic}}=== {{works with\|MSX BASIC\|any}} <syntaxhighlight lang="qbasic">10 PRINT "a - > "; ASC("a") 20 PRINT "98 -> "; CHR$(98)</syntaxhighlight> ==={{header\|QBasic}}=== {{works with\|BASICA}} {{works with\|Chipmunk Basic}} {{works with\|FreeBASIC}} {{works with\|GW-BASIC}} {{works with\|MSX BASIC}} {{works with\|PC-BASIC}} {{works with\|Run BASIC}} {{works with\|Yabasic}} <syntaxhighlight lang="qbasic">PRINT "a - > "; ASC("a") PRINT "98 -> "; CHR$(98)</syntaxhighlight> Line 555 ⟶ 577: <pre>38 A</pre> ==={{header\|SmallBASIC}}=== <syntaxhighlight lang="qbasic"> Print "a -> "; Asc("a") Print "98 -> "; Chr(98) </syntaxhighlight> ==={{header\|True BASIC}}=== Line 560 ⟶ 588: PRINT "98 -> "; chr$(98) END</syntaxhighlight> ==={{header\|XBasic}}=== {{works with\|Windows XBasic}} {{works with\|Linux XBasic}} <syntaxhighlight lang="qbasic">PROGRAM "Character codes" VERSION "0.0000" DECLARE FUNCTION Entry () FUNCTION Entry () PRINT "a - >"; ASC("a") PRINT "98 -> "; CHR$(98) END FUNCTION END PROGRAM</syntaxhighlight> ==={{header\|Yabasic}}=== Line 839 ⟶ 881: =={{header\|Ecstasy}}== <syntaxhighlight lang="java"> module CharacterCodes { { @Inject Console console; void run() { for (Char char : ['\0', '\d', 'A', '$', '¢', '~', '˜']) { { ~~for (Char char : ['\0', '\d', 'A', '$', 
'¢', '~', '˜'])~~ { // character to its integer value UInt32 codepoint = char.codepoint; Line 858 ⟶ 897: \| char from codepoint={fromCodePoint.quoted()} ); } } } } </syntaxhighlight> Line 909 ⟶ 948: Limitations: There is no "put_character_32" feature for standard io (FILE class), so there appears to be no way to print Unicode characters. =={{header\|Elena}}== ELENA 46.x : <syntaxhighlight lang="elena">import extensions; Line 916 ⟶ 955: var ch := $97; console.printLine:(ch); console.printLine(ch.toInt()) }</syntaxhighlight> Line 924 ⟶ 963: 97 </pre> =={{header\|Elixir}}== A String in Elixir is a UTF-8 encoded binary. Line 933 ⟶ 973: <syntaxhighlight lang="lisp">(string-to-char "a") ;=> 97 (format "%c" 97) ;=> "a"</syntaxhighlight> =={{header\|EMal}}== <syntaxhighlight lang="emal"> ^\|ord and chr work with Unicode code points\|^ writeLine(ord("a")) # prints "97" writeLine(chr(97)) # prints "a" writeLine(ord("π")) # prints "960" writeLine(chr(960)) # prints "π" writeLine() var cps = int[] for each var c in text["a", "π", "字", "🐘"] var cp = ord(c) cps.append(cp) writeLine(c + " = " + cp) end writeLine() for each int i in cps var c = chr(i) writeLine(i + " = " + c) end </syntaxhighlight> {{out}} <pre> 97 a 960 π a = 97 π = 960 字 = 23383 🐘 = 128024 97 = a 960 = π 23383 = 字 128024 = 🐘 </pre> =={{header\|Erlang}}== In Erlang, lists and strings are the same, only the representation changes. Thus: Line 967 ⟶ 1,046: fansh> 'a'.toInt 97</syntaxhighlight> =={{header\|Fennel}}== <syntaxhighlight lang="fennel"> (string.byte :A) ; 65 (string.char 65) ; "A" </syntaxhighlight> =={{header\|Forth}}== As with C, characters are just integers on the stack which are treated as ASCII. Line 1,182 ⟶ 1,268: =={{header\|Java}}== In Java, a <code>char</code> is a 2-byte unsigned value, so it will fit within a 4-byte <code>int</code>.<br /> <tt>char</tt> is already an integer type in Java, and it gets automatically promoted to <tt>int</tt>. 
So you can use a character where you would otherwise use an integer. Conversely, you can use an integer where you would normally use a character, except you may need to cast it, as <tt>char</tt> is smaller. <br /> To convert a character to its ASCII code, cast the <code>char</code> to an <code>int</code>.<br /> The following will yield <kbd>97</kbd>. <syntaxhighlight lang="java"> (int) 'a' </syntaxhighlight> You could also specify a Unicode hexadecimal value, using the <kbd>\u</kbd> escape sequence. <syntaxhighlight lang="java"> (int) '\u0061' </syntaxhighlight> To convert an ASCII code to its ASCII representation, cast the <code>int</code> value to a <code>char</code>. <syntaxhighlight lang="java"> (char) 97 </syntaxhighlight> <br /> Java also offers the <code>Character</code> class, comprising several utilities for Unicode-based operations.<br /> Here are a few examples.<br /><br /> Get the integer value represented by the ASCII character.<br /> The second parameter here is the radix. This will return an <code>int</code> with the value of <kbd>1</kbd>. <syntaxhighlight lang="java"> Character.digit('1', 10) </syntaxhighlight> Inversely, get the ASCII representation of the integer.<br /> Again, the second parameter is the radix. This will return a <code>char</code> with the value of '<kbd>1</kbd>'. <syntaxhighlight lang="java"> Character.forDigit(1, 10) </syntaxhighlight> In this case, the <tt>println</tt> method is overloaded to handle integer (outputs the decimal representation) and character (outputs just the character) types differently, so we need to cast it in both cases. 
~~<syntaxhighlight lang="java">public class Foo {~~ ~~public static void main(String[] args) {~~ ~~System.out.println((int)'a'); // prints "97"~~ ~~System.out.println((char)97); // prints "a"~~ } ~~}</syntaxhighlight>~~ ~~Java characters support Unicode:~~ ~~<syntaxhighlight lang="java">public class Bar {~~ ~~public static void main(String[] args) {~~ ~~System.out.println((int)'π'); // prints "960"~~ ~~System.out.println((char)960); // prints "π"~~ } ~~}</syntaxhighlight>~~ =={{header\|JavaScript}}== Here character is just a string of length 1 Line 1,266 ⟶ 1,367: {{VI snippet}}<br/> [[File:LabVIEW_Character_codes.png]] =={{header\|Lang}}== {{trans\|Python}} <syntaxhighlight lang="lang"> fn.println(fn.toValue(a)) # Prints "97" fn.println(fn.toChar(97)) # Prints "a" # Unicode fn.println(fn.toValue(π)) # Prints "960" fn.println(fn.toChar(960)) # Prints "π" </syntaxhighlight> =={{header\|Lang5}}== <syntaxhighlight lang="lang5">: CHAR "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[" comb Line 1,279 ⟶ 1,392: Langur has code point literals (enclosed in straight single quotes), which may use escape codes. They are integers. The s2cp(), cp2s(), and ~~cp2s~~s2gc() functions convert between code point integers, grapheme clusters and strings. Also, string indexing is by code point. <syntaxhighlight lang="langur">val .a1 = 'a' Line 1,291 ⟶ 1,404: writeln .a3 == .a4 writeln "numbers: ", join ", ", [.a1, .a2, .a3, .a4, .a5] writeln "letters: ", join ", ", map cp2s, [~~cp2s(~~.a1), ~~cp2s(~~.a2), ~~cp2s(~~.a3), ~~cp2s(~~.a4), ~~cp2s(~~.a5)]</syntaxhighlight> {{out}} Line 1,300 ⟶ 1,413: letters: a, a, a, a, aaaa </pre> =={{header\|Lasso}}== <syntaxhighlight lang="lasso">'a'->integer Line 1,472 ⟶ 1,586: The character for '65' is: A. Press any key to continue...</syntaxhighlight> =={{header\|MiniScript}}== {{trans\|Wren}} MiniScript does not have a ''character'' type as such but one can use single character strings instead. Strings can contain any Unicode code point. 
<syntaxhighlight lang="miniscript">cps = [] for c in ["a", "π", "字", "🐘"] cp = c.code cps.push cp print c + " = " + cp end for print for i in cps print i + " = " + char(i) end for</syntaxhighlight> {{out}} <pre>a = 97 π = 960 字 = 23383 🐘 = 128024 97 = a 960 = π 23383 = 字 128024 = 🐘 </pre> =={{header\|Modula-2}}== <syntaxhighlight lang="modula2">MODULE asc; Line 1,494 ⟶ 1,634: <syntaxhighlight lang="modula-2">jan@Beryllium:~/modula/rosetta$ ./asc a 97 1</syntaxhighlight> =={{header\|Modula-3}}== The built in functions <code>ORD</code> and <code>VAL</code> work on characters, among other things. Line 1,696 ⟶ 1,837: <syntaxhighlight lang="pascal">writeln(ord('a')); writeln(chr(97));</syntaxhighlight> =={{header\|Plain English}}== <syntaxhighlight> \ Obs: The little-a byte is a byte equal to 97. Write the little-a byte's whereabouts on the console. Put 97 into a number. Write the number's target on the console. </syntaxhighlight> =={{header\|Perl}}== ===Narrow=== Line 2,580 ⟶ 2,728: test = (chr97,asc`a)</syntaxhighlight> {{Out}}<pre>(`a,97)</pre> =={{header\|Uxntal}}== <syntaxhighlight lang="Uxntal"> ( uxnasm char-codes.tal char-codes.rom && uxncli char-codes.rom ) \|00 @System &vector $2 &expansion $2 &wst $1 &rst $1 &metadata $2 &r $2 &g $2 &b $2 &debug $1 &state $1 \|10 @Console &vector $2 &read $1 &pad $4 &type $1 &write $1 &error $1 \|0100 [ LIT "a ] print-hex newline #61 .Console/write DEO newline ( exit ) #80 .System/state DEO BRK @print-hex DUP #04 SFT print-digit #0f AND print-digit JMP2r @print-digit DUP #09 GTH #27 MUL ADD #30 ADD .Console/write DEO JMP2r @newline #0a .Console/write DEO JMP2r</syntaxhighlight> Output: <pre>61 a</pre> =={{header\|VBA}}== <syntaxhighlight lang="vba">Debug.Print Chr(97) 'Prints a Line 2,624 ⟶ 2,806: =={{header\|Wren}}== Wren does not have a ''character'' type as such but one can use single character strings instead. Strings can contain any Unicode code point. 
<syntaxhighlight lang="~~ecmascript~~wren">var cps = [] for (c in ["a", "π", "字", "🐘"]) { var cp = c.codePoints[0] Line 2,648 ⟶ 2,830: 128024 = 🐘 </pre> =={{header\|XLISP}}== In a REPL: Line 2,690 ⟶ 2,873: =={{header\|Zig}}== <syntaxhighlight lang="zig">const std = @import("std"); ~~const debug = std.debug;~~ const unicode = std.unicode; pub fn main() !void { ~~test "character codes" {~~ const stdout = std.io.getStdOut().writer(); ~~debug.warn("\n", .{});~~ try characterAsciiCodes(stdout); try characterUnicodeCodes(stdout); } fn characterAsciiCodes(writer: anytype) !void { try writer.writeAll("Sample ASCII characters and codes:\n"); // Zig's string is just an array of bytes (u8). const message: []const u8 = "ABCabc"; for (message) \|val\| { ~~debug~~try writer.~~warn~~print(" '{c}' code: {d} [hexa: 0x{x}]\n", .{ val, val, val }); } try writer.writeByte('\n'); } fn characterUnicodeCodes(writer: anytype) !void { ~~test "character (uni)codes" {~~ try writer.writeAll("Sample Unicode characters and codes:\n"); ~~debug.warn("\n", .{});~~ const message: []const u8 = "あいうえお"; const utf8_view = unicode.Utf8View.initUnchecked(message); Line 2,715 ⟶ 2,904: while (iter.nextCodepoint()) \|val\| { var array: [4]u8 = undefined; ~~var~~const slice = array[0..try unicode.utf8Encode(val, &array)]; ~~debug~~try writer.~~warn~~print(" '{s}' code: {d} [hexa: U+{x}]\n", .{ slice, val, val }); } try writer.writeByte('\n'); }</syntaxhighlight> {{out}} <pre>~~Test~~Sample ~~[1/2]~~ASCII ~~test~~characters ~~"character~~and codes~~"...~~: 'A' code: 65 [hexa: 0x41] 'B' code: 66 [hexa: 0x42] Line 2,729 ⟶ 2,919: 'b' code: 98 [hexa: 0x62] 'c' code: 99 [hexa: 0x63] ~~Test [2/2] test "character (uni)codes"...~~ Sample Unicode characters and codes: 'あ' code: 12354 [hexa: U+3042] 'い' code: 12356 [hexa: U+3044] 'う' code: 12358 [hexa: U+3046] 'え' code: 12360 [hexa: U+3048] 'お' code: 12362 [hexa: U+304a]</pre> ~~All 2 tests passed.</pre>~~ =={{header\|zkl}}== The character set is 8 bit ASCII (but 
doesn't care if you use UTF-8 or Unicode characters).