Character codes: Difference between revisions

m
Added Plain English
m (Added Plain English)
 
(23 intermediate revisions by 17 users not shown)
Line 493:
babel
=={{header|BASIC}}==
{{works with|QBasic|1.1}}
{{works with|QuickBasic|4.5}}
<syntaxhighlight lang="qbasic">charCode = 97
Line 527 ⟶ 528:
$: 36
€: 8364, 36</pre>
 
==={{header|Chipmunk Basic}}===
<syntaxhighlight lang="qbasic">10 print "a - > ";asc("a")
20 print "98 -> ";chr$(98)</syntaxhighlight>
 
==={{header|Commodore BASIC}}===
Line 536 ⟶ 541:
{{Out}}<pre>A
66</pre>
 
==={{header|GW-BASIC}}===
{{works with|PC-BASIC|any}}
{{works with|BASICA}}
<syntaxhighlight lang="qbasic">10 PRINT "a - > "; ASC("a")
20 PRINT "98 -> "; CHR$(98)</syntaxhighlight>
 
==={{header|IS-BASIC}}===
<syntaxhighlight lang="is-basic">100 PRINT ORD("A")
110 PRINT CHR$(65)</syntaxhighlight>
 
==={{header|MSX Basic}}===
{{works with|MSX BASIC|any}}
<syntaxhighlight lang="qbasic">10 PRINT "a - > "; ASC("a")
20 PRINT "98 -> "; CHR$(98)</syntaxhighlight>
 
==={{header|QBasic}}===
{{works with|BASICA}}
{{works with|Chipmunk Basic}}
{{works with|FreeBASIC}}
{{works with|GW-BASIC}}
{{works with|MSX BASIC}}
{{works with|PC-BASIC}}
{{works with|Run BASIC}}
{{works with|Yabasic}}
<syntaxhighlight lang="qbasic">PRINT "a - > "; ASC("a")
PRINT "98 -> "; CHR$(98)</syntaxhighlight>
Line 555 ⟶ 577:
<pre>38
A</pre>
 
==={{header|SmallBASIC}}===
<syntaxhighlight lang="qbasic">
Print "a -> "; Asc("a")
Print "98 -> "; Chr(98)
</syntaxhighlight>
 
==={{header|True BASIC}}===
Line 560 ⟶ 588:
PRINT "98 -> "; chr$(98)
END</syntaxhighlight>
 
==={{header|XBasic}}===
{{works with|Windows XBasic}}
{{works with|Linux XBasic}}
<syntaxhighlight lang="qbasic">PROGRAM "Character codes"
VERSION "0.0000"
 
DECLARE FUNCTION Entry ()
 
FUNCTION Entry ()
PRINT "a - >"; ASC("a")
PRINT "98 -> "; CHR$(98)
END FUNCTION
END PROGRAM</syntaxhighlight>
 
==={{header|Yabasic}}===
Line 839 ⟶ 881:
=={{header|Ecstasy}}==
<syntaxhighlight lang="java">
module CharacterCodes {
{
@Inject Console console;
void run() {
for (Char char : ['\0', '\d', 'A', '$', '¢', '~', '˜']) {
{
for (Char char : ['\0', '\d', 'A', '$', '¢', '~', '˜'])
{
// character to its integer value
UInt32 codepoint = char.codepoint;
Line 858 ⟶ 897:
| char from codepoint={fromCodePoint.quoted()}
);
}
}
}
}
</syntaxhighlight>
 
Line 909 ⟶ 948:
Limitations: There is no "put_character_32" feature for standard io (FILE class), so there appears to be no way to print Unicode characters.
=={{header|Elena}}==
ELENA 6.x :
<syntaxhighlight lang="elena">import extensions;
 
Line 916 ⟶ 955:
var ch := $97;
 
console.printLine:(ch);
console.printLine(ch.toInt())
}</syntaxhighlight>
Line 924 ⟶ 963:
97
</pre>
 
=={{header|Elixir}}==
A String in Elixir is a UTF-8 encoded binary.
Line 933 ⟶ 973:
<syntaxhighlight lang="lisp">(string-to-char "a") ;=> 97
(format "%c" 97) ;=> "a"</syntaxhighlight>
 
=={{header|EMal}}==
<syntaxhighlight lang="emal">
^|ord and chr work with Unicode code points|^
writeLine(ord("a")) # prints "97"
writeLine(chr(97)) # prints "a"
writeLine(ord("π")) # prints "960"
writeLine(chr(960)) # prints "π"
writeLine()
var cps = int[]
for each var c in text["a", "π", "字", "🐘"]
var cp = ord(c)
cps.append(cp)
writeLine(c + " = " + cp)
end
writeLine()
for each int i in cps
var c = chr(i)
writeLine(i + " = " + c)
end
</syntaxhighlight>
{{out}}
<pre>
97
a
960
π
 
a = 97
π = 960
字 = 23383
🐘 = 128024
 
97 = a
960 = π
23383 = 字
128024 = 🐘
</pre>
 
=={{header|Erlang}}==
In Erlang, lists and strings are the same, only the representation changes. Thus:
Line 967 ⟶ 1,046:
fansh> 'a'.toInt
97</syntaxhighlight>
 
=={{header|Fennel}}==
<syntaxhighlight lang="fennel">
(string.byte :A) ; 65
(string.char 65) ; "A"
</syntaxhighlight>
 
=={{header|Forth}}==
As with C, characters are just integers on the stack which are treated as ASCII.
Line 1,169 ⟶ 1,255:
97 98 99 9786</syntaxhighlight>
 
<code>7 u:</code> converts to utf-16 (<code>8 u:</code> would convert to utf-8, and <code>9 u:</code> would convert to utf-32), and <code>3 u:</code> converts what the Unicode Consortium calls "code points" to numeric form. Since J character literals are utf-8 (primarily because that's how OS interfaces work), by itself <code>3 u:</code> would give us:
 
<syntaxhighlight lang="j"> 3 u: 'abc☺'
Line 1,180 ⟶ 1,266:
a.i.'abc'
97 98 99</syntaxhighlight>
 
=={{header|Java}}==
In Java, a <code>char</code> is a 2-byte unsigned value, so it will fit within a 4-byte <code>int</code>.<br />
<tt>char</tt> is already an integer type in Java, and it gets automatically promoted to <tt>int</tt>. So you can use a character where you would otherwise use an integer. Conversely, you can use an integer where you would normally use a character, except you may need to cast it, as <tt>char</tt> is smaller.
<br />
To convert a character to its ASCII code, cast the <code>char</code> to an <code>int</code>.<br />
The following will yield <kbd>97</kbd>.
<syntaxhighlight lang="java">
(int) 'a'
</syntaxhighlight>
You could also specify a Unicode hexadecimal value, using the <kbd>\u</kbd> escape sequence.
<syntaxhighlight lang="java">
(int) '\u0061'
</syntaxhighlight>
To convert an ASCII code to its character representation, cast the <code>int</code> value to a <code>char</code>.
<syntaxhighlight lang="java">
(char) 97
</syntaxhighlight>
<br />
Java also offers the <code>Character</code> class, which comprises several utilities for Unicode-based operations.<br />
Here are a few examples.<br /><br />
Get the integer value represented by the ASCII character.<br />
The second parameter here, is the radix.
This will return an <code>int</code> with the value of <kbd>1</kbd>.
<syntaxhighlight lang="java">
Character.digit('1', 10)
</syntaxhighlight>
Inversely, get the ASCII representation of the integer.<br />
Again, the second parameter is the radix.
This will return a <code>char</code> with the value of '<kbd>1</kbd>'.
<syntaxhighlight lang="java">
Character.forDigit(1, 10)
</syntaxhighlight>
 
In this case, the <tt>println</tt> method is overloaded to handle integer (outputs the decimal representation) and character (outputs just the character) types differently, so we need to cast it in both cases.
<syntaxhighlight lang="java">public class Foo {
public static void main(String[] args) {
System.out.println((int)'a'); // prints "97"
System.out.println((char)97); // prints "a"
}
}</syntaxhighlight>
Java characters support Unicode:
<syntaxhighlight lang="java">public class Bar {
public static void main(String[] args) {
System.out.println((int)'π'); // prints "960"
System.out.println((char)960); // prints "π"
}
}</syntaxhighlight>
=={{header|JavaScript}}==
Here character is just a string of length 1
Line 1,265 ⟶ 1,367:
{{VI snippet}}<br/>
[[File:LabVIEW_Character_codes.png]]
 
=={{header|Lang}}==
{{trans|Python}}
<syntaxhighlight lang="lang">
fn.println(fn.toValue(a)) # Prints "97"
fn.println(fn.toChar(97)) # Prints "a"
 
# Unicode
fn.println(fn.toValue(π)) # Prints "960"
fn.println(fn.toChar(960)) # Prints "π"
</syntaxhighlight>
 
=={{header|Lang5}}==
<syntaxhighlight lang="lang5">: CHAR "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[" comb
Line 1,278 ⟶ 1,392:
Langur has code point literals (enclosed in straight single quotes), which may use escape codes. They are integers.
 
The s2cp(), cp2s(), and s2gc() functions convert between code point integers, grapheme clusters and strings. Also, string indexing is by code point.
 
<syntaxhighlight lang="langur">val .a1 = 'a'
Line 1,290 ⟶ 1,404:
writeln .a3 == .a4
writeln "numbers: ", join ", ", [.a1, .a2, .a3, .a4, .a5]
writeln "letters: ", join ", ", map cp2s, [cp2s(.a1), cp2s(.a2), cp2s(.a3), cp2s(.a4), cp2s(.a5)]</syntaxhighlight>
 
{{out}}
Line 1,299 ⟶ 1,413:
letters: a, a, a, a, aaaa
</pre>
 
=={{header|Lasso}}==
<syntaxhighlight lang="lasso">'a'->integer
Line 1,471 ⟶ 1,586:
The character for '65' is: A.
Press any key to continue...</syntaxhighlight>
=={{header|MiniScript}}==
{{trans|Wren}}
MiniScript does not have a ''character'' type as such but one can use single character strings instead. Strings can contain any Unicode code point.
<syntaxhighlight lang="miniscript">cps = []
for c in ["a", "π", "字", "🐘"]
cp = c.code
cps.push cp
print c + " = " + cp
end for
print
for i in cps
print i + " = " + char(i)
end for</syntaxhighlight>
 
{{out}}
<pre>a = 97
π = 960
字 = 23383
🐘 = 128024
 
97 = a
960 = π
23383 = 字
128024 = 🐘
</pre>
 
=={{header|Modula-2}}==
<syntaxhighlight lang="modula2">MODULE asc;
Line 1,493 ⟶ 1,634:
<syntaxhighlight lang="modula-2">jan@Beryllium:~/modula/rosetta$ ./asc
a 97 1</syntaxhighlight>
 
=={{header|Modula-3}}==
The built in functions <code>ORD</code> and <code>VAL</code> work on characters, among other things.
Line 1,695 ⟶ 1,837:
<syntaxhighlight lang="pascal">writeln(ord('a'));
writeln(chr(97));</syntaxhighlight>
=={{header|Plain English}}==
<syntaxhighlight lang="text">
\ Obs: The little-a byte is a byte equal to 97.
Write the little-a byte's whereabouts on the console.
Put 97 into a number.
Write the number's target on the console.
</syntaxhighlight>
=={{header|Perl}}==
===Narrow===
Line 1,947 ⟶ 2,096:
'Unicode script',
'Unicode block',
'Added in Unicode version',
'Ordinal(s)',
'Hex ordinal(s)',
Line 1,954 ⟶ 2,104:
'Round trip by name',
'Round trip by ordinal'
]».fmt('%21s25s:')
Z
[ $_,
Line 1,961 ⟶ 2,111:
.uniprops('Script').join(', '),
.uniprops('Block').join(', '),
.uniprops('Age').join(', '),
.ords,
.ords.fmt('0x%X'),
Line 1,972 ⟶ 2,123:
}</syntaxhighlight>
{{out}}
<pre> Character: A
Character name: LATIN CAPITAL LETTER A
Unicode property: Lu
Unicode script: Latin
Unicode block: Basic Latin
Added in Unicode version: 1.1
Ordinal(s): 65
Hex ordinal(s): 0x41
UTF-8: 41
UTF-16LE: 4100
UTF-16BE: 0041
Round trip by name: A
Round trip by ordinal: A
 
Character: Α
Character name: GREEK CAPITAL LETTER ALPHA
Unicode property: Lu
Unicode script: Greek
Unicode block: Greek and Coptic
Added in Unicode version: 1.1
Ordinal(s): 913
Hex ordinal(s): 0x391
UTF-8: CE 91
UTF-16LE: 9103
UTF-16BE: 0391
Round trip by name: Α
Round trip by ordinal: Α
 
Character: А
Character name: CYRILLIC CAPITAL LETTER A
Unicode property: Lu
Unicode script: Cyrillic
Unicode block: Cyrillic
Added in Unicode version: 1.1
Ordinal(s): 1040
Hex ordinal(s): 0x410
UTF-8: D0 90
UTF-16LE: 1004
UTF-16BE: 0410
Round trip by name: А
Round trip by ordinal: А
 
Character: 𪚥
Character name: CJK UNIFIED IDEOGRAPH-2A6A5
Unicode property: Lo
Unicode script: Han
Unicode block: CJK Unified Ideographs Extension B
Added in Unicode version: 3.1
Ordinal(s): 173733
Hex ordinal(s): 0x2A6A5
UTF-8: F0 AA 9A A5
UTF-16LE: 69D8 A5DE
UTF-16BE: D869 DEA5
Round trip by name: 𪚥
Round trip by ordinal: 𪚥
 
Character: 🇺🇸
Character name: REGIONAL INDICATOR SYMBOL LETTER U, REGIONAL INDICATOR SYMBOL LETTER S
Unicode property: So, So
Unicode script: Common, Common
Unicode block: Enclosed Alphanumeric Supplement, Enclosed Alphanumeric Supplement
Added in Unicode version: 6.0, 6.0
Ordinal(s): 127482 127480
Hex ordinal(s): 0x1F1FA 0x1F1F8
UTF-8: F0 9F 87 BA F0 9F 87 B8
UTF-16LE: 3CD8 FADD 3CD8 F8DD
UTF-16BE: D83C DDFA D83C DDF8
Round trip by name: 🇺🇸
Round trip by ordinal: 🇺🇸
 
Character: 👨‍👩‍👧‍👦
Character name: MAN, ZERO WIDTH JOINER, WOMAN, ZERO WIDTH JOINER, GIRL, ZERO WIDTH JOINER, BOY
Unicode property: So, Cf, So, Cf, So, Cf, So
Unicode script: Common, Inherited, Common, Inherited, Common, Inherited, Common
Unicode block: Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs
Added in Unicode version: 6.0, 1.1, 6.0, 1.1, 6.0, 1.1, 6.0
Ordinal(s): 128104 8205 128105 8205 128103 8205 128102
Hex ordinal(s): 0x1F468 0x200D 0x1F469 0x200D 0x1F467 0x200D 0x1F466
UTF-8: F0 9F 91 A8 E2 80 8D F0 9F 91 A9 E2 80 8D F0 9F 91 A7 E2 80 8D F0 9F 91 A6
UTF-16LE: 3DD8 68DC 0D20 3DD8 69DC 0D20 3DD8 67DC 0D20 3DD8 66DC
UTF-16BE: D83D DC68 200D D83D DC69 200D D83D DC67 200D D83D DC66
Round trip by name: 👨‍👩‍👧‍👦
Round trip by ordinal: 👨‍👩‍👧‍👦</pre>
 
Character: 👨‍👩‍👧‍👦
Character name: MAN, ZERO WIDTH JOINER, WOMAN, ZERO WIDTH JOINER, GIRL, ZERO WIDTH JOINER, BOY
Unicode property: So, Cf, So, Cf, So, Cf, So
Unicode script: Common, Inherited, Common, Inherited, Common, Inherited, Common
Unicode block: Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs, General Punctuation, Miscellaneous Symbols and Pictographs
Ordinal(s): 128104 8205 128105 8205 128103 8205 128102
Hex ordinal(s): 0x1F468 0x200D 0x1F469 0x200D 0x1F467 0x200D 0x1F466
UTF-8: F0 9F 91 A8 E2 80 8D F0 9F 91 A9 E2 80 8D F0 9F 91 A7 E2 80 8D F0 9F 91 A6
UTF-16LE: 3DD8 68DC 0D20 3DD8 69DC 0D20 3DD8 67DC 0D20 3DD8 66DC
UTF-16BE: D83D DC68 200D D83D DC69 200D D83D DC67 200D D83D DC66
Round trip by name: 👨‍👩‍👧‍👦
Round trip by ordinal: 👨‍👩‍👧‍👦</pre>
=={{header|RapidQ}}==
<syntaxhighlight lang="vb">
Line 2,131 ⟶ 2,289:
see char(97) + nl
</syntaxhighlight>
=={{header|RPL}}==
{{in}}
<pre>
"a" NUM
97 CHR
</pre>
{{out}}
<pre>
2: 97
1: "a"
</pre>
 
=={{header|Ruby}}==
In Ruby 1.9 characters are represented as length-1 strings; same as in Python. The previous "character literal" syntax <tt>?a</tt> is now the same as <tt>"a"</tt>. Subscripting a string also gives a length-1 string. There is now an "ord" method of strings to convert a character into its integer code.
Line 2,138 ⟶ 2,308:
> 97.chr
=> "a"</syntaxhighlight>
 
=={{header|Run BASIC}}==
<syntaxhighlight lang="runbasic">print chr$(97) 'prints a
Line 2,377 ⟶ 2,548:
A
65</pre>
 
=={{header|SparForte}}==
As a structured script.
<syntaxhighlight lang="ada">#!/usr/local/bin/spar
pragma annotate( summary, "charcode" )
@( description, "Given a character value in your language, print its code (could be" )
@( description, "ASCII code, Unicode code, or whatever your language uses). For example," )
@( description, "the character 'a' (lowercase letter A) has a code of 97 in ASCII (as" )
@( description, "well as Unicode, as ASCII forms the beginning of Unicode). Conversely," )
@( description, "given a code, print out the corresponding character. " )
@( category, "tutorials" )
@( see_also, "http://rosettacode.org/wiki/Character_codes" )
@( author, "Ken O. Burtch");
pragma license( unrestricted );
 
pragma restriction( no_external_commands );
 
procedure charcode is
code : constant natural := 97;
ch : constant character := 'a';
begin
put_line( "character code" & strings.image( code ) & " = character " & strings.val( code ) );
put_line( "character " & ch & " = character code" & strings.image( numerics.pos( ch ) ) );
end charcode;</syntaxhighlight>
 
=={{header|SPL}}==
In SPL all characters are used in UTF-16LE encoding.
Line 2,532 ⟶ 2,728:
test = (chr97,asc`a)</syntaxhighlight>
{{Out}}<pre>(`a,97)</pre>
 
=={{header|Uxntal}}==
<syntaxhighlight lang="Uxntal">
( uxnasm char-codes.tal char-codes.rom && uxncli char-codes.rom )
 
|00 @System &vector $2 &expansion $2 &wst $1 &rst $1 &metadata $2 &r $2 &g $2 &b $2 &debug $1 &state $1
|10 @Console &vector $2 &read $1 &pad $4 &type $1 &write $1 &error $1
 
|0100
[ LIT "a ] print-hex
newline
#61 .Console/write DEO
newline
 
( exit )
#80 .System/state DEO
BRK
 
@print-hex
DUP #04 SFT print-digit #0f AND print-digit
JMP2r
 
@print-digit
DUP #09 GTH #27 MUL ADD #30 ADD .Console/write DEO
JMP2r
 
@newline
#0a .Console/write DEO
JMP2r</syntaxhighlight>
 
Output:
<pre>61
a</pre>
 
=={{header|VBA}}==
<syntaxhighlight lang="vba">Debug.Print Chr(97) 'Prints a
Line 2,576 ⟶ 2,806:
=={{header|Wren}}==
Wren does not have a ''character'' type as such but one can use single character strings instead. Strings can contain any Unicode code point.
<syntaxhighlight lang="wren">var cps = []
for (c in ["a", "π", "字", "🐘"]) {
var cp = c.codePoints[0]
Line 2,600 ⟶ 2,830:
128024 = 🐘
</pre>
 
=={{header|XLISP}}==
In a REPL:
Line 2,642 ⟶ 2,873:
=={{header|Zig}}==
<syntaxhighlight lang="zig">const std = @import("std");
 
const debug = std.debug;
const unicode = std.unicode;
 
/// Program entry point: prints the sample ASCII table followed by the sample
/// Unicode table to standard output. Any write error is propagated to the caller.
pub fn main() !void {
    // Both helpers take the writer as `anytype`; here it is the stdout writer.
    const stdout = std.io.getStdOut().writer();

    try characterAsciiCodes(stdout);
    try characterUnicodeCodes(stdout);
}
 
/// Writes a heading, then one line per byte of the sample string "ABCabc"
/// showing the character, its decimal code and its hexadecimal code, and
/// finally an empty line. Errors from `writer` are propagated.
fn characterAsciiCodes(writer: anytype) !void {
    try writer.writeAll("Sample ASCII characters and codes:\n");

    // Zig's string is just an array of bytes (u8).
    const message: []const u8 = "ABCabc";

    for (message) |val| {
        // The same byte is formatted three ways: as a character, decimal, and hex.
        try writer.print(" '{c}' code: {d} [hexa: 0x{x}]\n", .{ val, val, val });
    }
    try writer.writeByte('\n');
}
 
fn characterUnicodeCodes(writer: anytype) !void {
test "character (uni)codes" {
try writer.writeAll("Sample Unicode characters and codes:\n");
debug.warn("\n", .{});
 
const message: []const u8 = "あいうえお";
 
const utf8_view = unicode.Utf8View.initUnchecked(message);
Line 2,667 ⟶ 2,904:
while (iter.nextCodepoint()) |val| {
var array: [4]u8 = undefined;
varconst slice = array[0..try unicode.utf8Encode(val, &array)];
 
debugtry writer.warnprint(" '{s}' code: {d} [hexa: U+{x}]\n", .{ slice, val, val });
}
try writer.writeByte('\n');
}</syntaxhighlight>
 
{{out}}
<pre>Sample ASCII characters and codes:
'A' code: 65 [hexa: 0x41]
'B' code: 66 [hexa: 0x42]
Line 2,681 ⟶ 2,919:
'b' code: 98 [hexa: 0x62]
'c' code: 99 [hexa: 0x63]
 
Test [2/2] test "character (uni)codes"...
Sample Unicode characters and codes:
'あ' code: 12354 [hexa: U+3042]
'い' code: 12356 [hexa: U+3044]
'う' code: 12358 [hexa: U+3046]
'え' code: 12360 [hexa: U+3048]
'お' code: 12362 [hexa: U+304a]</pre>
 
All 2 tests passed.</pre>
=={{header|zkl}}==
The character set is 8 bit ASCII (but doesn't care if you use UTF-8 or unicode characters).
18

edits