Idiomatically determine all the characters that can be used for symbols: Difference between revisions

m
→‎{{header|Wren}}: Changed to Wren S/H
No edit summary
m (→‎{{header|Wren}}: Changed to Wren S/H)
(2 intermediate revisions by 2 users not shown)
Line 17:
* [[Idiomatically_determine_all_the_lowercase_and_uppercase_letters|Idiomatically determine all the lowercase and uppercase letters]].
<br><br>
 
=={{header|ALGOL 68}}==
{{works with|ALGOL 68G|Any - tested with release 2.8.3.win32}}
...should also work with other Algol 68 implementations that use upper-stropping (reserved words in upper-case).<br>
There are a number of different types of symbols that can be defined in Algol 68 (informally as follows):<br>
- identifiers used for variables, constants, structure members, procedures<br>
- monadic operators<br>
- dyadic operators<br>
- mode indicants - used for type names
<br>
Monadic and dyadic operators can be symbolic or have "bold" names. Mode indicants also have "bold" names. When upper-stropping is used, bold words are formed from upper-case letters. Algol 68G also allows underscores in bold words - other implementations of Algol 68 may also allow digits.<br>
In the output, the characters shown for monadic and dyadic operators include the upper-case letters - these can't be mixed with symbols, e.g. +A and B- are not valid operator symbols. Additionally, the only valid two character operator name where ":" is the second character is "=:" - the surlaw operator, perhaps :).<br>
Symbolic operator symbols can be one or two characters, optionally suffixed with := or =:.<br>
The following uses the same approach as the AWK sample, though due to the different symbol types, rather more possible symbols have to be checked.<br>
The sample assumes the operating system is Windows or Linux and that the Algol 68 compiler/interpreter can be invoked with "a68". It should be possible to modify it for other operating systems/commands. Only 7-bit ASCII characters > space are considered.
<syntaxhighlight lang="algol68">
BEGIN # determine which characters can be in identifiers, etc. by trying to #
# compile test programs #
 
# name of the scratch file that each generated test program is written to #
STRING source name = "_tmp.a68";
# shell command used to compile the scratch file; the redirections send #
# both standard output and standard error to _tmp.err #
STRING a68 command = "a68 " + source name + " > _tmp.err 2>&1";
 
# attempts to compile the code with "%" substituted with id, #
# returns 0 if it compiled OK, non-zero otherwise #
# template is the text of the test program, every "%" in it is replaced #
# by id before the text is written to the file named by source name #
# if the file can be neither opened nor created, a message is printed #
# and the program stops #
PROC attempt compilation = ( STRING template, id )INT:
BEGIN
STRING code := "";
# replace "%" with the identifier in the template #
# the template is copied one character at a time, id may be longer #
# than one character #
FOR t pos FROM LWB template TO UPB template DO
code +:= IF template[ t pos ] /= "%"
THEN template[ t pos ]
ELSE id
FI
OD;
# output the source file and try compiling it #
FILE source file;
# open error is TRUE only if both open and establish fail #
BOOL open error := IF open( source file, source name, stand out channel ) = 0
THEN
# opened OK - file already exists and #
# will be overwritten #
FALSE
ELSE
# failed to open the file #
# - try creating a new file #
establish( source file, source name, stand out channel ) /= 0
FI;
IF open error
THEN # failed to open the file #
print( ( "Unable to open ", source name, newline ) );
stop
ELSE # file opened OK #
put( source file, ( code ) ); # write source #
close( source file );
# the value yielded by system is the result of this procedure #
system( a68 command ) # compile it #
FI
END # attempt compilation # ;
# builds every one and two-character symbol from the candidate characters #
# and decides whether each is valid by attempting to compile the template #
# with the symbol substituted for "%", then prints which characters were #
# accepted as the first and as the second character of a symbol #
# only 7-bit ASCII characters > space are considered #
PROC try = ( STRING template, legend )VOID:
BEGIN
    # prints heading followed by one position per character code: the #
    # character itself where wanted is TRUE, a space where only other #
    # is TRUE (so the two rows line up), nothing where both are FALSE #
    PROC show row = ( STRING heading, []BOOL wanted, other )VOID:
    BEGIN
        print( ( heading ) );
        FOR c FROM LWB wanted TO UPB wanted DO
            IF   wanted[ c ] THEN print( ( REPR c ) )
            ELIF other[ c ]  THEN print( ( " " ) )
            FI
        OD;
        print( ( newline ) )
    END # show row # ;

    INT lowest candidate  = ABS " " + 1;
    INT highest candidate = 127;
    [ 0 : highest candidate ]BOOL ok as first, ok as other;
    FOR c FROM LWB ok as first TO UPB ok as first DO
        ok as first[ c ] := FALSE;
        ok as other[ c ] := FALSE
    OD;

    FOR lead FROM lowest candidate TO highest candidate DO
        CHAR lead char = REPR lead;
        # a character can start a symbol if it compiles on its own #
        ok as first[ lead ] := attempt compilation( template, lead char ) = 0;
        IF ok as first[ lead ]
        THEN
            FOR follow FROM lowest candidate TO highest candidate DO
                # characters already known to be valid in second position #
                # are not tested again #
                IF NOT ok as other[ follow ]
                THEN
                    ok as other[ follow ] :=
                        attempt compilation( template, lead char + REPR follow ) = 0
                FI
            OD
        FI
    OD;

    print( ( "Characters valid for ", legend, ":", newline ) );
    show row( " as first: ", ok as first, ok as other );
    show row( " as other: ", ok as other, ok as first )
END # try # ;
 
# each template is a complete program in which "%" marks the places where #
# the candidate symbol is substituted #
# identifiers: declare an INT variable with the candidate name and assign to it #
try( "BEGIN INT %; % := 1 END", "identifiers" );
# monadic operators: define the candidate as a one-operand operator and apply it #
try( "BEGIN OP % = ( INT a )INT: a; % 1 END", "monadic operators" );
# dyadic operators: give the candidate a priority, define it with two operands #
# and apply it #
try( "BEGIN PRIO % = 5; OP % = ( INT a, b )INT: a; 1 % 1 END", "dyadic operators" );
# mode indicants: declare the candidate as a mode and use it to declare a variable #
try( "BEGIN MODE % = INT; % x; x := 1 END", "mode indicants" )
 
END
</syntaxhighlight>
{{out}}
<pre>
Characters valid for identifiers:
as first: abcdefghijklmnopqrstuvwxyz
as other: 0123456789_abcdefghijklmnopqrstuvwxyz
Characters valid for monadic operators:
as first: !%& +- ?ABCDEFGHIJKLMNOPQRSTUVWXYZ^ ~
as other: * /<=> ABCDEFGHIJKLMNOPQRSTUVWXYZ _
Characters valid for dyadic operators:
as first: !%&*+-/ <=>?ABCDEFGHIJKLMNOPQRSTUVWXYZ^ ~
as other: * /:<=> ABCDEFGHIJKLMNOPQRSTUVWXYZ _
Characters valid for mode indicants:
as first: ABCDEFGHIJKLMNOPQRSTUVWXYZ
as other: ABCDEFGHIJKLMNOPQRSTUVWXYZ_
</pre>
 
=={{header|AWK}}==
Line 880 ⟶ 1,008:
REXX-Regina_3.8.2(MT) 5.00 22 Jun 2014
symbol characters: !#$.0123456789?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
</pre>
 
=={{header|RPL}}==
The RPL character set is an 8-bit character set, sometimes referred to simply as "ECMA-94" in documentation, although it is for the most part a variant of ISO/IEC 8859-1 / ECMA-94. See the related [https://en.wikipedia.org/wiki/RPL_character_set Wikipedia entry] for more details.
≪ "" "'A '"
1 255 '''FOR''' c
3 c CHR REPL
'''IFERR''' DUP STR→ '''THEN''' DROP
'''ELSE'''
'''IF''' 'A' SAME NOT '''THEN''' SWAP c CHR + SWAP '''END'''
'''END'''
'''NEXT''' DROP
≫ '<span style="color:blue">SYMBOLS</span>' STO
{{out}}
<pre>
1: "!$%&.0123456789?ABCDEFGHIJKLMNOPQRSTUVWXYZ\abcdefghijklmnopqrstuvwxyz~∇∑▶πα→←↓↑γδεηθλρστωΔΠΩ▬∞ ¡¢£¤¥¦§¨©ª¬­®¯°±²³´µ¶·¸¹º¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
</pre>
 
Line 923 ⟶ 1,067:
 
Identifiers which begin with underscores can only be used as instance field names (one underscore) or static field names (two or more underscores).
<syntaxhighlight lang="wren">for (i in 97..122) System.write(String.fromByte(i))
for (i in 65..90) System.write(String.fromByte(i))
System.print("_")</syntaxhighlight>
9,482

edits