Determine sentence type: Difference between revisions
(Added XPL0 example.) |
Thundergnat (talk | contribs) m (syntax highlighting fixup automation) |
||
Line 24: | Line 24: | ||
{{trans|Go}} |
{{trans|Go}} |
||
< |
<syntaxhighlight lang="11l">F sentenceType(s) |
||
I s.empty |
I s.empty |
||
R ‘’ |
R ‘’ |
||
Line 43: | Line 43: | ||
V s = ‘hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it’ |
V s = ‘hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it’ |
||
print(sentenceType(s))</ |
print(sentenceType(s))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 52: | Line 52: | ||
=={{header|ALGOL 68}}== |
=={{header|ALGOL 68}}== |
||
Classifies an empty string as "". |
Classifies an empty string as "". |
||
< |
<syntaxhighlight lang="algol68">BEGIN # determuine the type of a sentence by looking at the final punctuation # |
||
CHAR exclamation = "E"; # classification codes... # |
CHAR exclamation = "E"; # classification codes... # |
||
CHAR question = "Q"; |
CHAR question = "Q"; |
||
Line 89: | Line 89: | ||
) |
) |
||
) |
) |
||
END</ |
END</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 96: | Line 96: | ||
=={{header|AutoHotkey}}== |
=={{header|AutoHotkey}}== |
||
< |
<syntaxhighlight lang="autohotkey">Sentence := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" |
||
Msgbox, % SentenceType(Sentence) |
Msgbox, % SentenceType(Sentence) |
||
Line 111: | Line 111: | ||
} |
} |
||
return (D = SubStr(Sentence, 1, 3)) ? RTrim(RTrim(type, "|"), "N|") : RTrim(type, "|") |
return (D = SubStr(Sentence, 1, 3)) ? RTrim(RTrim(type, "|"), "N|") : RTrim(type, "|") |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>Q|S|E|N</pre> |
<pre>Q|S|E|N</pre> |
||
=={{header|AWK}}== |
=={{header|AWK}}== |
||
<syntaxhighlight lang="awk"> |
|||
<lang AWK> |
|||
# syntax: GAWK -f DETERMINE_SENTENCE_TYPE.AWK |
# syntax: GAWK -f DETERMINE_SENTENCE_TYPE.AWK |
||
BEGIN { |
BEGIN { |
||
Line 147: | Line 147: | ||
sentence = "" |
sentence = "" |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 162: | Line 162: | ||
=={{header|CLU}}== |
=={{header|CLU}}== |
||
< |
<syntaxhighlight lang="clu">% This iterator takes a string and yields one of 'E', 'Q', |
||
% 'S' or 'N' for every sentence found. |
% 'S' or 'N' for every sentence found. |
||
% Because sentences are separated by punctuation, only the |
% Because sentences are separated by punctuation, only the |
||
Line 214: | Line 214: | ||
stream$putc(po, c) |
stream$putc(po, c) |
||
end |
end |
||
end start_up </ |
end start_up </syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>QSEN</pre> |
<pre>QSEN</pre> |
||
=={{header|Epoxy}}== |
=={{header|Epoxy}}== |
||
< |
<syntaxhighlight lang="epoxy">const SentenceTypes: { |
||
["?"]:"Q", |
["?"]:"Q", |
||
["."]:"S", |
["."]:"S", |
||
Line 243: | Line 243: | ||
cls |
cls |
||
GetSentences("hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it")</ |
GetSentences("hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it")</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 255: | Line 255: | ||
This program attempts to prevent common abbreviations from ending sentences early. It also tries to handle parenthesized sentences and implements an additional type for exclamatory questions (EQ). |
This program attempts to prevent common abbreviations from ending sentences early. It also tries to handle parenthesized sentences and implements an additional type for exclamatory questions (EQ). |
||
{{works with|Factor|0.99 2021-06-02}} |
{{works with|Factor|0.99 2021-06-02}} |
||
< |
<syntaxhighlight lang="factor">USING: combinators io kernel regexp sequences sets splitting |
||
wrap.strings ; |
wrap.strings ; |
||
Line 312: | Line 312: | ||
"Hello, Mr. Anderson!" show |
"Hello, Mr. Anderson!" show |
||
nl |
nl |
||
"Are you sure?!?! How can you know?" show</ |
"Are you sure?!?! How can you know?" show</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 330: | Line 330: | ||
=={{header|FreeBASIC}}== |
=={{header|FreeBASIC}}== |
||
< |
<syntaxhighlight lang="freebasic">function sentype( byref s as string ) as string |
||
'determines the sentence type of the first sentence in the string |
'determines the sentence type of the first sentence in the string |
||
'returns "E" for an exclamation, "Q" for a question, "S" for serious |
'returns "E" for an exclamation, "Q" for a question, "S" for serious |
||
Line 359: | Line 359: | ||
while len(spam)>0 |
while len(spam)>0 |
||
print sentype(spam) |
print sentype(spam) |
||
wend</ |
wend</syntaxhighlight> |
||
{{out}}<pre>Q |
{{out}}<pre>Q |
||
S |
S |
||
Line 368: | Line 368: | ||
=={{header|Go}}== |
=={{header|Go}}== |
||
{{trans|Wren}} |
{{trans|Wren}} |
||
< |
<syntaxhighlight lang="go">package main |
||
import ( |
import ( |
||
Line 398: | Line 398: | ||
s := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" |
s := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" |
||
fmt.Println(sentenceType(s)) |
fmt.Println(sentenceType(s)) |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 411: | Line 411: | ||
The following parses sentences with embedded quotations naively, |
The following parses sentences with embedded quotations naively, |
||
so that for example the sentence "He asked 'How are you?'." results in: Q S |
so that for example the sentence "He asked 'How are you?'." results in: Q S |
||
<syntaxhighlight lang="jq"> |
|||
<lang jq> |
|||
# Input: a string |
# Input: a string |
||
# Output: a stream of sentence type indicators |
# Output: a stream of sentence type indicators |
||
Line 432: | Line 432: | ||
def s: "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"; |
def s: "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"; |
||
s | sentenceTypes</ |
s | sentenceTypes</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 442: | Line 442: | ||
=={{header|Julia}}== |
=={{header|Julia}}== |
||
< |
<syntaxhighlight lang="julia">const text = """ |
||
Hi there, how are you today? I'd like to present to you the washing machine 9001. |
Hi there, how are you today? I'd like to present to you the washing machine 9001. |
||
You have been nominated to win one of these! Just make sure you don't break it""" |
You have been nominated to win one of these! Just make sure you don't break it""" |
||
Line 454: | Line 454: | ||
println(rpad(parsed[i] * parsed[i + 1], 52), " ==> ", haspunctotype(parsed[i + 1])) |
println(rpad(parsed[i] * parsed[i + 1], 52), " ==> ", haspunctotype(parsed[i + 1])) |
||
end |
end |
||
</ |
</syntaxhighlight>{{out}} |
||
<pre> |
<pre> |
||
Hi there, how are you today? ==> Q |
Hi there, how are you today? ==> Q |
||
Line 463: | Line 463: | ||
=={{header|Lua}}== |
=={{header|Lua}}== |
||
< |
<syntaxhighlight lang="lua">text = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" |
||
p2t = { [""]="N", ["."]="S", ["!"]="E", ["?"]="Q" } |
p2t = { [""]="N", ["."]="S", ["!"]="E", ["?"]="Q" } |
||
for s, p in text:gmatch("%s*([^%!%?%.]+)([%!%?%.]?)") do |
for s, p in text:gmatch("%s*([^%!%?%.]+)([%!%?%.]?)") do |
||
print(s..p..": "..p2t[p]) |
print(s..p..": "..p2t[p]) |
||
end</ |
end</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>hi there, how are you today?: Q |
<pre>hi there, how are you today?: Q |
||
Line 475: | Line 475: | ||
=={{header|Perl}}== |
=={{header|Perl}}== |
||
< |
<syntaxhighlight lang="perl">use strict; |
||
use warnings; |
use warnings; |
||
use feature 'say'; |
use feature 'say'; |
||
Line 501: | Line 501: | ||
else { say 'N' } |
else { say 'N' } |
||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>hi there, how are you today?| Q |
<pre>hi there, how are you today?| Q |
||
Line 511: | Line 511: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
<!--< |
<!--<syntaxhighlight lang="phix">(phixonline)--> |
||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
||
<span style="color: #008080;">constant</span> <span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">`hi there, how are you today? I'd like to present |
<span style="color: #008080;">constant</span> <span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">`hi there, how are you today? I'd like to present |
||
Line 521: | Line 521: | ||
<span style="color: #000000;">w</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">v</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'|'</span><span style="color: #0000FF;">)</span> |
<span style="color: #000000;">w</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">v</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'|'</span><span style="color: #0000FF;">)</span> |
||
<span style="color: #0000FF;">?</span><span style="color: #000000;">w</span> |
<span style="color: #0000FF;">?</span><span style="color: #000000;">w</span> |
||
<!--</ |
<!--</syntaxhighlight>--> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 528: | Line 528: | ||
=={{header|Python}}== |
=={{header|Python}}== |
||
< |
<syntaxhighlight lang="python">import re |
||
txt = """ |
txt = """ |
||
Line 543: | Line 543: | ||
for i in range(0, len(pars)-1, 2): |
for i in range(0, len(pars)-1, 2): |
||
print((pars[i] + pars[i + 1]).ljust(54), "==>", haspunctotype(pars[i + 1])) |
print((pars[i] + pars[i + 1]).ljust(54), "==>", haspunctotype(pars[i + 1])) |
||
</ |
</syntaxhighlight>{{out}} |
||
<pre> |
<pre> |
||
Hi there, how are you today? ==> Q |
Hi there, how are you today? ==> Q |
||
Line 553: | Line 553: | ||
Or for more generality, and an alternative to hand-crafted regular expressions: |
Or for more generality, and an alternative to hand-crafted regular expressions: |
||
< |
<syntaxhighlight lang="python">'''Grouping and tagging by final character of string''' |
||
from functools import reduce |
from functools import reduce |
||
Line 666: | Line 666: | ||
# MAIN --- |
# MAIN --- |
||
if __name__ == '__main__': |
if __name__ == '__main__': |
||
main()</ |
main()</syntaxhighlight> |
||
{{Out}} |
{{Out}} |
||
<pre>('E', ['You have been nominated to win one of these!', "But perhaps substance isn't the goal!"]) |
<pre>('E', ['You have been nominated to win one of these!', "But perhaps substance isn't the goal!"]) |
||
Line 675: | Line 675: | ||
=={{header|Raku}}== |
=={{header|Raku}}== |
||
<lang |
<syntaxhighlight lang="raku" line>use Lingua::EN::Sentence; |
||
my $paragraph = q:to/PARAGRAPH/; |
my $paragraph = q:to/PARAGRAPH/; |
||
Line 697: | Line 697: | ||
default { 'N' }; |
default { 'N' }; |
||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>hi there, how are you today? | Q |
<pre>hi there, how are you today? | Q |
||
Line 717: | Line 717: | ||
=={{header|Vlang}}== |
=={{header|Vlang}}== |
||
{{trans|go}} |
{{trans|go}} |
||
< |
<syntaxhighlight lang="vlang">fn sentence_type(s string) string { |
||
if s.len == 0 { |
if s.len == 0 { |
||
return "" |
return "" |
||
Line 740: | Line 740: | ||
s := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" |
s := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" |
||
println(sentence_type(s)) |
println(sentence_type(s)) |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 748: | Line 748: | ||
=={{header|Wren}}== |
=={{header|Wren}}== |
||
< |
<syntaxhighlight lang="ecmascript">var sentenceType = Fn.new { |s| |
||
if (s.count == 0) return "" |
if (s.count == 0) return "" |
||
var types = [] |
var types = [] |
||
Line 765: | Line 765: | ||
var s = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" |
var s = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" |
||
System.print(sentenceType.call(s))</ |
System.print(sentenceType.call(s))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 775: | Line 775: | ||
{{libheader|Wren-trait}} |
{{libheader|Wren-trait}} |
||
The following alternative version takes the simplistic view that (unless they end the final sentence of the paragraph) '''?''', '''!''' or '''.''' will only end a sentence if they're immediately followed by a space. This of course is nonsense, given the way English is written nowadays, but it's probably an improvement on the first version without the need to search through an inevitably incomplete list of abbreviations. |
The following alternative version takes the simplistic view that (unless they end the final sentence of the paragraph) '''?''', '''!''' or '''.''' will only end a sentence if they're immediately followed by a space. This of course is nonsense, given the way English is written nowadays, but it's probably an improvement on the first version without the need to search through an inevitably incomplete list of abbreviations. |
||
< |
<syntaxhighlight lang="ecmascript">import "./pattern" for Pattern |
||
import "./trait" for Indexed |
import "./trait" for Indexed |
||
Line 802: | Line 802: | ||
} |
} |
||
System.print() |
System.print() |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 819: | Line 819: | ||
=={{header|XPL0}}== |
=={{header|XPL0}}== |
||
< |
<syntaxhighlight lang="xpl0">include xpllib; \for StrLen |
||
int Sentence, N, Len; |
int Sentence, N, Len; |
||
char Str; |
char Str; |
||
Line 836: | Line 836: | ||
if N < 3 then ChOut(0, ^|); |
if N < 3 then ChOut(0, ^|); |
||
]; |
]; |
||
]</ |
]</syntaxhighlight> |
||
{{out}} |
{{out}} |
Revision as of 23:37, 26 August 2022
You are encouraged to solve this task according to the task description, using any language you may know.
Use these sentences: "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it."
- Task
- Search for the last used punctuation in a sentence, and determine its type according to its punctuation.
- Output one of these letters
- "E" (Exclamation!), "Q" (Question?), "S" (Serious.), "N" (Neutral).
- Extra
- Make your code able to determine multiple sentences.
Don't leave any errors!
- Metrics
- Counting
- Word frequency
- Letter frequency
- Jewels and stones
- I before E except after C
- Bioinformatics/base count
- Count occurrences of a substring
- Count how many vowels and consonants occur in a string
- Remove/replace
- XXXX redacted
- Conjugate a Latin verb
- Remove vowels from a string
- String interpolation (included)
- Strip block comments
- Strip comments from a string
- Strip a set of characters from a string
- Strip whitespace from a string -- top and tail
- Strip control codes and extended characters from a string
- Anagrams/Derangements/shuffling
- Word wheel
- ABC problem
- Sattolo cycle
- Knuth shuffle
- Ordered words
- Superpermutation minimisation
- Textonyms (using a phone text pad)
- Anagrams
- Anagrams/Deranged anagrams
- Permutations/Derangements
- Find/Search/Determine
- ABC words
- Odd words
- Word ladder
- Semordnilap
- Word search
- Wordiff (game)
- String matching
- Tea cup rim text
- Alternade words
- Changeable words
- State name puzzle
- String comparison
- Unique characters
- Unique characters in each string
- Extract file extension
- Levenshtein distance
- Palindrome detection
- Common list elements
- Longest common suffix
- Longest common prefix
- Compare a list of strings
- Longest common substring
- Find common directory path
- Words from neighbour ones
- Change e letters to i in words
- Non-continuous subsequences
- Longest common subsequence
- Longest palindromic substrings
- Longest increasing subsequence
- Words containing "the" substring
- Sum of the digits of n is substring of n
- Determine if a string is numeric
- Determine if a string is collapsible
- Determine if a string is squeezable
- Determine if a string has all unique characters
- Determine if a string has all the same characters
- Longest substrings without repeating characters
- Find words which contains all the vowels
- Find words which contains most consonants
- Find words which contains more than 3 vowels
- Find words which first and last three letters are equals
- Find words which odd letters are consonants and even letters are vowels or vice_versa
- Formatting
- Substring
- Rep-string
- Word wrap
- String case
- Align columns
- Literals/String
- Repeat a string
- Brace expansion
- Brace expansion using ranges
- Reverse a string
- Phrase reversals
- Comma quibbling
- Special characters
- String concatenation
- Substring/Top and tail
- Commatizing numbers
- Reverse words in a string
- Suffixation of decimal numbers
- Long literals, with continuations
- Numerical and alphabetical suffixes
- Abbreviations, easy
- Abbreviations, simple
- Abbreviations, automatic
- Song lyrics/poems/Mad Libs/phrases
- Mad Libs
- Magic 8-ball
- 99 Bottles of Beer
- The Name Game (a song)
- The Old lady swallowed a fly
- The Twelve Days of Christmas
- Tokenize
- Text between
- Tokenize a string
- Word break problem
- Tokenize a string with escaping
- Split a character string based on change of character
- Sequences
11l
F sentenceType(s)
I s.empty
R ‘’
[Char] types
L(c) s
I c == ‘?’
types.append(Char(‘Q’))
E I c == ‘!’
types.append(Char(‘E’))
E I c == ‘.’
types.append(Char(‘S’))
I s.last !C ‘?!.’
types.append(Char(‘N’))
R types.join(‘|’)
V s = ‘hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it’
print(sentenceType(s))
- Output:
Q|S|E|N
ALGOL 68
Classifies an empty string as "".
BEGIN # determine the type of a sentence by looking at the final punctuation #
CHAR exclamation = "E"; # classification codes... #
CHAR question = "Q";
CHAR serious = "S";
CHAR neutral = "N";
# returns the type(s) of the sentence(s) in s - exclamation, question, #
# serious or neutral; if there are multiple sentences #
# the types are separated by | #
PROC classify = ( STRING s )STRING:
BEGIN
STRING result := "";
BOOL pending neutral := FALSE;
FOR s pos FROM LWB s TO UPB s DO
IF pending neutral := FALSE;
CHAR c = s[ s pos ];
c = "?"
THEN result +:= question + "|"
ELIF c = "!"
THEN result +:= exclamation + "|"
ELIF c = "."
THEN result +:= serious + "|"
ELSE pending neutral := TRUE
FI
OD;
IF pending neutral
THEN result +:= neutral + "|"
FI;
# if s was empty, then return an empty string, otherwise remove the final separator #
IF result = "" THEN "" ELSE result[ LWB result : UPB result - 1 ] FI
END # classify # ;
# task test case #
print( ( classify( "hi there, how are you today? I'd like to present to you the washing machine 9001. "
+ "You have been nominated to win one of these! Just make sure you don't break it"
)
, newline
)
)
END
- Output:
Q|S|E|N
AutoHotkey
Sentence := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
Msgbox, % SentenceType(Sentence)
SentenceType(Sentence) {
Sentence := Trim(Sentence)
Loop, Parse, Sentence, .?!
{
N := (!E && !Q && !S)
, S := (InStr(SubStr(Sentence, InStr(Sentence, A_LoopField)+StrLen(A_LoopField), 3), "."))
, Q := (InStr(SubStr(Sentence, InStr(Sentence, A_LoopField)+StrLen(A_LoopField), 3), "?"))
, E := (InStr(SubStr(Sentence, InStr(Sentence, A_LoopField)+StrLen(A_LoopField), 3), "!"))
, type .= (E) ? ("E|") : ((Q) ? ("Q|") : ((S) ? ("S|") : "N|"))
, D := SubStr(Sentence, InStr(Sentence, A_LoopField)+StrLen(A_LoopField), 3)
}
return (D = SubStr(Sentence, 1, 3)) ? RTrim(RTrim(type, "|"), "N|") : RTrim(type, "|")
}
- Output:
Q|S|E|N
AWK
# syntax: GAWK -f DETERMINE_SENTENCE_TYPE.AWK
BEGIN {
str = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
main(str)
main("Exclamation! Question? Serious. Neutral")
exit(0)
}
# main(str): scan str one character at a time, appending each character to
# the global buffer `sentence`. Every "!", "." or "?" closes the current
# sentence, which prn() then reports as E, S or Q respectively.
# Any unterminated text left over at the end is reported as N (neutral).
#   str - the text to classify
#   c   - local scratch variable (extra parameter, the usual AWK idiom)
function main(str,    c) {
    while (length(str) > 0) {
        c = substr(str,1,1)
        sentence = sentence c
        if (c == "!") {
            prn("E")
        }
        else if (c == ".") {
            prn("S")
        }
        else if (c == "?") {
            prn("Q")
        }
        str = substr(str,2)
    }
    # Report a neutral sentence only when unterminated text really remains;
    # otherwise input ending in punctuation (or empty input) would produce a
    # spurious, empty "N" sentence.
    if (sentence ~ /[^ ]/) {
        prn("N")
    }
    # Drop any left-over blanks so they cannot leak into the next call.
    sentence = ""
    print("")
}
# prn(type): print one result line of the form "<type> <sentence>".
# Reads the global buffer `sentence`, strips its leading spaces first,
# and clears the buffer afterwards so the next sentence starts empty.
function prn(type) {
gsub(/^ +/,"",sentence)
printf("%s %s\n",type,sentence)
sentence = ""
}
- Output:
Q hi there, how are you today? S I'd like to present to you the washing machine 9001. E You have been nominated to win one of these! N Just make sure you don't break it E Exclamation! Q Question? S Serious. N Neutral
CLU
% This iterator takes a string and yields one of 'E', 'Q',
% 'S' or 'N' for every sentence found.
% Because sentences are separated by punctuation, only the
% last one can be 'N'.
sentence_types = iter (s: string) yields (char)
own punct: string := "!?." % relevant character classes
own space: string := " \t\n"
own types: string := "EQS" % sentence type characters
prev_punct: bool := false % whether the previous character was punctuation
last_punct: int := 0 % index of last punctuation character encountered
sentence: bool := true % whether there are words since the last punctuation
for c: char in string$chars(s) do
pu: int := string$indexc(c, punct)
sp: int := string$indexc(c, space)
if pu ~= 0 then
prev_punct := true
last_punct := pu
elseif sp ~= 0 then
if prev_punct then
% a space after punctuation means a sentence has ended here
yield(types[last_punct])
sentence := false
end
prev_punct := false
sentence := false
else
sentence := true
end
end
% handle the last sentence
if prev_punct then yield(types[last_punct])
elseif sentence then yield('N')
end
end sentence_types
% Test
start_up = proc ()
po: stream := stream$primary_output()
test: string :=
"hi there, how are you today? I'd like to " ||
"present to you the washing machine 9001. You " ||
"have been nominated to win one of these! Just " ||
"make sure you don't break it"
% print the type of each sentence
for c: char in sentence_types(test) do
stream$putc(po, c)
end
end start_up
- Output:
QSEN
Epoxy
const SentenceTypes: {
["?"]:"Q",
["."]:"S",
["!"]:"E"
}
fn DetermineSentenceType(Char)
return SentenceTypes[Char]||"N"
cls
fn GetSentences(Text)
var Sentences: [],
Index: 0,
Length: #Text
loop i:0;i<Length;i+:1 do
var Char: string.subs(Text,i,1)
var Type: DetermineSentenceType(Char)
if Type != "N" || i==Length-1 then
log(string.sub(Text,Index,i+1)+" ("+Type+")")
Index:i+2;
cls
cls
cls
GetSentences("hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it")
- Output:
hi there, how are you today? (Q) I'd like to present to you the washing machine 9001. (S) You have been nominated to win one of these! (E) Just make sure you don't break it (N)
Factor
This program attempts to prevent common abbreviations from ending sentences early. It also tries to handle parenthesized sentences and implements an additional type for exclamatory questions (EQ).
USING: combinators io kernel regexp sequences sets splitting
wrap.strings ;
! courtesy of https://www.infoplease.com/common-abbreviations
CONSTANT: common-abbreviations {
"A.B." "abbr." "Acad." "A.D." "alt." "A.M." "Assn."
"at. no." "at. wt." "Aug." "Ave." "b." "B.A." "B.C." "b.p."
"B.S." "c." "Capt." "cent." "co." "Col." "Comdr." "Corp."
"Cpl." "d." "D.C." "Dec." "dept." "dist." "div." "Dr." "ed."
"est." "et al." "Feb." "fl." "gal." "Gen." "Gov." "grad."
"Hon." "i.e." "in." "inc." "Inst." "Jan." "Jr." "lat."
"Lib." "long." "Lt." "Ltd." "M.D." "Mr." "Mrs." "mt." "mts."
"Mus." "no." "Nov." "Oct." "Op." "pl." "pop." "pseud." "pt."
"pub." "Rev." "rev." "R.N." "Sept." "Ser." "Sgt." "Sr."
"St." "uninc." "Univ." "U.S." "vol." "vs." "wt."
}
: sentence-enders ( str -- newstr )
R/ \)/ "" re-replace
" " split harvest
unclip-last swap
[ common-abbreviations member? ] reject
[ last ".!?" member? ] filter
swap suffix ;
: serious? ( str -- ? ) last CHAR: . = ;
: neutral? ( str -- ? ) last ".!?" member? not ;
: mixed? ( str -- ? ) "?!" intersect length 2 = ;
: exclamation? ( str -- ? ) last CHAR: ! = ;
: question? ( str -- ? ) last CHAR: ? = ;
: type ( str -- newstr )
{
{ [ dup serious? ] [ drop "S" ] }
{ [ dup neutral? ] [ drop "N" ] }
{ [ dup mixed? ] [ drop "EQ" ] }
{ [ dup exclamation? ] [ drop "E" ] }
{ [ dup question? ] [ drop "Q" ] }
[ drop "UNKNOWN" ]
} cond ;
: sentences ( str -- newstr )
sentence-enders [ type ] map "|" join ;
: show ( str -- )
dup sentences " -> " glue 60 wrap-string print ;
"Hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" show
nl
"(There was nary a mouse stirring.) But the cats were going
bonkers!" show
nl
"\"Why is the car so slow?\" she said." show
nl
"Hello, Mr. Anderson!" show
nl
"Are you sure?!?! How can you know?" show
- Output:
Hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it -> Q|S|E|N (There was nary a mouse stirring.) But the cats were going bonkers! -> S|E "Why is the car so slow?" she said. -> S Hello, Mr. Anderson! -> E Are you sure?!?! How can you know? -> EQ|Q
FreeBASIC
function sentype( byref s as string ) as string
    'Classifies the first sentence found in s and consumes it.
    'Result: "E" (exclamation), "Q" (question), "S" (serious) or
    '"N" (neutral, i.e. no terminating punctuation was found).
    'Side effect: s is shortened to the text after the terminator,
    'or emptied when the result is "N".
    dim as string code
    for position as uinteger = 1 to len(s)
        code = ""
        select case mid(s, position, 1)
        case "!"
            code = "E"
        case "."
            code = "S"
        case "?"
            code = "Q"
        end select
        if len(code) > 0 then
            'keep only what follows the terminating character
            s = mid(s, position + 1)
            return code
        end if
    next position
    'no punctuation anywhere: the remainder is one neutral sentence,
    'which can only be the last one
    s = ""
    return "N"
end function
dim as string spam = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
while len(spam)>0
print sentype(spam)
wend
- Output:
Q S E N
Go
package main
import (
"fmt"
"strings"
)
// sentenceType returns one code per sentence terminator found in s, joined
// with "|": "Q" for '?', "E" for '!' and "S" for '.'. When the final byte of
// s is not one of those terminators, a trailing "N" (neutral) is added for
// the unterminated last sentence. An empty input yields an empty string.
func sentenceType(s string) string {
	if s == "" {
		return ""
	}
	var codes []string
	for _, r := range s {
		switch r {
		case '?':
			codes = append(codes, "Q")
		case '!':
			codes = append(codes, "E")
		case '.':
			codes = append(codes, "S")
		}
	}
	// Inspect the last byte: anything other than a terminator means the
	// text ends with a sentence that has no closing punctuation.
	switch s[len(s)-1] {
	case '?', '!', '.':
	default:
		codes = append(codes, "N")
	}
	return strings.Join(codes, "|")
}
// main classifies the task's sample text with sentenceType and prints the
// resulting "|"-separated codes on one line.
func main() {
s := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
fmt.Println(sentenceType(s))
}
- Output:
Q|S|E|N
jq
Works with gojq, the Go implementation of jq
The following parses sentences with embedded quotations naively, so that for example the sentence "He asked 'How are you?'." results in: Q S
# Input: a string
# Output: a stream of sentence type indicators
def sentenceTypes:
def trim: sub("^ +";"") | sub(" +$";"");
def parse:
capture("(?<s>[^?!.]*)(?<p>[?!.])(?<remainder>.*)" )
// {p:"", remainder:""};
def encode:
if . == "?" then "Q"
elif . == "!" then "E"
elif . == "." then "S"
else "N"
end;
trim
| select(length>0)
| parse
| (.p | encode), (.remainder | sentenceTypes);
def s: "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it";
s | sentenceTypes
- Output:
Q S E N
Julia
const text = """
Hi there, how are you today? I'd like to present to you the washing machine 9001.
You have been nominated to win one of these! Just make sure you don't break it"""

# Map a punctuation string to its sentence-type code:
# "S" if it contains '.', else "E" for '!', else "Q" for '?', else "N".
haspunctotype(s) = '.' in s ? "S" : '!' in s ? "E" : '?' in s ? "Q" : "N"

# `text` is a constant, so the flattened copy gets its own name instead of
# rebinding the constant.
flattened = replace(text, "\n" => " ")

# Split at the zero-width positions just after and just before each of
# ? ! . so that sentences and their terminators alternate in `parsed`.
parsed = strip.(split(flattened, r"(?:(?:(?<=[\?\!\.])(?:))|(?:(?:)(?=[\?\!\.])))"))
isodd(length(parsed)) && push!(parsed, "")  # if ends without punctuation

# Walk the (sentence, terminator) pairs and print each with its type.
for i in 1:2:length(parsed)-1
    println(rpad(parsed[i] * parsed[i + 1], 52), " ==> ", haspunctotype(parsed[i + 1]))
end
- Output:
Hi there, how are you today? ==> Q I'd like to present to you the washing machine 9001. ==> S You have been nominated to win one of these! ==> E Just make sure you don't break it ==> N
Lua
-- Sample text from the task description.
text = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
-- Lookup from terminating punctuation to type code; the empty string is the
-- key used when a sentence has no terminator.
p2t = { [""]="N", ["."]="S", ["!"]="E", ["?"]="Q" }
-- Each match skips leading whitespace, captures a run of non-terminator
-- characters as s, then an optional single terminator (possibly "") as p.
for s, p in text:gmatch("%s*([^%!%?%.]+)([%!%?%.]?)") do
print(s..p..": "..p2t[p])
end
- Output:
hi there, how are you today?: Q I'd like to present to you the washing machine 9001.: S You have been nominated to win one of these!: E Just make sure you don't break it: N
Perl
use strict;
use warnings;
use feature 'say';
use Lingua::Sentence;
my $para1 = <<'EOP';
hi there, how are you today? I'd like to present to you the washing machine
9001. You have been nominated to win one of these! Just make sure you don't
break it
EOP
my $para2 = <<'EOP';
Just because there are punctuation characters like "?", "!" or especially "."
present, it doesn't necessarily mean you have reached the end of a sentence,
does it Mr. Magoo? The syntax highlighting here for Perl isn't bad at all.
EOP
my $splitter = Lingua::Sentence->new("en");
# Classify every sentence of each paragraph by its final character:
# "!" => E, "?" => Q, "." => S, anything else => N.
for my $text ($para1, $para2) {
    # The result of $splitter->split is itself split on newlines, so it
    # presumably returns the sentences as one newline-separated string --
    # confirm against the Lingua::Sentence documentation.
    # NOTE(review): s/\n//gr removes the paragraph's line breaks without
    # inserting a space; verify that words on either side of a break are
    # not glued together.
    for my $s (split /\n/, $splitter->split( $text =~ s/\n//gr )) {
        print "$s| ";
        if    ($s =~ /!$/)  { say 'E' }
        elsif ($s =~ /\?$/) { say 'Q' }
        elsif ($s =~ /\.$/) { say 'S' }
        else                { say 'N' }
    }
}
- Output:
hi there, how are you today?| Q I'd like to present to you the washing machine 9001.| S You have been nominated to win one of these!| E Just make sure you don't break it| N Just because there are punctuation characters like "?", "!" or especially "." present, it doesn't necessarily mean you have reached the end of a sentence, does it Mr. Magoo?| Q The syntax highlighting here for Perl isn't bad at all.| S
Phix
with javascript_semantics constant s = `hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it` sequence t = split_any(trim(s),"?!."), u = substitute_all(s,t,repeat("|",length(t))), v = substitute_all(u,{"|?","|!","|.","|"},"QESN"), w = join(v,'|') ?w
- Output:
"Q|S|E|N"
Python
import re
txt = """
Hi there, how are you today? I'd like to present to you the washing machine 9001.
You have been nominated to win one of these! Just make sure you don't break it"""


def haspunctotype(s):
    """Return the type code for the closing-punctuation string `s`.

    'S' if `s` contains '.', else 'E' if it contains '!', else 'Q' if it
    contains '?', else 'N' (no closing mark).
    """
    return 'S' if '.' in s else 'E' if '!' in s else 'Q' if '?' in s else 'N'


def split_sentences(text):
    """Split `text` into a list of (sentence, type code) pairs.

    Newlines are removed first. A sentence ends at '?', '!' or '.'; text
    after the last mark forms a final sentence with no closing mark.
    Pieces that have neither text nor a mark are skipped, so a paragraph
    ending in punctuation does not yield a spurious empty sentence.
    """
    # With a capturing group, re.split returns the pieces alternately:
    # text, mark, text, mark, ..., text (the last text may be empty).
    pieces = re.split(r'([?!.])', text.replace('\n', ''))
    pairs = []
    for i in range(0, len(pieces), 2):
        body = pieces[i].strip()
        mark = pieces[i + 1] if i + 1 < len(pieces) else ''
        if body or mark:
            pairs.append((body + mark, haspunctotype(mark)))
    return pairs


for sentence, kind in split_sentences(txt):
    print(sentence.ljust(54), "==>", kind)
- Output:
Hi there, how are you today? ==> Q I'd like to present to you the washing machine 9001. ==> S You have been nominated to win one of these! ==> E Just make sure you don't break it ==> N
Or for more generality, and an alternative to hand-crafted regular expressions:
'''Grouping and tagging by final character of string'''
from functools import reduce
from itertools import groupby
# tagGroups :: Dict -> [String] -> [(String, [String])]
def tagGroups(tagDict):
    '''A function over a list of sentences which returns
       (tag, sentences) tuples, one per distinct final
       character. The tag is looked up in tagDict, with
       'Not punctuated' used for characters it lacks.
    '''
    def go(sentences):
        # groupby only merges adjacent items, so the sentences
        # are first sorted by the same key (their last character).
        ordered = sorted(sentences, key=last)
        tagged = []
        for finalChar, group in groupby(ordered, key=last):
            tagged.append(
                (tagDict.get(finalChar, 'Not punctuated'), list(group))
            )
        return tagged
    return go
# sentenceSegments :: Chars -> String -> [String]
def sentenceSegments(punctuationChars):
    '''A function over a string which returns its sentences,
       split wherever one of the supplied punctuation
       characters is followed by a space. Each sentence is
       stripped of surrounding whitespace.

       Segments that are empty after stripping are dropped,
       so that whitespace after the final punctuation mark
       does not produce an empty sentence (which has no
       last character to classify).
    '''
    def go(s):
        segments = (
            ''.join(cs).strip() for cs
            in splitBy(
                sentenceBreak(punctuationChars)
            )(s)
        )
        return [segment for segment in segments if segment]
    return go
# sentenceBreak :: Chars -> (Char, Char) -> Bool
def sentenceBreak(finalPunctuation):
    '''A predicate over two consecutive characters: True
       only when the first is one of the given final
       punctuation marks and the second is a space.
    '''
    def go(a, b):
        if a not in finalPunctuation:
            return False
        return " " == b
    return go
# ------------------------- TEST -------------------------
# main :: IO ()
def main():
    '''Builds a sample text, splits it into sentences,
       and prints the sentences grouped under the tag
       of their final punctuation mark.
    '''
    tags = {'!': 'E', '?': 'Q', '.': 'S'}

    # The sample sentences, joined by single spaces.
    lines = [
        "Hi there, how are you today?",
        "I'd like to present to you the washing machine 9001.",
        "You have been nominated to win one of these!",
        "Might it be possible to add some challenge to this task?",
        "Feels as light as polystyrene filler.",
        "But perhaps substance isn't the goal!",
        "Just make sure you don't break off before the"
    ]
    sample = ' '.join(lines)

    # Split at the punctuation marks which have tags,
    segment = sentenceSegments(tags.keys())
    sentences = segment(sample)

    # then group under those tags and print each group.
    grouped = tagGroups(tags)
    for kv in grouped(sentences):
        print(kv)
# ----------------------- GENERIC ------------------------
# last :: [a] -> a
def last(xs):
    '''The final item of xs, which must not be empty.'''
    final = xs[-1]
    return final
# splitBy :: (a -> a -> Bool) -> [a] -> [[a]]
def splitBy(p):
    '''A function over a sequence xs which returns a list
       of lists: xs split between every pair of consecutive
       items (a, b) for which p(a, b) is true.

       An empty xs gives []. A single item gives [[item]],
       so the result is always a list of lists.
    '''
    # go :: [a] -> [[a]]
    def go(xs):
        groups = []
        prev = None
        for x in xs:
            # The first item always opens a group; after that
            # a new group starts wherever p matches the pair.
            if groups and not p(prev, x):
                groups[-1].append(x)
            else:
                groups.append([x])
            prev = x
        return groups
    return go
# MAIN ---
# Run the demonstration only when executed as a script.
if '__main__' == __name__:
    main()
- Output:
('E', ['You have been nominated to win one of these!', "But perhaps substance isn't the goal!"])
('S', ["I'd like to present to you the washing machine 9001.", 'Feels as light as polystyrene filler.'])
('Q', ['Hi there, how are you today?', 'Might it be possible to add some challenge to this task?'])
('Not punctuated', ["Just make sure you don't break off before the"])
Raku
# Print every sentence of a sample text followed by a one-letter code
# chosen from the sentence's final punctuation character.
# NOTE(review): get_sentences is presumably exported by this module - confirm.
use Lingua::EN::Sentence;
# The sample text, held in a heredoc that ends at the PARAGRAPH marker.
my $paragraph = q:to/PARAGRAPH/;
hi there, how are you today? I'd like to present to you the washing machine
9001. You have been nominated to win one of these! Just make sure you don't
break it
Just because there are punctuation characters like "?", "!" or especially "."
present, it doesn't necessarily mean you have reached the end of a sentence,
does it Mr. Magoo? The syntax highlighting here for Raku isn't the best.
PARAGRAPH
# Each sentence becomes "sentence | code"; the results are joined with a
# blank line between them and printed.
say join "\n\n", $paragraph.&get_sentences.map: {
# Capture a punctuation character at the very end of the sentence, if any.
/(<:punct>)$/;
# Append the code selected by the captured character.
$_ ~ ' | ' ~ do
given $0 {
when '!' { 'E' };
when '?' { 'Q' };
when '.' { 'S' };
# Anything else, including no capture at all.
default { 'N' };
}
}
- Output:
hi there, how are you today? | Q I'd like to present to you the washing machine 9001. | S You have been nominated to win one of these! | E Just make sure you don't break it | N Just because there are punctuation characters like "?", "!" or especially "." present, it doesn't necessarily mean you have reached the end of a sentence, does it Mr. Magoo? | Q The syntax highlighting here for Raku isn't the best. | S
Vlang
// sentence_type returns one type letter per '?', '!' or '.' found in `s`
// ('Q', 'E' and 'S' respectively), joined by '|'. A trailing 'N' is added
// when `s` does not end with one of those marks. An empty `s` gives ''.
fn sentence_type(s string) string {
	if s.len == 0 {
		return ''
	}
	mut codes := []string{}
	for c in s.split('') {
		match c {
			'?' { codes << 'Q' }
			'!' { codes << 'E' }
			'.' { codes << 'S' }
			else {}
		}
	}
	// The last byte decides whether the text ends without a closing mark.
	tail := s[s.len - 1..]
	if !'?!.'.contains(tail) {
		codes << 'N'
	}
	return codes.join('|')
}
// main prints the type codes for the sample paragraph.
fn main() {
	paragraph := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
	result := sentence_type(paragraph)
	println(result)
}
- Output:
Q|S|E|N
Wren
// Returns one type letter per "?", "!" or "." found in s ("Q", "E" and "S"
// respectively), joined by "|". A trailing "N" is added when s does not
// end with one of those marks. An empty s gives "".
var sentenceType = Fn.new { |s|
    if (s == "") return ""
    var codes = { "?": "Q", "!": "E", ".": "S" }
    var types = []
    for (ch in s) {
        if (codes.containsKey(ch)) types.add(codes[ch])
    }
    // The last element of the string decides whether it is unterminated.
    if (!codes.containsKey(s[-1])) types.add("N")
    return types.join("|")
}
// Classify the sample paragraph and print the resulting codes.
var s = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
var result = sentenceType.call(s)
System.print(result)
- Output:
Q|S|E|N
The following alternative version takes the simplistic view that (unless they end the final sentence of the paragraph) ?, ! or . will only end a sentence if they're immediately followed by a space. This of course is nonsense, given the way English is written nowadays, but it's probably an improvement on the first version without the need to search through an inevitably incomplete list of abbreviations.
import "./pattern" for Pattern
import "./trait" for Indexed
// Type code for each sentence ending; "" stands for "no closing mark".
var map = { "?": "Q", "!": "E", ".": "S", "": "N" }

// Sentence separator: per the accompanying description, a "?", "!" or "."
// that is immediately followed by a space.
var p = Pattern.new("[? |! |. ]")

var paras = [
    "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it",
    "hi there, how are you on St.David's day (isn't it a holiday yet?), Mr.Smith? I'd like to present to you (well someone has to win one!) the washing machine 900.1. You have been nominated by Capt.Johnson('?') to win one of these! Just make sure you (or Mrs.Smith) don't break it. By the way, what the heck is an exclamatory question!?"
]

for (para in paras) {
    var text = para.trim()
    var sentences = p.splitAll(text)
    // First character of each separator match, i.e. the punctuation mark.
    var endings = p.findAll(text).map { |m| m.text[0] }.toList

    // The final sentence has no separator after it: take its closing mark
    // (if any) off the text and record it as that sentence's ending.
    var finalChar = sentences[-1][-1]
    if ("?!.".contains(finalChar)) {
        endings.add(finalChar)
        sentences[-1] = sentences[-1][0...-1]
    } else {
        endings.add("")
    }

    for (ix in 0...sentences.count) {
        var ending = endings[ix]
        System.print("%(map[ending]) <- %(sentences[ix] + ending)")
    }
    System.print()
}
- Output:
Q <- hi there, how are you today?
S <- I'd like to present to you the washing machine 9001.
E <- You have been nominated to win one of these!
N <- Just make sure you don't break it

Q <- hi there, how are you on St.David's day (isn't it a holiday yet?), Mr.Smith?
S <- I'd like to present to you (well someone has to win one!) the washing machine 900.1.
E <- You have been nominated by Capt.Johnson('?') to win one of these!
S <- Just make sure you (or Mrs.Smith) don't break it.
Q <- By the way, what the heck is an exclamatory question!?
XPL0
include xpllib;         \for StrLen
int  Sentences, Idx, Len;
char Text;
[Sentences:= ["hi there, how are you today?",
              "I'd like to present to you the washing machine 9001.",
              "You have been nominated to win one of these!",
              "Just make sure you don't break it"];
for Idx:= 0 to 3 do                     \visit each of the four sentences
    [Text:= Sentences(Idx);
    Len:= StrLen(Text);
    case Text(Len-1) of                 \classify by the final character
      ^!: ChOut(0, ^E);
      ^?: ChOut(0, ^Q);
      ^.: ChOut(0, ^S)
    other ChOut(0, ^N);
    if Idx < 3 then ChOut(0, ^|);       \separator between codes, none after the last
    ];
]
- Output:
Q|S|E|N