Determine sentence type: Difference between revisions

Content added Content deleted

Inline

Revision as of 23:37, 26 August 2022

Use these sentences: "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it."

Task: Find the final punctuation mark of a sentence and determine the sentence's type from that mark.

Output one of these letters: "E" (Exclamation!), "Q" (Question?), "S" (Serious.), "N" (Neutral).

Extra: Make your code able to determine multiple sentences.

Don't leave any errors!

Other tasks related to string operations:

Metrics

Counting

Remove/replace

Anagrams/Derangements/shuffling

Find/Search/Determine

Formatting

Song lyrics/poems/Mad Libs/phrases

Tokenize

Sequences

11l

Translation of: Go

// Classify the sentences in s by their terminating punctuation.
// Returns the codes joined with '|': Q for '?', E for '!', S for '.',
// followed by N when s does not end in one of those characters.
// An empty s gives an empty result.
F sentenceType(s)
   I s.empty
      R ‘’

   [Char] types
   // every terminator found contributes one code, in order of appearance
   L(c) s
      I c == ‘?’
         types.append(Char(‘Q’))
      E I c == ‘!’
         types.append(Char(‘E’))
      E I c == ‘.’
         types.append(Char(‘S’))

   // the text is treated as ending in a neutral sentence when its last
   // character is not a terminator
   I s.last !C ‘?!.’
      types.append(Char(‘N’))

   R types.join(‘|’)

// task test case
V s = ‘hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it’
print(sentenceType(s))

Output:

Q|S|E|N

ALGOL 68

Classifies an empty string as "".

BEGIN # determine the type of a sentence by looking at the final punctuation   #
    CHAR exclamation = "E"; # classification codes... #
    CHAR question    = "Q";
    CHAR serious     = "S";
    CHAR neutral     = "N";
    # returns the type(s) of the sentence(s) in s - exclamation, question,     #
    #                     serious or neutral; if there are multiple sentences  #
    #                     the types are separated by |                         #
    PROC classify = ( STRING s )STRING:
         BEGIN
            STRING result := "";
            # TRUE when the most recently scanned character was not one of ? ! . #
            BOOL pending neutral := FALSE; 
            FOR s pos FROM LWB s TO UPB s DO
                # the flag is cleared for every character and only set again by the ELSE part #
                IF   pending neutral := FALSE;
                     CHAR c = s[ s pos ];
                     c = "?"
                THEN result +:= question    + "|"
                ELIF c = "!"
                THEN result +:= exclamation + "|"
                ELIF c = "."
                THEN result +:= serious     + "|"
                ELSE pending neutral := TRUE
                FI
            OD;
            # the text ended without a terminator, so its last sentence is neutral #
            IF   pending neutral
            THEN result +:= neutral + "|"
            FI;
            # if s was empty, then return an empty string, otherwise remove the final separator #
            IF result = "" THEN "" ELSE result[ LWB result : UPB result - 1 ] FI
         END # classify # ;
    # task test case: the sample text ends without punctuation #
    print( ( classify( "hi there, how are you today? I'd like to present to you the washing machine 9001. "
                     + "You have been nominated to win one of these! Just make sure you don't break it"
                     )
           , newline
           )
         )
END

Output:

Q|S|E|N

AutoHotkey

; Task test case: classify the sample text and show the result in a message box.
Sentence := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
Msgbox, % SentenceType(Sentence)

; SentenceType(Sentence)
; Returns the sentence type codes (E, Q, S, N) separated by "|".
; The text is trimmed and then parsed into fields delimited by ".", "?" or "!".
; For each field the (up to) three characters that follow the field's position
; in Sentence, as located by InStr, are inspected for a delimiter character.
SentenceType(Sentence) {
	Sentence := Trim(Sentence)
	Loop, Parse, Sentence, .?!
	{
		; N is computed from the flags left by the previous iteration and is not
		; read anywhere else in this function.
		; S, Q and E record whether ".", "?" or "!" occurs in the three characters
		; after the current field; type receives the matching code ("N|" when none
		; matched) and D keeps those three characters for the check after the loop.
		N := (!E && !Q && !S)
		, S := (InStr(SubStr(Sentence, InStr(Sentence, A_LoopField)+StrLen(A_LoopField), 3), "."))
		, Q := (InStr(SubStr(Sentence, InStr(Sentence, A_LoopField)+StrLen(A_LoopField), 3), "?"))
		, E := (InStr(SubStr(Sentence, InStr(Sentence, A_LoopField)+StrLen(A_LoopField), 3), "!"))
		, type .= (E) ? ("E|") : ((Q) ? ("Q|") : ((S) ? ("S|") : "N|"))
		, D := SubStr(Sentence, InStr(Sentence, A_LoopField)+StrLen(A_LoopField), 3)
	}
	; When the last D equals the first three characters of Sentence, trailing "N"
	; and "|" characters are stripped as well; otherwise only the trailing "|".
	; NOTE(review): presumably this detects an empty final field (text ending in
	; a delimiter) - confirm against AutoHotkey InStr behaviour for empty needles.
	return (D = SubStr(Sentence, 1, 3)) ? RTrim(RTrim(type, "|"), "N|") : RTrim(type, "|")
}

Output:

Q|S|E|N

AWK

# syntax: GAWK -f DETERMINE_SENTENCE_TYPE.AWK
# Entry point: runs main() on the task's sample text and on a second text that
# contains one sentence of each type, then exits without reading any input.
BEGIN {
    str = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
    main(str)
    main("Exclamation! Question? Serious. Neutral")
    exit(0)
}
# main(str): classify every sentence in str.
# Prints one "<type> <sentence>" line per sentence (through prn) followed by
# an empty separator line.
#   str     - text to scan; each "!", "." or "?" terminates a sentence
#   c, i, n - local scratch variables (extra parameters are awk's locals)
# The global variable `sentence` is the buffer shared with prn().
function main(str,  c, i, n) {
    n = length(str)
    # walk the text by index instead of repeatedly chopping off its first character
    for (i = 1; i <= n; i++) {
      c = substr(str,i,1)
      sentence = sentence c
      if (c == "!") {
        prn("E")
      }
      else if (c == ".") {
        prn("S")
      }
      else if (c == "?") {
        prn("Q")
      }
    }
    # Whatever is still buffered has no terminating punctuation and is neutral.
    # Nothing is reported when the buffer is empty or holds only blanks, which
    # happens for empty input or input that already ended with punctuation.
    if (sentence ~ /[^ ]/) {
      prn("N")
    }
    sentence = ""
    print("")
}
# prn(type): print the buffered sentence, without its leading blanks, prefixed
# by its type code, then empty the global `sentence` buffer.
function prn(type) {
    sub(/^ +/, "", sentence)
    printf "%s %s\n", type, sentence
    sentence = ""
}

Output:

Q hi there, how are you today?
S I'd like to present to you the washing machine 9001.
E You have been nominated to win one of these!
N Just make sure you don't break it

E Exclamation!
Q Question?
S Serious.
N Neutral

CLU

% This iterator takes a string and yields one of 'E', 'Q',
% 'S' or 'N' for every sentence found.
% Because sentences are separated by punctuation, only the
% last one can be 'N'.
%
% A type is yielded when a whitespace character is reached while
% prev_punct is set, and once more at the end of the string.
% For several punctuation characters before that point, the last
% one seen decides the type.

sentence_types = iter (s: string) yields (char)
    own punct: string := "!?."  % relevant character classes
    own space: string := " \t\n"
    own types: string := "EQS"  % sentence type characters
    
    prev_punct: bool := false   % whether the previous character was punctuation
    last_punct: int := 0        % index of last punctuation character encountered
    sentence: bool := true      % whether there are words since the last punctuation
    
    for c: char in string$chars(s) do
        % position of c in each class string; 0 means c is not in that class
        pu: int := string$indexc(c, punct)
        sp: int := string$indexc(c, space)
        if pu ~= 0 then
            prev_punct := true
            last_punct := pu 
        elseif sp ~= 0 then
            if prev_punct then
                % a space after punctuation means a sentence has ended here
                yield(types[last_punct])
                sentence := false
            end
            prev_punct := false
            sentence := false
        else
            % NOTE(review): prev_punct is left unchanged here, so punctuation
            % followed directly by another character (e.g. "9.5") still ends a
            % sentence at the next whitespace - confirm this is intended.
            sentence := true
        end
    end
    
    % handle the last sentence
    if prev_punct then yield(types[last_punct])
    elseif sentence then yield('N')
    end
end sentence_types

% Test: runs sentence_types over the task's sample text and writes the
% yielded type characters to the primary output, without separators.
start_up = proc ()
    po: stream := stream$primary_output()
    test: string := 
        "hi there, how are you today? I'd like to " ||
        "present to you the washing machine 9001. You " ||
        "have been nominated to win one of these! Just " ||
        "make sure you don't break it" 

    % print the type of each sentence
    for c: char in sentence_types(test) do  
        stream$putc(po, c)
    end
end start_up

Output:

QSEN

Epoxy

const SentenceTypes: {
	["?"]:"Q",
	["."]:"S",
	["!"]:"E"
}

fn DetermineSentenceType(Char)
	return SentenceTypes[Char]||"N"
cls

fn GetSentences(Text)
	var Sentences: [],
		Index: 0,
		Length: #Text
	loop i:0;i<Length;i+:1 do
		var Char: string.subs(Text,i,1)
		var Type: DetermineSentenceType(Char)
		if Type != "N" || i==Length-1 then
			log(string.sub(Text,Index,i+1)+" ("+Type+")")
			Index:i+2;
		cls
	cls
cls

GetSentences("hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it")

Output:

hi there, how are you today? (Q)
I'd like to present to you the washing machine 9001. (S)
You have been nominated to win one of these! (E)
Just make sure you don't break it (N)

Factor

This program attempts to prevent common abbreviations from ending sentences early. It also tries to handle parenthesized sentences and implements an additional type for exclamatory questions (EQ).

Works with: Factor version 0.99 2021-06-02

USING: combinators io kernel regexp sequences sets splitting
wrap.strings ;

! courtesy of https://www.infoplease.com/common-abbreviations

CONSTANT: common-abbreviations {
    "A.B." "abbr." "Acad." "A.D." "alt." "A.M." "Assn."
    "at. no." "at. wt." "Aug." "Ave." "b." "B.A." "B.C." "b.p."
    "B.S." "c." "Capt." "cent." "co." "Col." "Comdr." "Corp."
    "Cpl." "d." "D.C." "Dec." "dept." "dist." "div." "Dr." "ed."
    "est." "et al." "Feb." "fl." "gal." "Gen." "Gov." "grad."
    "Hon." "i.e." "in." "inc." "Inst." "Jan." "Jr." "lat."
    "Lib." "long." "Lt." "Ltd." "M.D." "Mr." "Mrs." "mt." "mts."
    "Mus." "no." "Nov." "Oct." "Op." "pl." "pop." "pseud." "pt."
    "pub." "Rev." "rev." "R.N." "Sept." "Ser." "Sgt." "Sr."
    "St." "uninc." "Univ." "U.S." "vol." "vs." "wt."
}

! Reduce a paragraph to the words that decide its sentence types:
! close-parens are deleted, the text is split on spaces (empty pieces
! dropped) and the final word is set aside. Of the remaining words,
! those listed in common-abbreviations are rejected and only words
! ending in . ! or ? are kept. The final word is then appended again,
! so it is always part of the result.
: sentence-enders ( str -- newstr )
    R/ \)/ "" re-replace
    " " split harvest
    unclip-last swap
    [ common-abbreviations member? ] reject
    [ last ".!?" member? ] filter
    swap suffix ;

! Predicates on a single word. All but mixed? look only at the last
! character; mixed? tests whether the word contains both ? and !.
: serious? ( str -- ? ) last CHAR: . = ;
: neutral? ( str -- ? ) last ".!?" member? not ;
: mixed? ( str -- ? ) "?!" intersect length 2 = ;
: exclamation? ( str -- ? ) last CHAR: ! = ;
: question? ( str -- ? ) last CHAR: ? = ;

! Map a word to its type code. The clauses are tried in order, so
! mixed? ("EQ") is checked before the plain exclamation and question
! cases.
: type ( str -- newstr )
    {
        { [ dup serious? ] [ drop "S" ] }
        { [ dup neutral? ] [ drop "N" ] }
        { [ dup mixed? ] [ drop "EQ" ] }
        { [ dup exclamation? ] [ drop "E" ] }
        { [ dup question? ] [ drop "Q" ] }
        [ drop "UNKNOWN" ]
    } cond ;

! The type codes of all sentences in str, joined with "|".
: sentences ( str -- newstr )
    sentence-enders [ type ] map "|" join ;

! Print str and its type codes as "str -> codes", wrapped at 60 columns.
: show ( str -- )
    dup sentences " -> " glue 60 wrap-string print ;

"Hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it" show
nl
"(There was nary a mouse stirring.) But the cats were going
bonkers!" show
nl
"\"Why is the car so slow?\" she said." show
nl
"Hello, Mr. Anderson!" show
nl
"Are you sure?!?! How can you know?" show

Output:

Hi there, how are you today? I'd like to present to you the
washing machine 9001. You have been nominated to win one of
these! Just make sure you don't break it -> Q|S|E|N

(There was nary a mouse stirring.) But the cats were going
bonkers! -> S|E

"Why is the car so slow?" she said. -> S

Hello, Mr. Anderson! -> E

Are you sure?!?! How can you know? -> EQ|Q

Go

Translation of: Wren

package main

import (
    "fmt"
    "strings"
)

// sentenceType classifies the sentences in s by their terminating
// punctuation and returns the codes joined with "|": "Q" for '?', "E" for
// '!' and "S" for '.'. When the final byte of s is not one of those marks a
// trailing "N" is added. An empty s yields an empty string.
func sentenceType(s string) string {
    if s == "" {
        return ""
    }
    var codes []string
    for _, r := range s {
        switch r {
        case '?':
            codes = append(codes, "Q")
        case '!':
            codes = append(codes, "E")
        case '.':
            codes = append(codes, "S")
        }
    }
    switch s[len(s)-1] {
    case '?', '!', '.':
        // the final sentence has already been recorded by the loop
    default:
        codes = append(codes, "N")
    }
    return strings.Join(codes, "|")
}

// main prints the sentence types of the task's sample text.
func main() {
    const sample = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
    result := sentenceType(sample)
    fmt.Println(result)
}

Output:

Q|S|E|N

jq

Works with: jq

Works with gojq, the Go implementation of jq

The following parses sentences with embedded quotations naively, so that for example the sentence "He asked 'How are you?'." results in: Q S

# Input: a string
# Output: a stream of sentence type indicators
# The input is trimmed; an empty remainder produces no output. Otherwise
# one indicator is emitted for the text up to and including the first
# terminator and the filter recurses on whatever follows it.
def sentenceTypes:
  # remove leading and trailing spaces
  def trim: sub("^ +";"") | sub(" +$";"");
  # split off the first terminator; when the text contains no terminator
  # capture produces nothing and the alternative object is used instead
  def parse:
    capture("(?<s>[^?!.]*)(?<p>[?!.])(?<remainder>.*)" )
    // {p:"", remainder:""};
  # translate a terminator (or "" for none) into its indicator
  def encode:
    if   . == "?" then "Q"
    elif . == "!" then "E"
    elif . == "." then "S"
    else "N"
    end;
  trim
  | select(length>0)
  | parse
  | (.p | encode), (.remainder | sentenceTypes);

# task test case
def s: "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it";

s | sentenceTypes

Output:

Q
S
E
N

Julia

# Sample paragraph exactly as written, including its line break.
const rawtext = """
Hi there, how are you today? I'd like to present to you the washing machine 9001.
You have been nominated to win one of these! Just make sure you don't break it"""

# Map a punctuation string to its sentence type code:
# "S" for '.', "E" for '!', "Q" for '?', and "N" when none of them is present.
haspunctotype(s) = '.' in s ? "S" : '!' in s ? "E" : '?' in s ? "Q" : "N"

# Single-line copy of the sample. It is bound once under its own name instead
# of reassigning a `const` global, which Julia warns about.
const text = replace(rawtext, "\n" => " ")

# Split at the zero-width positions just before and just after every
# terminator, so the pieces alternate: sentence text, punctuation, ...
parsed = strip.(split(text, r"(?:(?:(?<=[\?\!\.])(?:))|(?:(?:)(?=[\?\!\.])))"))
# A text that ends with punctuation leaves an empty piece at the end; drop it
# so that no empty "N" sentence is reported.
!isempty(parsed) && isempty(last(parsed)) && pop!(parsed)
isodd(length(parsed)) && push!(parsed, "")  # if ends without punctuation
for i in 1:2:length(parsed)-1
    println(rpad(parsed[i] * parsed[i + 1], 52),  " ==> ", haspunctotype(parsed[i + 1]))
end

Output:

Hi there, how are you today?                         ==> Q
I'd like to present to you the washing machine 9001. ==> S
You have been nominated to win one of these!         ==> E
Just make sure you don't break it                    ==> N

Lua

-- Sample paragraph from the task description.
text = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
-- Sentence type code for each terminator; the empty string stands for
-- a sentence that has no terminator.
p2t = { ["?"] = "Q", ["!"] = "E", ["."] = "S", [""] = "N" }
-- Each match skips leading whitespace, then captures the sentence text and
-- its optional terminator.
for body, mark in string.gmatch(text, "%s*([^%!%?%.]+)([%!%?%.]?)") do
  print(string.format("%s%s:  %s", body, mark, p2t[mark]))
end

Output:

hi there, how are you today?:  Q
I'd like to present to you the washing machine 9001.:  S
You have been nominated to win one of these!:  E
Just make sure you don't break it:  N

Perl

use strict;
use warnings;
use feature 'say';
use Lingua::Sentence;

my $para1 = <<'EOP';
hi there, how are you today? I'd like to present to you the washing machine
9001. You have been nominated to win one of these! Just make sure you don't
break it
EOP

my $para2 = <<'EOP';
Just because there are punctuation characters like "?", "!" or especially "."
present, it doesn't necessarily mean you have reached the end of a sentence,
does it Mr. Magoo? The syntax highlighting here for Perl isn't bad at all.
EOP

# Split each paragraph into sentences with Lingua::Sentence and print every
# sentence followed by "| " and its type code:
#   E - ends with "!", Q - ends with "?", S - ends with ".", N - anything else.
my $splitter = Lingua::Sentence->new("en");
for my $text ($para1, $para2) {
  # Flatten the paragraph to one line. Line breaks become single spaces so the
  # words on either side are not fused together, and trailing whitespace is
  # dropped so the end-of-sentence tests below see the final character.
  (my $flat = $text) =~ s/\s*\n\s*/ /g;
  $flat =~ s/\s+\z//;
  # The splitter's result is broken on newlines, giving one sentence per item.
  for my $s (split /\n/, $splitter->split($flat)) {
    print "$s| ";
    if    ($s =~ /!$/)  { say 'E' }
    elsif ($s =~ /\?$/) { say 'Q' }
    elsif ($s =~ /\.$/) { say 'S' }
    else                { say 'N' }
  }
}

Output:

hi there, how are you today?| Q
I'd like to present to you the washing machine 9001.| S
You have been nominated to win one of these!| E
Just make sure you don't break it| N
Just because there are punctuation characters like "?", "!" or especially "." present, it doesn't necessarily mean you have reached the end of a sentence, does it Mr. Magoo?| Q
The syntax highlighting here for Perl isn't bad at all.| S

Phix

with javascript_semantics
-- Sample paragraph from the task description.
constant s = `hi there, how are you today? I'd like to present 
to you the washing machine 9001. You have been nominated to win 
one of these! Just make sure you don't break it`
-- t: the pieces of the trimmed text between the delimiters ? ! and .
-- u: s with every piece from t substituted by "|"
-- v: u with "|?", "|!", "|." and "|" substituted by Q, E, S and N
-- w: v joined with '|', which is what gets printed
-- NOTE(review): relies on substitute_all applying the substitutions in the
-- listed order, so the bare "|" is only matched last - confirm.
sequence t = split_any(trim(s),"?!."),
         u = substitute_all(s,t,repeat("|",length(t))),
         v = substitute_all(u,{"|?","|!","|.","|"},"QESN"),
         w = join(v,'|')
?w

Output:

"Q|S|E|N"

Python

import re

txt = """
Hi there, how are you today? I'd like to present to you the washing machine 9001.
You have been nominated to win one of these! Just make sure you don't break it"""

def haspunctotype(s):
    '''Sentence type code for the punctuation found in s:
       'S' for '.', 'E' for '!', 'Q' for '?' (tested in that
       order), or 'N' when s contains none of them.
    '''
    for mark, code in (('.', 'S'), ('!', 'E'), ('?', 'Q')):
        if mark in s:
            return code
    return 'N'

# Flatten the text to a single line. Line breaks become spaces so that words
# on either side of a break are not fused together.
txt = re.sub('\n', ' ', txt)
# Split at the zero-width positions just before and just after every
# terminator, so the pieces alternate: sentence text, punctuation, ...
# (raw string: \? \! \. are regex escapes, not Python string escapes)
pars = [s.strip() for s in re.split(r"(?:(?:(?<=[\?\!\.])(?:))|(?:(?:)(?=[\?\!\.])))", txt)]
# A text that ends with punctuation leaves an empty piece at the end; drop it
# so that no empty 'N' sentence is reported.
if pars and not pars[-1]:
    pars.pop()
if len(pars) % 2:
    pars.append('')  # if ends without punctuation
for i in range(0, len(pars)-1, 2):
    print((pars[i] + pars[i + 1]).ljust(54), "==>", haspunctotype(pars[i + 1]))

Output:

Hi there, how are you today?                           ==> Q
I'd like to present to you the washing machine 9001.   ==> S
You have been nominated to win one of these!           ==> E
Just make sure you don't break it                      ==> N

Or for more generality, and an alternative to hand-crafted regular expressions:

'''Grouping and tagging by final character of string'''

from functools import reduce
from itertools import groupby


# tagGroups :: Dict -> [String] -> [(String, [String])]
def tagGroups(tagDict):
    '''A function from a list of sentences to a list of
       (tag, sentences) tuples. Sentences are sorted and
       grouped by their final character; the tag is looked
       up in tagDict, with 'Not punctuated' as the default.
    '''
    def go(sentences):
        ordered = sorted(sentences, key=last)
        tagged = []
        for finalChar, members in groupby(ordered, key=last):
            label = tagDict.get(finalChar, 'Not punctuated')
            tagged.append((label, list(members)))
        return tagged
    return go


# sentenceSegments :: Chars -> String -> [String]
def sentenceSegments(punctuationChars):
    '''A function from a text to its list of sentences,
       where a sentence ends at one of the supplied
       punctuation characters followed by a space.
       Each sentence is stripped of surrounding white
       space, and segments that are empty after stripping
       (e.g. from a text ending in ". ") are left out, so
       every returned sentence has a final character.
    '''
    def go(s):
        isBreak = sentenceBreak(punctuationChars)
        stripped = (
            ''.join(cs).strip() for cs in splitBy(isBreak)(s)
        )
        return [segment for segment in stripped if segment]
    return go


# sentenceBreak :: Chars -> (Char, Char) -> Bool
def sentenceBreak(finalPunctuation):
    '''A predicate over two consecutive characters: True
       when the first is one of the given final punctuation
       marks and the second is a space.
    '''
    def go(a, b):
        if a not in finalPunctuation:
            return False
        return b == " "
    return go


# ------------------------- TEST -------------------------
# main :: IO ()
def main():
    '''Builds a sample text, splits it into sentences and
       prints one (tag, sentences) tuple for each group of
       sentences sharing a final character.
    '''

    tags = {'!': 'E', '?': 'Q', '.': 'S'}

    # The sample text: fragments joined by single spaces.
    fragments = [
        "Hi there, how are you today?",
        "I'd like to present to you the washing machine 9001.",
        "You have been nominated to win one of these!",
        "Might it be possible to add some challenge to this task?",
        "Feels as light as polystyrene filler.",
        "But perhaps substance isn't the goal!",
        "Just make sure you don't break off before the"
    ]
    sample = ' '.join(fragments)

    # Segmentation at the tagged punctuation marks,
    segment = sentenceSegments(tags.keys())
    # followed by grouping under the tags.
    group = tagGroups(tags)

    for kv in group(segment(sample)):
        print(kv)


# ----------------------- GENERIC ------------------------

# last :: [a] -> a
def last(xs):
    '''The final item of a non-empty sequence.'''
    finalIndex = len(xs) - 1
    return xs[finalIndex]


# splitBy :: (a -> a -> Bool) -> [a] -> [[a]]
def splitBy(p):
    '''A function from a sequence to a list of groups (each
       a list of items), split wherever two consecutive
       items match the binary predicate p.
       An empty sequence gives an empty list, and a single
       item gives one single-item group, so the result is
       always a list of lists.
    '''
    # go :: [a] -> [[a]]
    def go(xs):
        groups = []
        active = []
        for x in xs:
            # active[-1] is the item immediately before x.
            if active and p(active[-1], x):
                groups.append(active)
                active = []
            active.append(x)
        # The accumulated sublists, and the final group.
        if active:
            groups.append(active)
        return groups

    return go


# MAIN ---
if __name__ == '__main__':
    main()

Output:

('E', ['You have been nominated to win one of these!', "But perhaps substance isn't the goal!"])
('S', ["I'd like to present to you the washing machine 9001.", 'Feels as light as polystyrene filler.'])
('Q', ['Hi there, how are you today?', 'Might it be possible to add some challenge to this task?'])
('Not punctuated', ["Just make sure you don't break off before the"])

Raku

use Lingua::EN::Sentence;

my $paragraph = q:to/PARAGRAPH/;
hi there, how are you today? I'd like to present to you the washing machine
9001. You have been nominated to win one of these! Just make sure you don't
break it


Just because there are punctuation characters like "?", "!" or especially "."
present, it doesn't necessarily mean you have reached the end of a sentence,
does it Mr. Magoo? The syntax highlighting here for Raku isn't the best.
PARAGRAPH

# Split the paragraph into sentences with get_sentences (presumably exported
# by Lingua::EN::Sentence, loaded at the top of this script - confirm), tag
# each sentence, and print the results separated by blank lines.
say join "\n\n", $paragraph.&get_sentences.map: {
    # try to capture a punctuation character at the very end of the sentence
    /(<:punct>)$/;
    # the sentence itself, then ' | ' and the code chosen from the capture;
    # the default branch covers every other ending, including no capture
    $_ ~ ' | ' ~ do
    given $0 {
        when '!' { 'E' };
        when '?' { 'Q' };
        when '.' { 'S' };
        default  { 'N' };
    }
}

Output:

hi there, how are you today? | Q

I'd like to present to you the washing machine
9001. | S

You have been nominated to win one of these! | E

Just make sure you don't
break it | N

Just because there are punctuation characters like "?", "!" or especially "."
present, it doesn't necessarily mean you have reached the end of a sentence,
does it Mr. Magoo? | Q

The syntax highlighting here for Raku isn't the best. | S

Vlang

Translation of: go

// sentence_type classifies the sentences in s by their terminating
// punctuation and returns the codes joined with "|": "Q" for ?, "E" for !
// and "S" for a full stop. When the last byte of s is not one of those
// marks a trailing "N" is added. An empty s yields an empty string.
fn sentence_type(s string) string {
    if s.len == 0 {
        return ""
    }
    mut codes := []string{}
    for ch in s.split('') {
        match ch {
            '?' { codes << "Q" }
            '!' { codes << "E" }
            '.' { codes << "S" }
            else {}
        }
    }
    tail := s[s.len-1..s.len]
    if !tail.contains_any('?!.') {
        codes << "N"
    }
    return codes.join("|")
}
 
// main prints the sentence types of the task's sample text.
fn main() {
    sample := "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
    result := sentence_type(sample)
    println(result)
}

Output:

Q|S|E|N

Wren

// Classifies the sentences in s by their terminating punctuation and returns
// the codes joined with "|": "Q" for ?, "E" for ! and "S" for a full stop.
// A trailing "N" is added when s does not end in one of those marks.
// An empty s yields an empty string.
var sentenceType = Fn.new { |s|
    if (s.count == 0) return ""
    var codes = { "?": "Q", "!": "E", ".": "S" }
    var types = []
    for (c in s) {
        if (codes.containsKey(c)) types.add(codes[c])
    }
    var endsWithMark = "?!.".contains(s[-1])
    if (!endsWithMark) types.add("N")
    return types.join("|")
}

// Task test case: print the sentence types of the sample text.
var s = "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it"
System.print(sentenceType.call(s))

Output:

Q|S|E|N

Library: Wren-pattern

Library: Wren-trait

The following alternative version takes the simplistic view that (unless they end the final sentence of the paragraph) ?, ! or . will only end a sentence if they're immediately followed by a space. This of course is nonsense, given the way English is written nowadays, but it's probably an improvement on the first version without the need to search through an inevitably incomplete list of abbreviations.

import "./pattern" for Pattern
import "./trait" for Indexed

// type code for each sentence ending; "" stands for a sentence with no ending mark
var map = { "?": "Q", "!": "E", ".": "S", "": "N" }
// separator pattern: one of ? ! . followed by a space
var p = Pattern.new("[? |! |. ]")
// test paragraphs: the task's sample, and one with abbreviations, decimals
// and punctuation inside parentheses
var paras = [
    "hi there, how are you today? I'd like to present to you the washing machine 9001. You have been nominated to win one of these! Just make sure you don't break it",
    "hi there, how are you on St.David's day (isn't it a holiday yet?), Mr.Smith? I'd like to present to you (well someone has to win one!) the washing machine 900.1. You have been nominated by Capt.Johnson('?') to win one of these! Just make sure you (or Mrs.Smith) don't break it. By the way, what the heck is an exclamatory question!?"
]

for (para in paras) {
    para = para.trim()
    // sentence texts, and the first character of each separator that was matched
    var sentences = p.splitAll(para)
    var endings = p.findAll(para).map { |m| m.text[0] }.toList
    // the final sentence is not followed by a space, so its ending is taken
    // from its last character, or recorded as "" when that is not ? ! or .
    var lastChar = sentences[-1][-1]
    if ("?!.".contains(lastChar)) {
        endings.add(lastChar)
        sentences[-1] = sentences[-1][0...-1]
    } else {
        endings.add("")
    }
    // print "<type> <- <sentence with its ending>" for every sentence
    for (se in Indexed.new(sentences)) {
        var ix = se.index
        var sentence = se.value
        System.print("%(map[endings[ix]]) <- %(sentence + endings[ix])")
    }
    System.print()
}

Output:

Q <- hi there, how are you today?
S <- I'd like to present to you the washing machine 9001.
E <- You have been nominated to win one of these!
N <- Just make sure you don't break it

Q <- hi there, how are you on St.David's day (isn't it a holiday yet?), Mr.Smith?
S <- I'd like to present to you (well someone has to win one!) the washing machine 900.1.
E <- You have been nominated by Capt.Johnson('?') to win one of these!
S <- Just make sure you (or Mrs.Smith) don't break it.
Q <- By the way, what the heck is an exclamatory question!?

XPL0

\Prints the type code of each of four sentences, separated by a | character.
\The sentences are supplied already split, one string per array entry.
include xpllib; \for StrLen
int  Sentence, N, Len;
char Str;
[Sentence:= ["hi there, how are you today?",
             "I'd like to present to you the washing machine 9001.",
             "You have been nominated to win one of these!",
             "Just make sure you don't break it"];
for N:= 0 to 3 do
    [Str:= Sentence(N);
    Len:= StrLen(Str);
    \the last character of the sentence selects the code that is output
    case Str(Len-1) of
      ^!: ChOut(0, ^E);
      ^?: ChOut(0, ^Q);
      ^.: ChOut(0, ^S)
    other ChOut(0, ^N);
    \a separator follows every code except the last one
    if N < 3 then ChOut(0, ^|);
    ];
]

Output:

Q|S|E|N