Remove vowels from a string: Difference between revisions

From Rosetta Code
Content added Content deleted
Line 188: Line 188:
Output : Phx Prgrmmng Lngg
Output : Phx Prgrmmng Lngg
</pre>
</pre>
If you want something a bit more like Julia/Raku/Python, the following should work, but you have to provide your own vowel-set, or maybe nick/merge the one from Julia
If you want something a bit more like Julia/Raku/Python, the following should work, but you have to provide your own vowel-set, or maybe nick/merge the ones from Julia and REXX
<lang Phix>constant vowels = utf8_to_utf32("AEIOUİÖaeiouæáåäéêıóöú"),
<lang Phix>constant vowels = utf8_to_utf32("AEIOUİÖaeiouæáåäéêıóöú"),
s = """
s = """

Revision as of 14:42, 26 July 2020

Remove vowels from a string is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Remove vowels from a string

ALGOL 68

<lang algol68>BEGIN

   # DEVOWEL yields a copy of s without the vowels a, e, i, o, u (either case) #
   OP DEVOWEL = ( STRING s )STRING:
      BEGIN
           # the result is never longer than s, so a buffer with the bounds of s suffices #
           [ LWB s : UPB s ]CHAR kept;
           INT last kept := LWB kept - 1;
           FOR i FROM LWB s TO UPB s DO
               CHAR c = s[ i ];
               IF char in string( c, NIL, "aeiouAEIOU" ) THEN
                   SKIP # a vowel - leave it out of the result #
               ELSE
                   last kept +:= 1;
                   kept[ last kept ] := c
               FI
           OD;
           kept[ LWB s : last kept ]
      END # DEVOWEL # ;
   # shows s and its devowelled form, each wrapped in angle brackets #
   PROC test devowel = ( STRING s )VOID:
        BEGIN
            STRING stripped = DEVOWEL s;
            print( ( "<", s, "> -> <", stripped, ">", newline ) )
        END;
   # run the sample inputs through the operator, in order #
   []STRING samples = ( ""
                      , "aAeEiIoOuU"
                      , "bcdfghjklmnprstvwxyz"
                      , "abcdefghijklmnoprstuvwxyz"
                      , "Algol 68 Programming Language"
                      );
   FOR n FROM LWB samples TO UPB samples DO
       test devowel( samples[ n ] )
   OD

END</lang>

Output:
<> -> <>
<aAeEiIoOuU> -> <>
<bcdfghjklmnprstvwxyz> -> <bcdfghjklmnprstvwxyz>
<abcdefghijklmnoprstuvwxyz> -> <bcdfghjklmnprstvwxyz>
<Algol 68 Programming Language> -> <lgl 68 Prgrmmng Lngg>

Delphi

<lang Delphi> program Remove_vowels_from_a_string;

{$APPTYPE CONSOLE}

{$R *.res}

uses

 System.SysUtils;

{ Returns a copy of s with every ASCII vowel (a, e, i, o, u in either case)
  removed. All other characters are kept in their original order. }
function RemoveVowels(const s: string): string;
const
  VOWELS = ['a', 'e', 'i', 'o', 'u', 'A', 'E', 'I', 'O', 'U'];
var
  c: char;
begin
  // Start from an empty result (the '' literal was missing from this assignment).
  Result := '';
  for c in s do
  begin
    // CharInSet is the SysUtils helper for testing a (wide) char against a set;
    // it avoids the WideChar-in-set warning raised by "c in VOWELS".
    if not CharInSet(c, VOWELS) then
      Result := Result + c;
  end;
end;

const
  SAMPLE = 'The quick brown fox jumps over the lazy dog';

begin
  // Show the sample sentence, then the same sentence with its vowels removed.
  Writeln('Before: ', SAMPLE);
  Writeln('After:  ', RemoveVowels(SAMPLE));
  // Wait for a line of input before the program ends.
  Readln;
end.

</lang>

Output:
Before: The quick brown fox jumps over the lazy dog
After:  Th qck brwn fx jmps vr th lzy dg

F#

The Function

<lang fsharp>
/// Returns the input with every ASCII vowel (upper or lower case) removed.
/// n is any sequence of characters, e.g. a string.
let stripVowels n =
    let vowels = set ['a';'e';'i';'o';'u';'A';'E';'I';'O';'U']
    n |> Seq.filter (fun c -> not (vowels.Contains c)) |> Array.ofSeq |> System.String

// The demo call must be its own statement; fused onto the definition's line it
// would be parsed as arguments applied to System.String.
printfn "%s" (stripVowels "Nigel Galloway")
</lang>

Output:
"Ngl Gllwy"

Factor

<lang factor>USING: formatting kernel sets ;

! Remove every ASCII vowel (either case) from str.
! The leading ":" is required to start a word definition.
: without-vowels ( str -- new-str ) "aeiouAEIOU" without ;

! Print the sample string followed by its vowel-free form.
"Factor Programming Language" dup without-vowels " Input string: %s\nWithout vowels: %s\n" printf</lang>

Output:
  Input string: Factor Programming Language
Without vowels: Fctr Prgrmmng Lngg

Go

<lang go>package main

import (

   "fmt"
   "strings"

)

// removeVowels returns s with every ASCII vowel (a, e, i, o, u in either
// case) removed. All other runes, including accented letters, are kept in
// their original order.
func removeVowels(s string) string {
	const vowels = "aeiouAEIOU"
	// strings.Map drops a rune from the result when the mapping function
	// returns a negative value, so no manual builder loop is needed.
	return strings.Map(func(r rune) rune {
		if strings.ContainsRune(vowels, r) {
			return -1
		}
		return r
	}, s)
}

func main() {

   s := "Go Programming Language"
   fmt.Println("Input  :", s)
   fmt.Println("Output :", removeVowels(s))

}</lang>

Output:
Input  : Go Programming Language
Output : G Prgrmmng Lngg

Julia

Unicode sensitive, using the Raku version example text. <lang julia>
# Uppercase Latin vowels in plain, accented and ligature forms. Only key
# membership is used; the value 1 is a placeholder.
# Note: Ĳ is the single-character ligature. Spelling it as the two letters
# "IJ" would wrongly add the consonant J to the vowel set.
const ALLVOWELS = Dict(ch => 1 for ch in Vector{Char}("AÀÁÂÃÄÅĀĂĄǺȀȂẠẢẤẦẨẪẬẮẰẲẴẶḀÆǼEȄȆḔḖḘḚḜẸẺẼẾỀỂỄỆĒĔĖĘĚÈÉÊËIȈȊḬḮỈỊĨĪĬĮİÌÍÎÏĲOŒØǾȌȎṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢŌÒÓŎŐÔÕÖUŨŪŬŮŰŲÙÚÛÜȔȖṲṴṶṸṺỤỦỨỪỬỮỰ"))
# The same set extended with Y and its accented forms, for callers that
# want Y treated as a vowel.
const ALLVOWELSY = Dict(ch => 1 for ch in Vector{Char}("AÀÁÂÃÄÅĀĂĄǺȀȂẠẢẤẦẨẪẬẮẰẲẴẶḀÆǼEȄȆḔḖḘḚḜẸẺẼẾỀỂỄỆĒĔĖĘĚÈÉÊËIȈȊḬḮỈỊĨĪĬĮİÌÍÎÏĲOŒØǾȌȎṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢŌÒÓŎŐÔÕÖUŨŪŬŮŰŲÙÚÛÜȔȖṲṴṶṸṺỤỦỨỪỬỮỰYẙỲỴỶỸŶŸÝ"))

# True when the uppercase form of ch is a key of the vowel table.
# yisavowel selects ALLVOWELSY (Y counted as a vowel) instead of ALLVOWELS.
function isvowel(ch, yisavowel=false)
    vowelset = yisavowel ? ALLVOWELSY : ALLVOWELS
    return haskey(vowelset, uppercase(ch))
end

const testtext = """

  Norwegian, Icelandic, German, Turkish, French, Spanish, English:
  Undervisningen skal være gratis, i det minste på de elementære og grunnleggende trinn.
  Skal hún veitt ókeypis, að minnsta kosti barnafræðsla og undirstöðummentu.
  Hochschulunterricht muß allen gleichermaßen entsprechend ihren Fähigkeiten offenstehen.
  Öğrenim hiç olmazsa ilk ve temel safhalarında parasızdır. İlk öğretim mecburidir.
  L'éducation doit être gratuite, au moins en ce qui concerne l'enseignement élémentaire et fondamental.
  La instrucción elemental será obligatoria. La instrucción técnica y profesional habrá de ser generalizada.
  Education shall be free, at least in the elementary and fundamental stages."""

# Show the sample text, then the same text with every character accepted by isvowel dropped.
println("Removing vowels from:\n$testtext\n becomes:\n", String(filter(!isvowel, collect(testtext))))

</lang>

Output:
Removing vowels from:
Norwegian, Icelandic, German, Turkish, French, Spanish, English:
Undervisningen skal være gratis, i det minste på de elementære og grunnleggende trinn.
Skal hún veitt ókeypis, að minnsta kosti barnafræðsla og undirstöðummentu.
Hochschulunterricht muß allen gleichermaßen entsprechend ihren Fähigkeiten offenstehen.
Öğrenim hiç olmazsa ilk ve temel safhalarında parasızdır. İlk öğretim mecburidir.
L'éducation doit être gratuite, au moins en ce qui concerne l'enseignement élémentaire et fondamental.
La instrucción elemental será obligatoria. La instrucción técnica y profesional habrá de ser generalizada.
Education shall be free, at least in the elementary and fundamental stages.
 becomes:
Nrwgn, clndc, Grmn, Trksh, Frnch, Spnsh, nglsh:
ndrvsnngn skl vr grts,  dt mnst p d lmntr g grnnlggnd trnn.
Skl hn vtt kyps, ð mnnst kst brnfrðsl g ndrstðmmnt.
Hchschlntrrcht mß lln glchrmßn ntsprchnd hrn Fhgktn ffnsthn.
ğrnm hç lmzs lk v tml sfhlrnd prszdr. lk ğrtm mcbrdr.
L'dctn dt tr grtt,  mns n c q cncrn l'nsgnmnt lmntr t fndmntl.
L nstrccn lmntl sr blgtr. L nstrccn tcnc y prfsnl hbr d sr gnrlzd.
dctn shll b fr, t lst n th lmntry nd fndmntl stgs.

Phix

<lang Phix>-- Filter predicate: true when the lower-cased ch is not one of a, e, i, o, u.
function not_vowel(integer ch)

   return find(lower(ch),"aeiou")=0

-- The rest of this line is the demo: it prints s and the result of filter(s,not_vowel).
end function constant s = "Phix Programming Language" printf(1,"Input  : %s\nOutput : %s\n",{s,filter(s,not_vowel)})</lang>

Output:
Input  : Phix Programming Language
Output : Phx Prgrmmng Lngg

If you want something a bit more like Julia/Raku/Python, the following should work, but you have to provide your own vowel-set, or maybe nick/merge the ones from Julia and REXX <lang Phix>constant vowels = utf8_to_utf32("AEIOUİÖaeiouæáåäéêıóöú"), -- code points treated as vowels
         -- sample text, one sentence per line (line breaks restored)
         s = """
Norwegian, Icelandic, German, Turkish, French, Spanish, English:
Undervisningen skal være gratis, i det minste på de elementære og grunnleggende trinn.
Skal hún veitt ókeypis, að minnsta kosti barnafræðsla og undirstöðummentu.
Hochschulunterricht muß allen gleichermaßen entsprechend ihren Fähigkeiten offenstehen.
Öğrenim hiç olmazsa ilk ve temel safhalarında parasızdır. İlk öğretim mecburidir.
L'éducation doit être gratuite, au moins en ce qui concerne l'enseignement élémentaire et fondamental.
La instrucción elemental será obligatoria. La instrucción técnica y profesional habrá de ser generalizada.
Education shall be free, at least in the elementary and fundamental stages.
"""

-- Returns s with every element found in vowels replaced by a space.
-- s is converted with utf8_to_utf32 on entry and back with utf32_to_utf8 on exit.
function remove_vowels(sequence s)

   -- presumably one element per code point after this conversion, matching how vowels was built
   s = utf8_to_utf32(s)
   -- walk from the end towards the start; this also keeps i valid for the deleting variant below
   for i=length(s) to 1 by -1 do
       if find(s[i],vowels) then s[i] = ' ' end if

-- alternative: delete the vowel instead of blanking it
-- if find(s[i],vowels) then s[i..i] = "" end if

   end for
   s = utf32_to_utf8(s)
   return s

end function printf(1,"%s\n",remove_vowels(s))</lang> (output deliberately not shown due to windows console effects, but it is the same as Raku, or Julia with the alternate find/replace line.)

Python

Works with: Python version 3.7

An over-zealous draft which (more or less) works for Roman scripts only, discarding all characters not found in a guest-list of consonants and non-alphabetics. Given the slightly parochial definition of Python's string.printable constant, glyphs like ß and ğ would need to be manually added to the guest-list. <lang python>'''Remove some subset of characters from a string'''

import string


# subsetOnly :: String -> String -> String

def subsetOnly(charList):
    '''A depleted string, including only the
       characters found in the given list.

       charList is the collection of characters to keep.
       Returns a function which takes a string s and
       returns s with all other characters removed.
    '''
    def go(s):
        # Keep characters in their original order; the separator is the empty string.
        return ''.join(
            c for c in s if c in charList
        )
    return go


# -------------------------- TEST --------------------------
# main :: IO ()

def main():
    '''Test'''
    # Sample taken from Raku version:
    s = '''
    Undervisningen skal være gratis, i det minste på de elementære og grunnleggende trinn.
    Skal hún veitt ókeypis, að minnsta kosti barnafræðsla og undirstöðummentu.
    Hochschulunterricht muß allen gleichermaßen entsprechend ihren Fähigkeiten offenstehen.
    Öğrenim hiç olmazsa ilk ve temel safhalarında parasızdır. İlk öğretim mecburidir.
    L'éducation doit être gratuite, au moins en ce qui concerne l'enseignement élémentaire et fondamental.
    La instrucción elemental será obligatoria. La instrucción técnica y profesional habrá de ser generalizada.
    Education shall be free, at least in the elementary and fundamental stages.
    '''
    angloVowels = 'AEIOUaeiou'
    # Guest-list of characters to keep: everything in string.printable
    # except the ten unaccented vowels. Characters outside string.printable
    # (e.g. æ, ß, ğ) are not on the list and are therefore dropped as well.
    consonantsAndNonAlpha = [
        c for c in string.printable if c not in angloVowels
    ]
    print(
        subsetOnly(
            consonantsAndNonAlpha
        )(s)
    )


# MAIN ---

if __name__ == '__main__':

   main()</lang>
Output:
    ndrvsnngn skl vr grts,  dt mnst p d lmntr g grnnlggnd trnn.
    Skl hn vtt kyps,  mnnst kst brnfrsl g ndrstmmnt.
    Hchschlntrrcht m lln glchrmn ntsprchnd hrn Fhgktn ffnsthn.
    rnm h lmzs lk v tml sfhlrnd prszdr. lk rtm mcbrdr.
    L'dctn dt tr grtt,  mns n c q cncrn l'nsgnmnt lmntr t fndmntl.
    L nstrccn lmntl sr blgtr. L nstrccn tcnc y prfsnl hbr d sr gnrlzd.
    dctn shll b fr, t lst n th lmntry nd fndmntl stgs.

Raku

Works with: Rakudo version 2020.07

Not going to bother with 'y', it's too touchy feely (How many vowels are in the word my? why? any?) and subject to interpretation.

Otherwise, this should work for most Latinate languages.

Apropos of nothing; reminds me of one of my favorite Between the Lions performances: Sloppy Pop - Sometimes Y

Spec is 'Remove vowels from a string'; nothing about what they should be replaced with. I chose to replace them with spaces.

Strings from http://mylanguages.org/. No affiliation, but it's a nice resource. (note: these are not all the same sentence but are all from the same paragraph. They frankly were picked based on their vowel load.)

<lang perl6># Collect each character with code point 0x20..0x2fff whose .samemark('x') form matches,
# case-insensitively, one of a æ e i ı o œ u.
# NOTE(review): samemark('x') presumably strips combining marks so accented vowels match too - confirm.
my @vowels = (0x20 .. 0x2fff).map: { .chr if .chr.samemark('x') ~~ m:i/<[aæeiıoœu]>/ }

my $text = q:to/END/;

  Norwegian, Icelandic, German, Turkish, French, Spanish, English:
  Undervisningen skal være gratis, i det minste på de elementære og grunnleggende trinn.
  Skal hún veitt ókeypis, að minnsta kosti barnafræðsla og undirstöðummentu.
  Hochschulunterricht muß allen gleichermaßen entsprechend ihren Fähigkeiten offenstehen.
  Öğrenim hiç olmazsa ilk ve temel safhalarında parasızdır. İlk öğretim mecburidir.
  L'éducation doit être gratuite, au moins en ce qui concerne l'enseignement élémentaire et fondamental.
  La instrucción elemental será obligatoria. La instrucción técnica y profesional habrá de ser generalizada.
  Education shall be free, at least in the elementary and fundamental stages.
  END

# Replace every match (:g) of any entry in @vowels with a single space and print the result.
put $text.subst(/@vowels/, ' ', :g);</lang>

Output:
N rw g  n,  c l nd c, G rm n, T rk sh, Fr nch, Sp n sh,  ngl sh:
 nd rv sn ng n sk l v r  gr t s,   d t m nst  p  d   l m nt r   g gr nnl gg nd  tr nn.
Sk l h n v  tt  k yp s,  ð m nnst  k st  b rn fr ðsl   g  nd rst ð mm nt .
H chsch l nt rr cht m ß  ll n gl  ch rm ß n  ntspr ch nd  hr n F h gk  t n  ff nst h n.
 ğr n m h ç  lm zs   lk v  t m l s fh l r nd  p r s zd r.  lk  ğr t m m cb r d r.
L' d c t  n d  t  tr  gr t  t ,    m  ns  n c  q   c nc rn  l' ns  gn m nt  l m nt  r   t f nd m nt l.
L   nstr cc  n  l m nt l s r   bl g t r  . L   nstr cc  n t cn c  y pr f s  n l h br  d  s r g n r l z d .
 d c t  n sh ll b  fr  ,  t l  st  n th   l m nt ry  nd f nd m nt l st g s.

REXX

These two REXX versions remove the Latin (Roman) vowels and all those accented and Greek vowels that are supported in the   437   code page.

using the TRANSLATE BIF

This REXX version uses the   translate   BIF which works faster for longer strings as there is no character-by-character manipulation. <lang rexx>/*REXX program removes vowels (both lowercase and uppercase and accented) from a string.*/
parse arg x                                      /*obtain optional argument from the CL.*/
if x='' | x=","  then x= 'REXX Programming Language'  /*Not specified?  Then use default*/
say ' input string: '   x
vowels= 'AEIOUaeiou'  ||  "üéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜáíóúªºαΩ"  /*Latin + accented + Greek*/
$= translate( xrange(), ., ' ')                  /*define a string of almost all chars. */
q= substr($, verify($, x), 1)                    /*find a character NOT in the X string.*/
y= translate(x, q, " ")                          /*trans. blanks in the string (for now)*/
y= space(translate(y, , vowels), 0)              /*trans. vowels──►blanks, elide blanks.*/
y= translate(y, , q)                             /*trans the Q characters back to blanks*/
say 'output string: '   y                        /*stick a fork in it,  we're all done. */</lang>

output   when using the default input:
 input string:  REXX Programming Language
output string:  RXX Prgrmmng Lngg

using character eliding

This REXX version uses a character-by-character manipulation and should be easier to understand. <lang rexx>/*REXX program removes vowels (both lowercase and uppercase and accented) from a string.*/
parse arg x                                      /*obtain optional argument from the CL.*/
if x='' | x=","  then x= 'REXX Programming Language'  /*Not specified?  Then use default*/
say ' input string: '   x
vowels= 'AEIOUaeiou'  ||  "üéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜáíóúªºαΩ"  /*Latin + accented + Greek*/
x= . || x                                        /*prefix string with a dummy character.*/
                                                 /*start at the LAST character of X and */
                                                 /*stop at position 2, so the final char*/
                                                 /*is examined and the dummy is skipped.*/
     do j=length(x)  by -1  for  length(x)-1     /*process the string from the back─end.*/
     _= pos( substr(x, j, 1), vowels)            /*is this particular character a vowel?*/
     if _==0  then iterate                       /*if zero  (not a vowel), then skip it.*/
     x= left(x, j - 1)  ||  substr(x, j + 1)     /*elide the vowel just detected from X.*/
     end   /*j*/

x= substr(x, 2)                                  /*elide the prefixed dummy character.  */
say 'output string: '   x                        /*stick a fork in it,  we're all done. */</lang>

output   is identical to the 1st REXX version.



Ring

<lang ring>
# Print a sample string and the same string with its vowels removed.
load "stdlib.ring"

str = "Ring Programming Language"
see "Input : " + str + nl

# Build the answer in a separate string. Deleting from str while indexing it
# forwards shifts the remaining characters left, so the character that follows
# each removed vowel would be skipped.
result = ""
for n = 1 to len(str)
   if not isVowel(str[n])
      result += str[n]
   ok
next

see "String without vowels: " + result + nl
</lang>

Output:
Input : Ring Programming Language
String without vowels: Rng Prgrmmng Lngg

Wren

<lang ecmascript>// Returns s with every ASCII vowel (either case) removed.
// s.where yields the code points that pass the test; join() glues them back together.
var removeVowels = Fn.new { |s| s.where { |c| !"aeiouAEIOU".contains(c) }.join() }

// Wren ends statements at newlines, so each statement gets its own line.
var s = "Wren Programming Language"
System.print("Input  : %(s)")
System.print("Output : %(removeVowels.call(s))")</lang>

Output:
Input  : Wren Programming Language
Output : Wrn Prgrmmng Lngg