WiktionaryDumps to words: Difference between revisions

Added OCaml
(Draft Task)
 
(Added OCaml)
Line 1:
Use the [https://dumps.wikimedia.org/enwiktionary/latest/enwiktionary-latest-pages-articles.xml.bz2 wiktionary dump] to create a file equivalent to [http://manpages.ubuntu.com/manpages/bionic/man5/french.5.html "/usr/share/dict/french"]. Demonstrate how your language can handle this dump, which is a big bz2'ed XML file of about 800MB. The "/usr/share/dict/french" file is a text file containing one word of the French language per line. This file is available in Ubuntu with the package '''wfrench'''.
 
=={{header|OCaml}}==
 
<lang ocaml>(* Reads an XML document from stdin as a stream of Xmlm signals and prints,
   one per line, the content of each "title" element whose following "text"
   element contains the literal wiki heading "==French==".

   Requires the Xmlm and Str libraries. *)
let () =
  let input = Xmlm.make_input ~strip:true (`Channel stdin) in
  (* Most recently seen "title" data; printed when the matching text is found. *)
  let title = ref "" in
  (* Stack of currently open element names, innermost first. *)
  let tag_path = ref [] in
  let push_tag tag = tag_path := tag :: !tag_path in
  let pop_tag () =
    match !tag_path with
    | [] -> ()
    | _ :: tl -> tag_path := tl
  in
  let last_tag_is tag =
    match !tag_path with
    | [] -> false
    | hd :: _ -> hd = tag
  in
  (* Compile the pattern once, outside the parsing loop. *)
  let french_header = Str.regexp_string "==French==" in
  (* [Str.string_match] only tests at the given position, so it would miss a
     French section that is not the very first thing in the page text.
     Search the whole string instead. Note this is a plain substring search. *)
  let has_french_section s =
    try
      ignore (Str.search_forward french_header s 0);
      true
    with Not_found -> false
  in
  while not (Xmlm.eoi input) do
    match Xmlm.input input with
    | `Dtd _ -> ()
    | `El_start ((_, tag_name), _) -> push_tag tag_name
    | `El_end -> pop_tag ()
    | `Data s ->
      if last_tag_is "title" then title := s;
      if last_tag_is "text" && has_french_section s then
        print_endline !title
  done</lang>