Rosetta Code/Find bare lang tags
You are encouraged to solve this task according to the task description, using any language you may know.
- Task
Find all <lang> tags without a language specified in the text of a page.
Display counts by language section:
Description <lang>Pseudocode</lang> =={{header|C}}== <lang C>printf("Hello world!\n");</lang> =={{header|Perl}}== <lang>print "Hello world!\n"</lang>
should display something like
2 bare language tags. 1 in perl 1 in no language
- Extra credit
Allow multiple files to be read. Summarize all results by language:
5 bare language tags. 2 in c ([[Foo]], [[Bar]]) 1 in perl ([[Foo]]) 2 in no language ([[Baz]])
- Extra extra credit
Use the Media Wiki API to test actual RC tasks.
AutoHotkey
This code has no syntax highlighting, because Rosetta Code's highlighter fails with code that contains literal </lang> tags.
Stole RegEx Needle from Perl
; Sample wiki text to scan. This is a continuation section, so everything
; between the parentheses is stored literally in `task`, one line per row.
task =
(
Description
<lang>Pseudocode</lang>
=={{header|C}}==
<lang C>printf("Hello world!\n");</lang>
=={{header|Perl}}==
<lang>print "Hello world!\n"</lang>
)
; lang  - name of the section currently being scanned
; out   - map of section name -> number of lines containing a bare <lang> tag
; total - grand total of bare tags over all sections
lang := "no language", out := Object(lang, 0), total := 0
Loop Parse, task, `r`n
	; A header line switches the current section and starts its counter at zero.
	If RegExMatch(A_LoopField, "==\s*{{\s*header\s*\|\s*([^\s\}]+)\s*}}\s*==", $)
		lang := $1, out[lang] := 0
	else if InStr(A_LoopField, "<lang>")
		out[lang]++
For lang, num in Out
	If num
		; Add the section's tag count (not 1) so the headline figure is the
		; number of bare tags rather than the number of sections that have any.
		total += num, str .= "`n" num " in " lang
; Show the report and also place it on the clipboard.
MsgBox % clipboard := total " bare lang tags.`n" . str
Output:
2 bare lang tags. 1 in no language 1 in Perl
Erlang
<lang Erlang> -module( find_bare_lang_tags ).
-export( [task/0] ).
%% Reads "priv/find_bare_lang_tags_1", folds count_empty_lang/2 over its
%% lines starting in the "no language" section, then prints the total number
%% of bare tags followed by one line per dictionary key.
%% Returns the list of io:fwrite/2 results from the per-key printing.
task() ->
	{ok, Contents} = file:read_file( "priv/find_bare_lang_tags_1" ),
	Rows = string:tokens( erlang:binary_to_list(Contents), "\n" ),
	Start = {"no language", dict:new()},
	{_Last_lang, Counts} = lists:foldl( fun count_empty_lang/2, Start, Rows ),
	Per_lang = [{dict:fetch(Key, Counts), Key} || Key <- dict:fetch_keys(Counts)],
	Total = lists:sum( [N || {N, _Key} <- Per_lang] ),
	io:fwrite( "~p bare language tags.~n", [Total] ),
	[io:fwrite( "~p in ~p~n", [N, Key] ) || {N, Key} <- Per_lang].
count_empty_lang( Line, {Lang, Dict} ) -> Empty_lang = string:str( Line, "<lang>" ), New_dict = dict_update_counter( Empty_lang, Lang, Dict ), New_lang = new_lang( string:str( Line,"==[[:Category:{{{1}}}|{{{1}}}]] [[Category:{{{1}}}]] Property "Implemented in language" (as page type) with input value "{{{1}}}" contains invalid characters or is incomplete and therefore can cause unexpected results during a query or annotation process.==" ), string:sub_string( Line, Start+1, Stop-1 ). </lang>
- Output:
60> find_bare_lang_tags:task(). 2 bare language tags. 1 in "no language" 1 in "Perl"
Go
<lang go>package main
import (
"fmt" "io/ioutil" "log" "os" "regexp" "strings"
)
// header describes one section marker found in a page.
// Field order matters: main builds values with positional literals.
type header struct {
	start int    // offset of the first byte of the marker
	end   int    // offset of the last byte of the marker (match end minus one)
	lang  string // language name taken from the marker, lower-cased by main
}
// data is the per-language tally kept in bmap.
type data struct {
	// count is the number of bare tags recorded for the language.
	count int
	// names points at the list of file names in which those tags were found;
	// add2bmap appends to it once per recorded tag.
	names *[]string
}
// newData returns a fresh tally holding the given count and a one-element
// name list containing name.
func newData(count int, name string) *data {
	names := []string{name}
	return &data{count: count, names: &names}
}
var bmap = make(map[string]*data)
func add2bmap(lang, name string) {
pd := bmap[lang] if pd != nil { pd.count++ *pd.names = append(*pd.names, name) } else { bmap[lang] = newData(1, name) }
}
// check aborts the program via log.Fatal when err is non-nil and is a no-op
// otherwise.
func check(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
func main() {
expr := `==\s*Template:\s*header\s*\\s*==` expr2 := fmt.Sprintf("<%s>.*?</%s>", "lang", "lang") r := regexp.MustCompile(expr) r2 := regexp.MustCompile(expr2) fileNames := []string{"example.txt", "example2.txt", "example3.txt"} for _, fileName := range fileNames { f, err := os.Open(fileName) check(err) b, err := ioutil.ReadAll(f) check(err) f.Close() text := string(b) fmt.Printf("Contents of %s:\n\n%s\n\n", fileName, text) m := r.FindAllStringIndex(text, -1) headers := make([]header, len(m)) if len(m) > 0 { for i, p := range m { headers[i] = header{p[0], p[1] - 1, ""} } m2 := r.FindAllStringSubmatch(text, -1) for i, s := range m2 { headers[i].lang = strings.ToLower(s[1]) } } last := len(headers) - 1 if last == -1 { // if there are no headers in the file add a dummy one headers = append(headers, header{-1, -1, "no language"}) last = 0 } m3 := r2.FindAllStringIndex(text, -1) for _, p := range m3 { if p[1] < headers[0].start { add2bmap("no language", fileName) } else if p[0] > headers[last].end { add2bmap(headers[last].lang, fileName) } else { for i := 0; i < last; i++ { if p[0] > headers[i].end && p[0] < headers[i+1].start { add2bmap(headers[i].lang, fileName) break } } } } } fmt.Println("Results:\n") count := 0 for _, v := range bmap { count += v.count } fmt.Printf(" %d bare language tags.\n\n", count) for k, v := range bmap { fmt.Printf(" %d in %-11s %v\n", v.count, k, *v.names) }
}</lang>
- Output:
Contents of example.txt: Description <lang>Pseudocode</lang> =={{header|C}}== <lang C>printf("Hello world!\n");</lang> =={{header|Perl}}== <lang>print "Hello world!\n"</lang> Contents of example2.txt: =={{header|C}}== <lang>printf("Hello again world!\n");</lang> =={{header|Perl}}== <lang perl>print "Hello again world!\n"</lang> Contents of example3.txt: <lang>Some more pseudocode</lang> =={{header|C}}== <lang>printf("Hello once again world!\n");</lang> Results: 5 bare language tags. 2 in no language [example.txt example3.txt] 1 in perl [example.txt] 2 in c [example2.txt example3.txt]
Groovy
<lang groovy>import java.util.function.Predicate import java.util.regex.Matcher import java.util.regex.Pattern
/**
 * Counts, per language section, the lines containing a bare <lang> tag across
 * the task pages listed by the Rosetta Code category query.
 */
class FindBareTags {
    /** Captures the value of each "title" field in the category listing. */
    private static final Pattern TITLE_PATTERN = Pattern.compile("\"title\": \"([^\"]+)\"")
    /** Matches a whole line that is a section header; group 1 is the language. */
    private static final Pattern HEADER_PATTERN = Pattern.compile("==\\{\\{header\\|([^}]+)}}==")
    /** True for any line that contains a lang tag with no language given. */
    private static final Predicate<String> BARE_PREDICATE = Pattern.compile("<lang>").asPredicate()

    /** Fetches the target URL with a browser-like User-Agent and returns the body decoded as UTF-8. */
    static String download(URL target) {
        URLConnection connection = target.openConnection()
        connection.setRequestProperty("User-Agent", "Firefox/2.0.0.4")

        InputStream body = connection.getInputStream()
        return body.getText("UTF-8")
    }

    /** Downloads the task list, scans every listed page, and prints one "count in language" line per section. */
    static void main(String[] args) {
        URI titleUri = URI.create("http://rosettacode.org/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks")
        String titleText = download(titleUri.toURL())
        if (titleText == null) {
            println("Got an error reading the title page")
            return
        }

        Map<String, Integer> countMap = new HashMap<>()
        Matcher titleMatcher = TITLE_PATTERN.matcher(titleText)
        while (titleMatcher.find()) {
            String title = titleMatcher.group(1)

            // The multi-argument URI constructor quotes characters that are
            // not legal in a query, so the title can be passed as-is.
            URI pageUri = new URI("http", null, "//rosettacode.org/wiki", "action=raw&title=$title", null)
            String pageText = download(pageUri.toURL())
            if (pageText == null) {
                println("Got an error reading the task page")
                continue
            }

            // Lines before the first header belong to no language.
            String language = "no language"
            for (String line : pageText.readLines()) {
                Matcher headerMatcher = HEADER_PATTERN.matcher(line)
                if (headerMatcher.matches()) {
                    language = headerMatcher.group(1)
                } else if (BARE_PREDICATE.test(line)) {
                    countMap.put(language, countMap.getOrDefault(language, 0) + 1)
                }
            }
        }

        for (Map.Entry<String, Integer> entry : countMap.entrySet()) {
            println("$entry.value in $entry.key")
        }
    }
}</lang>
- Output:
2 in Scilab 1 in R 1 in Ursa 3 in EasyLang 1 in Factor 1 in uBasic/4tH 2 in Caché ObjectScript 1 in 4DOS Batch 1 in PostScript
Haskell
There are actually many different Regex packages available for Haskell. For this example, I chose TDFA, a very fast POSIX ERE engine. To change engines, simply change the import statement. If you use a Perl-style RE engine, you'll have to modify the expressions slightly.
This solution can be compiled into a program that will either take a space-delimited list of files as its arguments, or take input from STDIN if no arguments are provided. Additionally, if you specify the -w flag in the first argument, it will take a list of Rosetta Code wiki pages and search them. Note that the page names must be as they appear in your URL bar -- underscores in place of spaces.
<lang Haskell>import System.Environment import Network.HTTP import Text.Printf import Text.Regex.TDFA import Data.List import Data.Array import qualified Data.Map as Map
{-| Cuts the matched spans out of a string, returning the pieces that lie
    before, between and after the matches (one more piece than matches).
    Match offsets are taken from group 0 of each entry and are relative to
    the start of the original string. -}
splitByMatches :: String -> [MatchText String] -> [String]
splitByMatches str matches = foldr cutAt [str] matches
  where
    -- Working right to left, the head of the accumulator is always the
    -- still-uncut prefix of the input, so absolute offsets remain valid.
    cutAt match (chunk:rest) = take offset chunk : drop (offset + len) chunk : rest
      where (_, (offset, len)) = match ! 0
{-| Takes a string and counts the number of times a valid, but bare, lang tag
    appears: "<lang" followed only by optional whitespace and ">".
    It does not attempt to ignore valid tags inside lang blocks. -}
countBareLangTags :: String -> Int
countBareLangTags = matchCount (makeRegex "<lang[[:space:]]*>" :: Regex)
{-| Takes a string and counts the number of bare lang tags per section of the
    text.  All tags before the first section are put into the key "".
    Sections with no bare tags are omitted; if the same language header
    occurs more than once, its counts are added together. -}
countByLanguage :: String -> Map.Map String Int
countByLanguage str = Map.fromListWith (+).filter ((>0).snd)$ zip langs counts
    where -- one count per piece of text between consecutive headers
          counts = map countBareLangTags.splitByMatches str$ allMatches
          -- "" labels the text before the first header; group 1 is the name
          langs = "":(map (fst.(!1)) allMatches)
          allMatches = matchAllText (makeRegex headerRegex :: Regex) str
          headerRegex = "==[[:space:]]*{{[[:space:]]*header[[:space:]]*\\|[[:space:]]*([^ }]*)[[:space:]]*}}[^=]*=="
{-| Chooses the input source from the command line, counts bare tags per
    language in each input, merges the per-input results and prints them. -}
main = do
    args <- getArgs
    (contents, files) <- case args of
        -- If there aren't arguments, read from stdin
        [] -> do
            content <- getContents
            return ([content], [""])
        -- If there's only one argument, read the file, but don't display
        -- the filename in the results.
        [path] -> do
            content <- readFile path
            return ([content], [""])
        -- If there's more than one argument and the first one is the -w option,
        -- use the rest of the arguments as page titles and load them from the wiki.
        ("-w" : titles) -> do
            pages <- mapM getPageContent titles
            return (pages, if length args > 2 then titles else [""])
        -- Otherwise, read all the files and display their file names.
        paths -> do
            texts <- mapM readFile paths
            return (texts, paths)
    let tagsWithFiles = zipWith addFileToTags files (map countByLanguage contents)
    printBareTags (Map.unionsWith combine tagsWithFiles)
  where
    -- Pair every count with the single input it came from.
    addFileToTags file = Map.map (\count -> (count, [file]))
    -- Merging two inputs adds the counts and concatenates the name lists.
    combine (n1, fs1) (n2, fs2) = (n1 + n2, fs1 ++ fs2)
{-| Prints the grand total of bare tags followed by one line per language in
    ascending key order.  The empty key is shown as "no language". -}
printBareTags :: Map.Map String (Int,[String]) -> IO ()
printBareTags tags = do
    printf "%d bare language tags:\n\n" total
    mapM_ report (Map.toAscList tags)
  where
    total :: Int
    total = sum (map fst (Map.elems tags))

    report :: (String, (Int, [String])) -> IO ()
    report (lang, (count, files)) =
        printf "%d in %s%s\n" count (label lang) (filesString files)

    label :: String -> String
    label ""   = "no language"
    label name = name
{-| Formats a list of file or page names as a parenthesised, comma-separated
    list of wiki links with a leading space, e.g. " ([[Foo]], [[Bar]])".
    Leading empty names are dropped; an empty list yields "". -}
filesString :: [String] -> String
filesString [] = ""
filesString ("":rest) = filesString rest
filesString files = " (" ++ intercalate ", " (map wikiLink files) ++ ")"
    where wikiLink file = "[[" ++ file ++ "]]"
{-| Downloads the raw wiki text of the page with the given title. The title
    is appended to the URL unchanged, so it must already be in URL form. -}
getPageContent :: String -> IO String
getPageContent title = simpleHTTP (getRequest url) >>= getResponseBody
    where url = "http://rosettacode.org/mw/index.php?action=raw&title="++title</lang>
Here are the input files I used to test:
example1.wiki ------------------------------------------------------------- Description <lang>Pseudocode</lang> =={{header|C}}== <lang C>printf("Hello world!\n");</lang> =={{header|Perl}}== <lang>print "Hello world!\n"</lang>
example2.wiki ------------------------------------------------------------- Description <lang>Pseudocode</lang> =={{header|C}}== <lang>printf("Hello world!\n");</lang> =={{header|Perl}}== <lang>print "Hello world!\n"</lang> <lang Perl>print "Goodbye world!\n"</lang> =={{header|Haskell}}== <lang>hubris lang = "I'm so much better than a "++lang++" programmer because I program in Haskell."</lang>
And the output:
6 bare language tags: 2 in no language ([[example1.wiki]], [[example2.wiki]]) 1 in C ([[example2.wiki]]) 1 in Haskell ([[example2.wiki]]) 2 in Perl ([[example1.wiki]], [[example2.wiki]])
Additionally, I tested with 100_doors and Huffman_coding. The following resulted:
5 bare language tags: 1 in no language ([[100_doors]]) 1 in C ([[Huffman_coding]]) 1 in CoffeeScript ([[Huffman_coding]]) 1 in Perl ([[Huffman_coding]]) 1 in PostScript ([[100_doors]])
Icon and Unicon
The following is a Unicon-specific solution. <lang unicon>import Utils # To get the FindFirst class
procedure main()
keys := ["[[:Category:([^}]+)|([^}]+)]] [[Category:([^}]+)]] Property "Implemented in language" (as page type) with input value "([^}]+)" contains invalid characters or is incomplete and therefore can cause unexpected results during a query or annotation process.==");
private static final Predicate<String> BARE_PREDICATE = Pattern.compile("<lang>").asPredicate();
/**
 * Lists the task titles from the category query, downloads each task's raw
 * wiki text, and prints how many lines containing a bare lang tag fall under
 * each language header. A non-200 response is reported and skipped.
 *
 * @param args unused
 * @throws Exception if a request cannot be built or sent
 */
public static void main(String[] args) throws Exception {
    var client = HttpClient.newBuilder().build();

    var titleUri = URI.create(BASE + "/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks");
    var titleResponse = client.send(HttpRequest.newBuilder(titleUri).GET().build(), HttpResponse.BodyHandlers.ofString());
    if (titleResponse.statusCode() != 200) {
        System.out.printf("Got a %d status code%n", titleResponse.statusCode());
        return;
    }

    var titleList = TITLE_PATTERN.matcher(titleResponse.body())
            .results()
            .map(mr -> mr.group(1))
            .collect(Collectors.toList());

    var countMap = new HashMap<String, Integer>();
    for (String title : titleList) {
        // The multi-argument URI constructor quotes characters that are not
        // legal in a query, so the title can be appended as-is.
        var pageUri = new URI("http", null, "//rosettacode.org/wiki", "action=raw&title=" + title, null);
        var pageResponse = client.send(HttpRequest.newBuilder(pageUri).GET().build(), HttpResponse.BodyHandlers.ofString());
        if (pageResponse.statusCode() != 200) {
            System.out.printf("Got a %d status code%n", pageResponse.statusCode());
            continue;
        }

        // Lines before the first header belong to no language.
        String language = "no language";
        for (String line : (Iterable<String>) pageResponse.body().lines()::iterator) {
            var headerMatcher = HEADER_PATTERN.matcher(line);
            if (headerMatcher.matches()) {
                language = headerMatcher.group(1);
            } else if (BARE_PREDICATE.test(line)) {
                countMap.merge(language, 1, Integer::sum);
            }
        }
    }

    countMap.forEach((name, count) -> System.out.printf("%d in %s%n", count, name));
}
}</lang>
Julia
<lang julia>using Gumbo, AbstractTrees, HTTP, JSON, Dates
rosorg = "http://rosettacode.org" qURI = "/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=json" qdURI = "/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Draft_Programming_Tasks&cmlimit=500&format=json" sqURI = "http://www.rosettacode.org/mw/index.php?title="
"""
    topages(js, v)

Append to `v` one raw-wikitext URL for every entry of
`js["query"]["categorymembers"]`. Spaces in a title become underscores and
the result is percent-encoded, so titles containing characters such as `&`,
`+` or `?` still form a valid `title=` query value.
"""
function topages(js, v)
    for d in js["query"]["categorymembers"]
        title = replace(d["title"], " " => "_")
        push!(v, sqURI * HTTP.escapeuri(title) * "&action=raw")
    end
end
"""
    getpages(uri)

Query `rosorg * uri`, follow the `continue` records in the JSON replies until
none is returned, and return the page URLs collected by `topages`. An empty
vector is returned when the first reply's status is not 200.
"""
function getpages(uri)
    pages = Vector{String}()
    reply = HTTP.request("GET", rosorg * uri)
    reply.status == 200 || return pages

    batch = JSON.parse(String(reply.body))
    topages(batch, pages)
    while haskey(batch, "continue")
        cmcont = batch["continue"]["cmcontinue"]
        cont = batch["continue"]["continue"]
        reply = HTTP.request("GET", rosorg * uri * "&cmcontinue=$cmcont&continue=$cont")
        batch = JSON.parse(String(reply.body))
        topages(batch, pages)
    end
    return pages
end
"""
    processtaskpages(wpages, verbose=false)

Download every URL in `wpages`, parse it with `parsehtml`, and count `lang`
elements that carry no attributes. Prints the overall total and then, in
sorted order, the count for each language whose header text was the most
recent one seen before a bare tag. A page whose download or parsing throws
is skipped and contributes nothing to the total. With `verbose` set, each
finding and each failure is also printed.
"""
function processtaskpages(wpages, verbose=false)
    totalcount = 0
    langcount = Dict{String, Int}()
    for pag in wpages
        count = 0
        checked = 0    # number of lang elements that do have attributes
        try
            response = HTTP.request("GET", pag)
            if response.status == 200
                doc = parsehtml(String(response.body))
                lasttext = ""
                for elem in StatelessBFS(doc.root)
                    if typeof(elem) == HTMLText
                        # Every text node resets the current language: it is
                        # either a header's name or the empty string.
                        m = match(r"header\|(.+)}}==", text(elem))
                        lasttext = (m === nothing) ? "" : m.captures[1]
                    elseif tag(elem) == :lang
                        if isempty(attrs(elem))
                            count += 1
                            if lasttext != ""
                                verbose && println("Missing lang attibute for lang $lasttext")
                                langcount[lasttext] = get(langcount, lasttext, 0) + 1
                            end
                        else
                            checked += 1
                        end
                    end
                end
            end
        catch y
            verbose && println("Page $pag is not loaded or found: $y.")
            continue
        end
        if count > 0 && verbose
            println("Page $pag had $count bare lang tags.")
        end
        totalcount += count
    end
    println("Total bare tags: $totalcount.")
    for k in sort(collect(keys(langcount)))
        println("Total bare <lang> for language $k: $(langcount[k])")
    end
end
println("Programming examples at $(DateTime(now())):") qURI |> getpages |> processtaskpages
println("\nDraft programming tasks:") qdURI |> getpages |> processtaskpages
</lang>
- Output:
Programming examples at 2019-02-19T06:33:49.951: Total bare tags: 1044. Total bare <lang> for language 360 Assembly: 2 Total bare <lang> for language 6502 Assembly: 1 Total bare <lang> for language 6800 Assembly: 2 Total bare <lang> for language ALGOL 60: 1 Total bare <lang> for language ALGOL 68: 1 Total bare <lang> for language ALGOL-M: 1 Total bare <lang> for language APL: 1 Total bare <lang> for language ATS: 3 Total bare <lang> for language Aime: 1 Total bare <lang> for language AutoIt: 1 Total bare <lang> for language BASIC256: 2 Total bare <lang> for language BBC BASIC: 1 Total bare <lang> for language Batch File: 2 Total bare <lang> for language Bracmat: 3 Total bare <lang> for language Burlesque: 1 Total bare <lang> for language C: 1 Total bare <lang> for language C sharp|C#: 1 Total bare <lang> for language COBOL: 2 Total bare <lang> for language Caché ObjectScript: 2 Total bare <lang> for language Ceylon: 1 Total bare <lang> for language Chapel: 1 Total bare <lang> for language ChucK: 11 Total bare <lang> for language CoffeeScript: 1 Total bare <lang> for language Cubescript: 1 Total bare <lang> for language DCL: 3 Total bare <lang> for language DEC BASIC-PLUS: 1 Total bare <lang> for language Dart: 3 Total bare <lang> for language Delphi: 1 Total bare <lang> for language ECL: 6 Total bare <lang> for language ERRE: 23 Total bare <lang> for language EchoLisp: 1 Total bare <lang> for language Erlang: 2 Total bare <lang> for language Euler Math Toolbox: 6 Total bare <lang> for language Euphoria: 1 Total bare <lang> for language Excel: 2 Total bare <lang> for language Factor: 16 Total bare <lang> for language Forth: 16 Total bare <lang> for language Fortran: 2 Total bare <lang> for language FreeBASIC: 1 Total bare <lang> for language Futhark: 1 Total bare <lang> for language FutureBasic: 2 Total bare <lang> for language GAP: 1 Total bare <lang> for language GFA Basic: 10 Total bare <lang> for language Gambas: 1 Total bare <lang> for language Haskell: 2 Total 
bare <lang> for language Icon}} and {{header|Unicon: 4 Total bare <lang> for language Idris: 1 Total bare <lang> for language Io: 2 Total bare <lang> for language J: 1 Total bare <lang> for language Java: 1 Total bare <lang> for language JavaScript: 2 Total bare <lang> for language Julia: 2 Total bare <lang> for language K: 1 Total bare <lang> for language LOLCODE: 1 Total bare <lang> for language Latitude: 1 Total bare <lang> for language Liberty BASIC: 1 Total bare <lang> for language Limbo: 1 Total bare <lang> for language Lua: 1 Total bare <lang> for language M2000 Interpreter: 2 Total bare <lang> for language M4: 1 Total bare <lang> for language MUMPS: 1 Total bare <lang> for language Maple: 6 Total bare <lang> for language Mathematica: 14 Total bare <lang> for language Mathematica}} / {{header|Wolfram Language: 8 Total bare <lang> for language Mathprog: 3 Total bare <lang> for language Maxima: 3 Total bare <lang> for language Mercury: 20 Total bare <lang> for language Modula-2: 1 Total bare <lang> for language N/t/roff: 1 Total bare <lang> for language Nim: 2 Total bare <lang> for language Octave: 1 Total bare <lang> for language PARI/GP: 1 Total bare <lang> for language PHL: 1 Total bare <lang> for language PL/I: 15 Total bare <lang> for language Perl: 6 Total bare <lang> for language Raku: 1 Total bare <lang> for language PicoLisp: 4 Total bare <lang> for language PostScript: 13 Total bare <lang> for language ProDOS: 2 Total bare <lang> for language QB64: 1 Total bare <lang> for language R: 4 Total bare <lang> for language REXX: 1 Total bare <lang> for language Racket: 6 Total bare <lang> for language Raven: 1 Total bare <lang> for language Ring: 5 Total bare <lang> for language Rust: 4 Total bare <lang> for language SAS: 1 Total bare <lang> for language Scala: 2 Total bare <lang> for language Scheme: 3 Total bare <lang> for language Scilab: 41 Total bare <lang> for language Simula: 1 Total bare <lang> for language Stata: 5 Total bare <lang> for language 
Swift: 5 Total bare <lang> for language TI-83 BASIC: 1 Total bare <lang> for language TI-89 BASIC: 1 Total bare <lang> for language Trith: 1 Total bare <lang> for language UNIX Shell: 1 Total bare <lang> for language Unicon: 2 Total bare <lang> for language Ursa: 1 Total bare <lang> for language Visual Basic .NET: 1 Total bare <lang> for language Viua VM assembly: 1 Total bare <lang> for language Wart: 1 Total bare <lang> for language XSLT: 1 Total bare <lang> for language XSLT 2.0: 2 Total bare <lang> for language ooRexx: 2 Total bare <lang> for language smart BASIC: 1 Total bare <lang> for language uBasic/4tH: 72 Total bare <lang> for language x86 Assembly: 1 Total bare <lang> for language zkl: 2 Total bare <lang> for language zonnon: 1 Total bare <lang> for language МК-61/52: 62 Draft programming tasks: Total bare tags: 30. Total bare <lang> for language 1C: 1 Total bare <lang> for language AppleScript: 1 Total bare <lang> for language CoffeeScript: 1 Total bare <lang> for language Dart: 1 Total bare <lang> for language Factor: 2 Total bare <lang> for language Forth: 2 Total bare <lang> for language Glagol: 1 Total bare <lang> for language M2000 Interpreter: 1 Total bare <lang> for language Mathematica: 1 Total bare <lang> for language Racket: 1 Total bare <lang> for language uBasic/4tH: 1 Total bare <lang> for language МК-61/52: 2
Kotlin
<lang scala>import java.net.URI import java.net.http.HttpClient import java.net.http.HttpRequest import java.net.http.HttpResponse import java.util.regex.Pattern import java.util.stream.Collectors
const val BASE = "http://rosettacode.org"
/**
 * Lists the task titles from the category query, downloads each task's raw
 * wiki text, and prints how many lines containing a bare lang tag fall under
 * each language header. A non-200 response is reported and skipped.
 */
fun main() {
    val client = HttpClient.newBuilder().build()

    val titleUri = URI.create("$BASE/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks")
    val titleRequest = HttpRequest.newBuilder(titleUri).GET().build()

    val titleResponse = client.send(titleRequest, HttpResponse.BodyHandlers.ofString())
    if (titleResponse.statusCode() == 200) {
        val titleBody = titleResponse.body()

        val titlePattern = Pattern.compile("\"title\": \"([^\"]+)\"")
        val titleMatcher = titlePattern.matcher(titleBody)
        val titleList = titleMatcher.results().map { it.group(1) }.collect(Collectors.toList())

        // A header must fill the whole line; group 1 is the language name.
        val headerPattern = Pattern.compile("==\\{\\{header\\|([^}]+)}}==")
        val barePredicate = Pattern.compile("<lang>").asPredicate()

        val countMap = mutableMapOf<String, Int>()
        for (title in titleList) {
            // The multi-argument URI constructor quotes characters that are
            // not legal in a query, so the title can be passed as-is.
            val pageUri = URI("http", null, "//rosettacode.org/wiki", "action=raw&title=$title", null)
            val pageRequest = HttpRequest.newBuilder(pageUri).GET().build()
            val pageResponse = client.send(pageRequest, HttpResponse.BodyHandlers.ofString())
            if (pageResponse.statusCode() == 200) {
                val pageBody = pageResponse.body()

                //println("Title is $title")
                // Lines before the first header belong to no language.
                var language = "no language"
                for (line in pageBody.lineSequence()) {
                    val headerMatcher = headerPattern.matcher(line)
                    if (headerMatcher.matches()) {
                        language = headerMatcher.group(1)
                        continue
                    }

                    if (barePredicate.test(line)) {
                        countMap[language] = countMap.getOrDefault(language, 0) + 1
                    }
                }
            } else {
                // Report the status of the page request that failed, not the
                // (successful) title query's status.
                println("Got a ${pageResponse.statusCode()} status code")
            }
        }

        for (entry in countMap.entries) {
            println("${entry.value} in ${entry.key}")
        }
    } else {
        println("Got a ${titleResponse.statusCode()} status code")
    }
}</lang>
- Output:
1 in 4DOS Batch 2 in Caché ObjectScript 3 in EasyLang 1 in PostScript 2 in Scilab 1 in uBasic/4tH 1 in Ursa 1 in Factor 1 in R
Maple
<lang Maple>#Did not count the tasks where languages tasks are properly closed add_lan := proc(language, n, existence, languages, pos) if (assigned(existence[language])) then existence[language] += n: return pos; else existence[language] := n: languages(pos) := language: return pos+1; end if; end proc: count_tags := proc(tasks, pos) local task, url, txt, header_tags, close_tags, close_len, header_len, occurence, i, pos_copy; pos_copy := pos: for task in tasks do url := cat("http://www.rosettacode.org/mw/index.php?title=", StringTools:-Encode(StringTools:-SubstituteAll(task["title"], " ", "_"), 'percent'), "&action=raw"): txt := URL:-Get(url): header_tags := [StringTools:-SearchAll("==[[:Category:{{{1}}}|{{{1}}}]] [[Category:{{{1}}}]] Property "Implemented in language" (as page type) with input value "{{{1}}}" contains invalid characters or is incomplete and therefore can cause unexpected results during a query or annotation process.==",txt)]: close_len := numelems(close_tags): header_len := numelems(header_tags): if header_len = 0 then break; end if; if (not header_len = close_len) then printf("%s is not counted since some language tags are not properly closed.\n", task["title"]); break; end if; occurence := numelems([StringTools:-SearchAll("<lang>", txt[1..header_tags[1]])]): if occurence > 0 then pos_copy := add_lan("no languages", occurence, existence, languages, pos_copy): end if: if close_len > 1 then for i from 2 to close_len do occurence := numelems([StringTools:-SearchAll("<lang>", txt[header_tags[i-1]..header_tags[i]])]): if occurence > 0 then pos_copy := add_lan(txt[header_tags[i-1]+11..close_tags[i-1]-1], occurence, existence, languages, pos_copy): end if: end do: occurence := numelems([StringTools:-SearchAll("<lang>", txt[header_tags[-1]..])]): if occurence > 0 then pos_copy := add_lan(txt[header_tags[-1]+11..close_tags[-1]-1], occurence, existence, languages, pos_copy): end if: end if: end do: return pos_copy: end proc:
existence := table(): languages := Array(): pos := 1:
#go through every task
x := JSON:-ParseFile("http://rosettacode.org/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=10&format=json"): pos := count_tags(x["query"]["categorymembers"], pos): while(assigned(x["continue"]["cmcontinue"])) do continue := x["continue"]["cmcontinue"]: more_tasks:= cat("http://rosettacode.org/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=10&format=json", "&continue=", x["continue"]["continue"], "&cmcontinue=", x["continue"]["cmcontinue"]): x := JSON:-ParseFile(more_tasks): pos := count_tags(x["query"]["categorymembers"], pos): end do:
#Prints out the table
total := 0: for lan in languages do total += existence[lan]: printf("There are %d bare lang tags in %s\n", existence[lan], lan); end do: printf("Total number %d", total); </lang>
- Output:
15 Puzzle Game is not counted since some language tags are not properly closed. Abstract type is not counted since some language tags are not properly closed. Almost prime is not counted since some language tags are not properly closed. ... Zig-zag matrix is not counted since some language tags are not properly closed. There are 1 bare lang tags in 4DOS Batch There are 6 bare lang tags in Caché ObjectScript There are 8 bare lang tags in PostScript There are 34 bare lang tags in Scilab There are 35 bare lang tags in uBasic/4tH There are 1 bare lang tags in Ursa There are 3 bare lang tags in PicoLisp There are 15 bare lang tags in CoffeeScript There are 29 bare lang tags in МК-61/52 There are 2 bare lang tags in APL There are 10 bare lang tags in ERRE There are 4 bare lang tags in Excel There are 2 bare lang tags in LiveCode There are 9 bare lang tags in Mercury There are 3 bare lang tags in ECL There are 3 bare lang tags in Maxima There are 19 bare lang tags in PL/I There are 4 bare lang tags in Ring There are 1 bare lang tags in zonnon There are 13 bare lang tags in Forth There are 9 bare lang tags in J There are 1 bare lang tags in Unicon There are 3 bare lang tags in Java There are 23 bare lang tags in C There are 11 bare lang tags in Common Lisp There are 3 bare lang tags in factor There are 3 bare lang tags in Racket There are 4 bare lang tags in N/t/roff There are 3 bare lang tags in UNIX Shell There are 3 bare lang tags in Scheme There are 2 bare lang tags in Korn Shell There are 4 bare lang tags in Fortran There are 10 bare lang tags in C sharp|C# There are 3 bare lang tags in Io There are 2 bare lang tags in Erlang There are 1 bare lang tags in F# There are 2 bare lang tags in Bracmat There are 3 bare lang tags in Rust There are 1 bare lang tags in FreeBASIC There are 1 bare lang tags in Gri There are 2 bare lang tags in Simula There are 1 bare lang tags in smart BASIC There are 7 bare lang tags in Mathematica}} / {{header|Wolfram Language There are 1 bare 
lang tags in Aime There are 2 bare lang tags in GFA Basic There are 1 bare lang tags in Visual Basic .NET There are 3 bare lang tags in Raku There are 3 bare lang tags in Swift There are 1 bare lang tags in no languages There are 2 bare lang tags in Maple There are 1 bare lang tags in M4 There are 1 bare lang tags in FutureBasic There are 1 bare lang tags in Potion There are 2 bare lang tags in PowerShell There are 2 bare lang tags in QB64 There are 2 bare lang tags in Batch File There are 8 bare lang tags in ChucK There are 8 bare lang tags in Euler Math Toolbox There are 1 bare lang tags in gnuplot There are 2 bare lang tags in ooRexx There are 7 bare lang tags in Mathprog There are 1 bare lang tags in PHP There are 5 bare lang tags in Perl There are 4 bare lang tags in Python There are 1 bare lang tags in Haskell There are 1 bare lang tags in jq There are 7 bare lang tags in Mathematica There are 2 bare lang tags in DCL There are 1 bare lang tags in R There are 5 bare lang tags in XSLT There are 2 bare lang tags in Clojure There are 1 bare lang tags in REXX There are 3 bare lang tags in XSLT 2.0 There are 1 bare lang tags in Sinclair ZX81 BASIC There are 2 bare lang tags in Stata There are 1 bare lang tags in Wart There are 1 bare lang tags in BBC BASIC There are 1 bare lang tags in Euphoria There are 1 bare lang tags in 6800 Assembly There are 2 bare lang tags in Dart There are 3 bare lang tags in Factor There are 1 bare lang tags in F Sharp There are 1 bare lang tags in zkl There are 1 bare lang tags in Chapel There are 1 bare lang tags in Ceylon There are 1 bare lang tags in PARI/GP There are 1 bare lang tags in C++ There are 1 bare lang tags in JavaScript There are 1 bare lang tags in Powershell There are 2 bare lang tags in Delphi There are 1 bare lang tags in Gambas There are 1 bare lang tags in ACL2 There are 1 bare lang tags in TypeScript There are 1 bare lang tags in AutoHotkey There are 1 bare lang tags in Elixir There are 1 bare lang tags in Raven 
There are 1 bare lang tags in 360 Assembly There are 1 bare lang tags in LOLCODE There are 1 bare lang tags in COBOL Total number 416
Mathematica/Wolfram Language
<lang Mathematica>tasks[page_: ""] :=
Module[{res = Import["http://rosettacode.org/mw/api.php?format=xml&action=\
query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=\ 500" <> page, "XML"]},
If[MemberQ[res2, 3, XMLElement["query-continue", __]], Join[res2, 3, 1, 3, 1, 3, All, 2, 3, 2, tasks["&cmcontinue=" <> res2, 3, 2, 3, 1, 2, 1, 2]], res2, 3, 1, 3, 1, 3, All, 2, 3, 2]];
bareTags = # -> (# -> StringCount[#2, "<lang>"] &) @@@
Partition[ Prepend[StringSplit[ Import["http://rosettacode.org/wiki?action=raw&title=" <> URLEncode[#], "Text"], Shortest["==" ~~ x__ ~~ "=="] :> x], "no language"] //. {a___, multi_String?StringContainsQ["}}" ~~ ___ ~~ "[[:Category:"], bare_Integer, b___} :> {a, StringSplit[multi, "}"]1, bare, StringSplit[multi, "|"], bare_Integer, b___} :> {a, StringSplit[multi, "}"]1, bare, StringSplit[multi, "]] [[Category:"], bare_Integer, b___} :> {a, StringSplit[multi, "}"]1, bare, StringSplit[multi, "]] Property "Implemented in language" (as page type) with input value ""], </br> bare_Integer, b___} :> {a, StringSplit[multi, "}"]1, </br> bare, StringSplit[multi, "" contains invalid characters or is incomplete and therefore can cause unexpected results during a query or annotation process.== */ call testLang /* " " <lang ααα> or <lang>*/ end /*recs*/
call lineout iFID /*close the file, just in case its open*/ say recs ' records read from file: ' iFID; say /*show number of records read from file*/ if bare==0 then bare= 'no'; say right(bare, 9) " bare language tags."; say
do #=1 for words(head); _= word(head, #) /*maybe show <lang> for language ααα */ if !._\==0 then say right(!._, 9) ' in' _ /*show the count for a particular lang.*/ end /*#*/
if noLa==0  then noLa= 'no';     say right(noLa, 9)     " in no specified language."
exit 0
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*testHead: if the current record ($) holds a  =={{header|ααα}}==  marker, remember ααα */
/*          as the current header and add it to the list of headers seen so far.        */
testHead: @head= '=={{header|';      @foot= "}}=="             /*define two literals. */
          hh= pos(@head, $     );    if hh==0  then return     /*get start of literal.*/
          or= hh + length(@head) - 1                           /*get position of  |   */
          hb= pos(@foot, $, or);     if hb==0  then return     /*get position of foot.*/
          head= substr($, or+1, hb-or-1)                       /*get the language name*/
          if head\==''  then header= head                      /*Header?  Then use it.*/
          if wordpos(head, heads)==0  then heads= heads head   /*Is lang?  Add──► list*/
          return
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*testLang: if the current record ($) holds a  <lang ···>  tag, bump the bare─tag count */
/*          when no language is given, and bump the per─header or no─header counter.    */
testLang: @lang= '<lang';            @back= ">"                /*define two literals. */
          s1= pos(@lang, $     );    if s1==0  then return     /*get start of literal.*/
          gt= pos(@back, $, s1+1)                              /*get position of  >   */
          lang= strip( substr($, gt-2, gt-length(@lang) -1 ) ) /*get the language name*/
          if lang==''  then bare= bare + 1                     /*No lang?  Bump bares.*/
                       else @lang= lang                        /*Is lang?  Set lang.  */
          if @lang\=='' & header==''   then noLa= noLa + 1     /*bump  noLang counter.*/
          if @lang\=='' & header\==''  then !.head= !.head + 1 /*bump  a lang    "    */
          return</lang>
- output when using the default input:
9 records read from file: BARELANG.HTM 2 bare language tags. 1 in Perl 1 in no specified language.
Ruby
Quoting from the FAQ: "If you just want the raw wikitext without any other information whatsoever, it's best to use index.php's action=raw mode instead of the API" <lang Ruby>require "open-uri" require "cgi"
tasks = ["Greatest_common_divisor", "Greatest_element_of_a_list", "Greatest_subsequential_sum"] part_uri = "http://rosettacode.org/wiki?action=raw&title=" Report = Struct.new(:count, :tasks) result = Hash.new{|h,k| h[k] = Report.new(0, [])}
tasks.each do |task|
puts "processing #{task}" current_lang = "no language" open(part_uri + CGI.escape(task)).each_line do |line| current_lang = Regexp.last_match["lang"] if /==\{\{header\|(?<lang>.+)\}\}==/ =~ line num_no_langs = line.scan(/<lang\s*>/).size if num_no_langs > 0 then result[current_lang].count += num_no_langs result[current_lang].tasks << task end end
end
puts "\n#{result.values.map(&:count).inject(&:+)} bare language tags.\n\n" result.each{|k,v| puts "#{v.count} in #{k} (#{v.tasks})"}</lang>
- Output:
processing Greatest_common_divisor processing Greatest_element_of_a_list processing Greatest_subsequential_sum 10 bare language tags. 2 in Euler Math Toolbox (["Greatest_common_divisor", "Greatest_element_of_a_list"]) 1 in gnuplot (["Greatest_common_divisor"]) 1 in Bracmat (["Greatest_element_of_a_list"]) 2 in МК-61/52 (["Greatest_element_of_a_list", "Greatest_element_of_a_list"]) 1 in ooRexx (["Greatest_element_of_a_list"]) 2 in Mathprog (["Greatest_subsequential_sum", "Greatest_subsequential_sum"]) 1 in PHP (["Greatest_subsequential_sum"])
Rust
<lang rust> extern crate regex;
use std::io; use std::io::prelude::*;
use regex::Regex;
/// Counts bare `<lang>` tags in `input`, grouped by the language section they
/// appear in.
///
/// A section starts at a line containing `=={{header|NAME}}==`. The returned
/// vector holds one `(language, count)` pair per section in input order; the
/// first pair always has `None` as its language and covers the text before the
/// first header. Sections without bare tags are included with a count of 0.
///
/// Panics only if the header pattern fails to compile.
fn find_bare_lang_tags(input: &str) -> Vec<(Option<String>, i32)> {
    let mut language_pairs = vec![];
    let mut language = None;
    let mut counter = 0_i32;

    // The language name is everything up to the closing braces, so names with
    // digits, spaces or punctuation ("C++", "Euler Math Toolbox") are accepted.
    let header_re = Regex::new(r"==\{\{header\|(?P<lang>[^{}|]+)\}\}==").unwrap();

    for line in input.lines() {
        if let Some(captures) = header_re.captures(line) {
            if let Some(header_lang) = captures.name("lang") {
                // Close the previous section before starting the new one.
                language_pairs.push((language, counter));
                language = Some(header_lang.as_str().to_owned());
                counter = 0;
            }
        }

        // Count every bare tag on the line, not just the first one.
        counter += line.matches("<lang>").count() as i32;
    }

    language_pairs.push((language, counter));
    language_pairs
}
/// Reads all of standard input, prints the total number of bare `<lang>` tags,
/// then one line per section that contains at least one of them.
fn main() {
    let mut input = String::new();
    io::stdin().lock().read_to_string(&mut input).unwrap();

    let tallies = find_bare_lang_tags(&input);
    let total_bare: i32 = tallies.iter().map(|entry| entry.1).sum();
    println!("{} bare language tags.\n", total_bare);

    // Sections with a zero count are skipped.
    for entry in tallies.iter().filter(|entry| entry.1 > 0) {
        let label = match entry.0 {
            Some(ref name) => name.clone(),
            None => String::from("no language"),
        };
        println!("{} in {}", entry.1, label);
    }
}
</lang>
- Output:
Scala
To analyse RosettaCode pages, invoke Java with -Dhttp.agent=Anything
to work around CloudFlare blocking Java from accessing the RosettaCode site.
<lang Scala>// Map lines to a list of Option(heading -> task) for each bare lang tag found.
// Matches a "=={{header|NAME}}==" section heading; group 1 is NAME.
val headerFormat = "==[{]+header[|]([^}]*)[}]+==".r
// Matches an opening lang tag; group 1 is the text between "lang" and ">".
val langFormat = "<lang([^>]*)>".r

/** Produces one element per input line: Some(heading -> taskName) when the
  * first lang tag on the line has no language, None otherwise. The heading is
  * the most recent header seen so far ("" before the first header); a header
  * on the same line takes effect for that line.
  */
def mapped(lines: Seq[String], taskName: String = "") = {
  var heading = ""
  lines.map { line =>
    headerFormat.findFirstMatchIn(line).foreach(m => heading = m.group(1))
    langFormat.findFirstMatchIn(line)
      .map(m => m.group(1).trim)
      .filter(_.isEmpty)
      .map(_ => heading -> taskName)
  }
}

/** Groups the hits as Map(heading -> task1, task2, ...), dropping the Nones. */
def reduced(results: Seq[Option[(String,String)]]) = {
  val hits = results.flatten
  hits.groupBy { case (heading, _) => heading }.mapValues(pairs => pairs.map(_._2))
}
// Format each heading as "tasklist.size in heading (tasklist)".
// An empty heading is reported as "no language"; the parenthesised task list
// is omitted when every task name is empty, and duplicates are listed once.
def format(results: Map[String,Seq[String]]) = results.map{case (heading, tasks) =>
  val h = if (heading.length > 0) heading else "no language"
  val hmsg = s"${tasks.size} in $h"
  val t = tasks.filterNot(_ == "")
  val tmsg = if (t.isEmpty) "" else t.distinct.mkString(" (", ",", ")")
  hmsg + tmsg
}
// Total number of bare tags across all headings.
def count(results: Map[String,Seq[String]]) = results.values.map(_.size).sum
// Single and multi-source support.
// BareLangFinder wraps one source: `map` yields the per-line hits,
// `mapReduce` groups them by heading, `summary` renders one line per heading.
case class BareLangFinder(source: scala.io.Source, taskName: String = "") {
  def map = mapped(source.getLines().toSeq, taskName)
  def mapReduce = reduced(map)
  def summary = format(mapReduce).mkString("\n")
}
// Combines the per-line hits of several finders into a single grouped result.
def mapReduce(inputs: Seq[BareLangFinder]) =
  reduced(inputs.flatMap(finder => finder.map))
</lang> Examples:
// Example driver: prints the summary for an inline three-section sample, then
// builds one BareLangFinder per task URL, merges them with mapReduce and
// prints the total count followed by the per-heading lines.
// NOTE(review): the line breaks inside the triple-quoted sample (and between
// the statements) appear to have been lost; the matchers work line by line,
// so confirm the layout against the original before running.
val test = """ Description <lang>Pseudocode</lang> =={{header|C}}== <lang C>printf("Hello world!\n");</lang> =={{header|Perl}}== <lang>print "Hello world!\n"</lang> """ println(BareLangFinder(scala.io.Source.fromString(test)).summary) // System.setProperty("http.agent", "RosettaCode/1.0") val tasks = List("Greatest_common_divisor", "Greatest_element_of_a_list", "Greatest_subsequential_sum") val inputs = for (task <- tasks; url = "http://rosettacode.org/wiki?action=raw&title=" + task) yield BareLangFinder(scala.io.Source.fromURL(url), task) val bare = mapReduce(inputs) println println(s"${count(bare)} bare language tags in ${tasks.size} tasks:") println(format(bare) mkString "\n")
- Output:
1 in Perl 1 in no langauge 10 bare language tags in 3 tasks: 2 in Mathprog (Greatest_subsequential_sum) 1 in gnuplot (Greatest_common_divisor) 2 in МК-61/52 (Greatest_element_of_a_list) 1 in Bracmat (Greatest_element_of_a_list) 1 in PHP (Greatest_subsequential_sum) 2 in Euler Math Toolbox (Greatest_common_divisor,Greatest_element_of_a_list) 1 in ooRexx (Greatest_element_of_a_list)
Tcl
For all the extra credit (note: this takes a substantial amount of time due to the number of HTTP requests):
<lang tcl>package require Tcl 8.5 package require http package require json package require textutil::split package require uri
# Fetches $base with the query built from $args, following redirects.
#
#   base - URL without a query string
#   args - alternating query parameter names and values
#
# Returns the http token of the first response with a 2xx status code; the
# caller is responsible for http::cleanup on it. Raises an error when the
# transfer itself does not complete with status "ok".
proc getUrlWithRedirect {base args} {
    set url $base?[http::formatQuery {*}$args]
    while 1 {
        set t [http::geturl $url]
        if {[http::status $t] ne "ok"} {
            # Capture the details before releasing the token so that the
            # message refers to variables that actually exist.
            set status [http::status $t]
            set code [http::code $t]
            http::cleanup $t
            error "Oops: url=$url\nstatus=$status\nhttp code=$code"
        }
        if {[string match 2?? [http::ncode $t]]} {
            return $t
        }
        # OK, but not 2xx? Must be a redirect...
        set url [uri::resolve $url [dict get [http::meta $t] Location]]
        http::cleanup $t
    }
}
# Returns the titles of all members of Category:$category, fetching them in
# batches of 500 through the MediaWiki API. Results are memoised in the global
# array "cache", so repeated calls for the same category issue no requests.
proc get_tasks {category} {
    global cache
    if {[info exists cache($category)]} {
        return $cache($category)
    }
    set query [dict create cmtitle Category:$category]
    set tasks [list]
    while {1} {
        set response [getUrlWithRedirect http://rosettacode.org/mw/api.php \
                action query list categorymembers format json cmlimit 500 {*}$query]

        # Get the data out of the message
        set data [json::json2dict [http::data $response]]
        http::cleanup $response

        # add tasks to list
        foreach task [dict get $data query categorymembers] {
            lappend tasks [dict get $task title]
        }

        if {[dict exists $data continue cmcontinue]} {
            # Current continuation format: every key of "continue" is sent
            # back with the next request.
            dict for {key value} [dict get $data continue] {
                dict set query $key $value
            }
        } elseif {[dict exists $data query-continue categorymembers cmcontinue]} {
            # Legacy continuation format.
            dict set query cmcontinue \
                    [dict get $data query-continue categorymembers cmcontinue]
        } else {
            # no more continuations, we're done
            break
        }
    }
    return [set cache($category) $tasks]
}

# Returns the raw wikitext of the page titled $task.
proc getTaskContent task {
    set token [getUrlWithRedirect http://rosettacode.org/mw/index.php \
            title $task action raw]
    set content [http::data $token]
    http::cleanup $token
    return $content
}
# Sets the global total to 0 and makes sure the global arrays "count" and
# "found" exist.
proc init {} {
    global total count found
    array set found {}
    array set count {}
    set total 0
}

# Tallies the bare <lang> tags of one page.
#
#   pageName    - name recorded in found(section) for each section with hits
#   pageContent - wikitext of the page
#
# The text is split at the header markers; this relies on splitx returning the
# captured header names between the pieces. Text before the first header is
# filed under the empty section name.
proc findBareTags {pageName pageContent} {
    global total count found
    set headerRE {==\s*\{\{\s*header\s*\|\s*([^{}]+?)\s*\}\}\s*==}
    set pieces [linsert [textutil::split::splitx $pageContent $headerRE] 0 {}]
    foreach {sectionName sectionText} $pieces {
        set bare [regexp -all {<lang>} $sectionText]
        if {$bare > 0} {
            incr count($sectionName) $bare
            lappend found($sectionName) $pageName
            incr total $bare
        }
    }
}

# Prints the grand total and, when it is non-zero, one line per section with
# the pages involved. The entry for the empty section name is printed first as
# "task descriptions" and then removed from the global "found" array.
proc printResults {} {
    global total count found
    puts "$total bare language tags."
    if {!$total} {
        return
    }
    puts ""
    if {[info exists found()]} {
        puts "$count() in task descriptions\
                (\[\[[join $found() {]], [[}]\]\])"
        unset found()
    }
    foreach heading [lsort -dictionary [array names found]] {
        puts "$count($heading) in $heading\
                (\[\[[join $found($heading) {]], [[}]\]\])"
    }
}
# Driver: reset the tallies, scan every page of the Programming_Tasks
# category, then print the report.
init
set tasks [get_tasks Programming_Tasks]
#puts stderr "querying over [llength $tasks] tasks..."
foreach task $tasks {
    #puts stderr "$task..."
    findBareTags $task [getTaskContent $task]
}
printResults
</lang>
Wren
Uses Go's example files. <lang ecmascript>import "/ioutil" for FileUtil import "/pattern" for Pattern import "/set" for Set import "/sort" for Sort import "/fmt" for Fmt
// Scans three example files for lines that start with a bare "<lang>" tag.
// Each hit is tallied under the most recent header matched by `p`
// ("No language" until a header is seen) as [count, Set of file names];
// the total and one line per header are printed at the end.
// NOTE(review): the pattern literal reads "Template:Header" and shows no
// capture, yet the code reads m.capsText[0]; it looks garbled by wiki
// template expansion, so confirm against the original source.
var p = Pattern.new("/=/=Template:Header//=/=", Pattern.start) var bareCount = 0 var bareLang = {} for (fileName in ["example.txt", "example2.txt", "example3.txt"]) {
var lines = FileUtil.readLines(fileName) var lastHeader = "No language" for (line in lines) { line = line.trimStart() if (line == "") continue var m = p.find(line) if (m) { lastHeader = m.capsText[0] continue } if (line.startsWith("<lang>")) { bareCount = bareCount + 1 var value = bareLang[lastHeader] if (value) { value[0] = value[0] + 1 value[1].add(fileName) } else { bareLang[lastHeader] = [1, Set.new([fileName])] } } }
} System.print("%(bareCount) bare language tags:") for (me in bareLang) {
var lang = me.key var count = me.value[0] var names = me.value[1].toList Sort.insertion(names) Fmt.print(" $2d in $-11s $n", count, lang, names)
}</lang>
- Output:
5 bare language tags: 2 in C [example2.txt, example3.txt] 1 in Perl [example.txt] 2 in No language [example.txt, example3.txt]
zkl
Uses shared library cURL. <lang zkl>var [const] CURL=Import("zklCurl"),
partURI="http://rosettacode.org/wiki?action=raw&title=%s", langRE=RegExp(0'!\s*==\s*Template:\s*header\s*\!), // == zkl emptyRE=RegExp(0'!<lang\s*>!);
// NOTE(review): the langRE literal above reads "Template:" and shows no
// capture group, although findEmptyTags reads langRE.matched[1]; it looks
// garbled by wiki template expansion, so confirm against the original source.
//
// findEmptyTags(task,...): for every task name passed as an argument, fetches
// the page through partURI and scans it line by line. A line on which
// langRE.search succeeds updates currentLang (when the stripped capture is
// non-empty); a line on which emptyRE.matches succeeds appends the task name
// to results[currentLang]. Returns the results Dictionary.
// The statements after the function call it for three tasks, print the total
// number of entries and then one line per language in sorted key order.
fcn findEmptyTags(a,b,c,etc){ // -->[lang:(task,task...)]
results:=Dictionary(); foreach task in (vm.arglist){ println("processing ",task); currentLang:=""; page:=CURL().get(partURI.fmt(CURL.urlEncode(task))); foreach line in (page[0]){
if(langRE.search(line,True)){ lang:=langRE.matched[1].strip(); if(lang) currentLang=lang; } if(emptyRE.matches(line,True)) results.appendV(currentLang,task);
} } results
}</lang> <lang zkl>results:=findEmptyTags("Greatest_common_divisor", "Greatest_element_of_a_list", "Greatest_subsequential_sum"); println("\n%d bare language tags:".fmt(results.values.apply("len").sum(0))); foreach lang in (results.keys.sort()){
tasks:=results[lang]; println("%d in %s: %s".fmt(tasks.len(),lang,tasks.concat(",")));
}</lang>
- Output:
processing Greatest_common_divisor processing Greatest_element_of_a_list processing Greatest_subsequential_sum 14 bare language tags: 1 in Bracmat: Greatest_element_of_a_list 1 in ERRE: Greatest_subsequential_sum 2 in Euler Math Toolbox: Greatest_common_divisor,Greatest_element_of_a_list 2 in Mathprog: Greatest_subsequential_sum,Greatest_subsequential_sum 1 in PHP: Greatest_subsequential_sum 1 in Ring: Greatest_common_divisor 1 in gnuplot: Greatest_common_divisor 1 in ooRexx: Greatest_element_of_a_list 2 in uBasic/4tH: Greatest_common_divisor,Greatest_element_of_a_list 2 in МК-61/52: Greatest_element_of_a_list,Greatest_element_of_a_list