Rosetta Code/Find bare lang tags: Difference between revisions

m
m (Reverted edits by Thundergnat (talk) to last revision by SqrtNegInf)
m (→‎{{header|Wren}}: Minor tidy)
 
(23 intermediate revisions by 13 users not shown)
Line 10:
 
<lang>Pseudocode</lang>
 
=={{header|C}}==
<lang C>printf("Hello world!\n");</lang>
 
=={{header|Perl}}==
<lang>print "Hello world!\n"</lang></nowiki>
</pre>
 
should display something like
<pre>
2 bare language tags.
 
1 in perl
1 in no language
</pre>
 
 
;Extra credit:
Allow multiple files to be read. &nbsp; Summarize all results by language:
<pre>
<nowiki>5 bare language tags.
 
2 in c ([[Foo]], [[Bar]])
1 in perl ([[Foo]])
2 in no language ([[Baz]])</nowiki>
</pre>
 
 
;Extra extra credit:
Use the &nbsp; [[mw:API:Main_page|Media Wiki API]] &nbsp; to test actual RC tasks.
<br><br>
 
=={{header|AutoHotkey}}==
Line 22 ⟶ 53:
 
=={{header|C}}==
<lang C>printf("Hello world!\n");</lang> =={{header|C}}==
<lang C>printf("Hello world!\n");</lang> =={{header|C}}==
<lang C>printf("Hello world!\n");</lang> =={{header|C}}==
<lang>printf("Hello again world!\n");</lang> =={{header|C}}==
<lang>printf("Hello once again world!\n");</lang>
 
=={{header|Perl}}==
<lang>print "Hello world!\n"</lang>
)
lang := "no language", out := Object(lang, 0), total := 0
Loop Parse, task, `r`n
If RegExMatch(A_LoopField, "==\s*{{\s*header\s*\|\s*([^\s\}]+)\s*}}\s*==", $)
lang := $1, out[lang] := 0
else if InStr(A_LoopField, "<lang>")
out[lang]++
For lang, num in Out
If num
total++, str .= "`n" num " in " lang
MsgBox % clipboard := total " bare lang tags.`n" . str</nowiki></pre>
Output:
<pre>2 bare lang tags.
 
1 in no language
Results:
1 in Perl</pre>
 
5 bare language tags.
 
2 in no language [example.txt example3.txt]
1 in perl [example.txt]
2 in c [example2.txt example3.txt]
</pre> =={{header|C}}==
<lang C>printf("Hello world!\n");</lang> =={{header|C}}==
<lang>printf("Hello world!\n");</lang> =={{header|C}}==
<lang C>printf("Hello world!\n");</lang>
 
=={{header|Erlang}}==
<syntaxhighlight lang=Erlang>
-module( find_bare_lang_tags ).
 
Line 71 ⟶ 104:
Stop = string:rstr( Line, "}}==" ),
string:sub_string( Line, Start+1, Stop-1 ).
</syntaxhighlight>
{{out}}
<pre>
Line 81 ⟶ 114:
 
=={{header|Go}}==
<langsyntaxhighlight lang=go>package main
 
import (
Line 179 ⟶ 212:
fmt.Printf(" %d in %-11s %v\n", v.count, k, *v.names)
}
}</langsyntaxhighlight>
 
{{out}}
Line 188 ⟶ 221:
 
<lang>Pseudocode</lang>
 
=={{header|C}}==
<lang C>printf("Hello world!\n");</lang>
 
=={{header|Perl}}==
<lang>print "Hello world!\n"</lang>
 
 
Contents of example2.txt:
 
=={{header|C}}==
<lang>printf("Hello again world!\n");</lang>
 
=={{header|Perl}}==
<lang perl>print "Hello again world!\n"</lang>
 
 
Contents of example3.txt:
 
<lang>Some more pseudocode</lang>
 
=={{header|C}}==
<lang>printf("Hello once again world!\n");</lang>
 
 
Results:
 
5 bare language tags.
 
2 in no language [example.txt example3.txt]
1 in perl [example.txt]
2 in c [example2.txt example3.txt]
</pre>
 
=={{header|Groovy}}==
{{trans|Kotlin}}
<syntaxhighlight lang=groovy>import java.util.function.Predicate
import java.util.regex.Matcher
import java.util.regex.Pattern
 
/**
 * Scans Rosetta Code programming-task pages and tallies, per language
 * section, the lines that contain a bare lang tag (one with no language
 * attribute).
 */
class FindBareTags {
    private static final Pattern TITLE_PATTERN = Pattern.compile("\"title\": \"([^\"]+)\"")
    private static final Pattern HEADER_PATTERN = Pattern.compile("==\\{\\{header\\|([^}]+)}}==")
    private static final Predicate<String> BARE_PREDICATE = Pattern.compile("<lang>").asPredicate()

    /** Fetches the body of {@code target} as UTF-8 text, sending a browser-like User-Agent. */
    static String download(URL target) {
        URLConnection conn = target.openConnection()
        conn.setRequestProperty("User-Agent", "Firefox/2.0.0.4")

        InputStream stream = conn.getInputStream()
        return stream.getText("UTF-8")
    }

    /** Lists the task titles, downloads each raw page and prints one "count in language" line per language. */
    static void main(String[] args) {
        URI listingUri = URI.create("http://rosettacode.org/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks")
        String listing = download(listingUri.toURL())
        if (listing == null) {
            println("Got an error reading the title page")
            return
        }

        Map<String, Integer> tally = new HashMap<>()
        Matcher titles = TITLE_PATTERN.matcher(listing)
        while (titles.find()) {
            String title = titles.group(1)

            URI rawUri = new URI("http", null, "//rosettacode.org/wiki", "action=raw&title=$title", null)
            String page = download(rawUri.toURL())
            if (page == null) {
                println("Got an error reading the task page")
                continue
            }

            // Lines before the first header are attributed to "no language".
            String current = "no language"
            for (String row : page.readLines()) {
                Matcher header = HEADER_PATTERN.matcher(row)
                if (header.matches()) {
                    // A header line only switches the current language.
                    current = header.group(1)
                } else if (BARE_PREDICATE.test(row)) {
                    tally.put(current, (tally.get(current) ?: 0) + 1)
                }
            }
        }

        tally.each { lang, n -> println("$n in $lang") }
    }
}</syntaxhighlight>
{{out}}
<pre>2 in Scilab
1 in R
1 in Ursa
3 in EasyLang
1 in Factor
1 in uBasic/4tH
2 in Caché ObjectScript
1 in 4DOS Batch
1 in PostScript</pre>
 
=={{header|Haskell}}==
Line 194 ⟶ 329:
This solution can be compiled into a program that will either take a space-delimited list of files as its arguments, or take input from STDIN if no arguments are provided. Additionally, if you specify the -w flag as the first argument, it will take a list of Rosetta Code wiki pages and search them. Note that the page names must be as they appear in your URL bar -- underscores in place of spaces.
 
<langsyntaxhighlight lang=Haskell>import System.Environment
import Network.HTTP
import Text.Printf
Line 273 ⟶ 408:
response <- simpleHTTP.getRequest$ url
getResponseBody response
where url = "http://rosettacode.org/mw/index.php?action=raw&title="++title</langsyntaxhighlight>
 
Here are the input files I used to test:
Line 282 ⟶ 417:
Description
 
<lang>Pseudocode</lang> =={{header|Haskell}}==
 
=={{header|C}}==
<lang C>printf("Hello world!\n");</lang>
 
 
 
=={{header|Perl}}==
<lang>print "Hello world!\n"</lang>
</nowiki></pre>
<pre><nowiki>
example2.wiki
-------------------------------------------------------------
Description
 
<lang>Pseudocode</lang>
 
=={{header|C}}==
<lang>printf("Hello world!\n");</lang>
 
=={{header|Perl}}==
<lang>print "Hello world!\n"</lang>
<lang Perl>print "Goodbye world!\n"</lang>
 
=={{header|Haskell}}==
<lang>hubris lang = "I'm so much better than a "++lang++" programmer because I program in Haskell."</lang>
</nowiki></pre>
Line 311 ⟶ 470:
 
The following is a Unicon-specific solution.
<langsyntaxhighlight lang=unicon>import Utils # To get the FindFirst class
 
procedure main()
Line 329 ⟶ 488:
write(total," bare language tags:\n")
every pair := !sort(tags) do write(pair[2]," in ",pair[1])
end</langsyntaxhighlight>
 
Sample run using example given in problem statement:
Line 339 ⟶ 498:
1 in perl
->
</pre>
 
=={{header|Java}}==
<syntaxhighlight lang=java>import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
 
public class FindBareTags {
private static final String BASE = "http://rosettacode.org";
 
private static final Pattern TITLE_PATTERN = Pattern.compile("\"title\": \"([^\"]+)\"");
private static final Pattern HEADER_PATTERN = Pattern.compile("==\\{\\{header\\|([^}]+)}}==");
private static final Predicate<String> BARE_PREDICATE = Pattern.compile("<lang>").asPredicate();
 
public static void main(String[] args) throws Exception {
var client = HttpClient.newBuilder().build();
 
URI titleUri = URI.create(BASE + "/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks");
var titleRequest = HttpRequest.newBuilder(titleUri).GET().build();
 
var titleResponse = client.send(titleRequest, HttpResponse.BodyHandlers.ofString());
if (titleResponse.statusCode() == 200) {
var titleBody = titleResponse.body();
 
var titleMatcher = TITLE_PATTERN.matcher(titleBody);
var titleList = titleMatcher.results().map(mr -> mr.group(1)).collect(Collectors.toList());
 
var countMap = new HashMap<String, Integer>();
for (String title : titleList) {
var pageUri = new URI("http", null, "//rosettacode.org/wiki", "action=raw&title=" + title, null);
var pageRequest = HttpRequest.newBuilder(pageUri).GET().build();
var pageResponse = client.send(pageRequest, HttpResponse.BodyHandlers.ofString());
if (pageResponse.statusCode() == 200) {
var pageBody = pageResponse.body();
 
AtomicReference<String> language = new AtomicReference<>("no language");
pageBody.lines().forEach(line -> {
var headerMatcher = HEADER_PATTERN.matcher(line);
if (headerMatcher.matches()) {
language.set(headerMatcher.group(1));
} else if (BARE_PREDICATE.test(line)) {
int count = countMap.getOrDefault(language.get(), 0) + 1;
countMap.put(language.get(), count);
}
});
} else {
System.out.printf("Got a %d status code%n", pageResponse.statusCode());
}
}
 
for (Map.Entry<String, Integer> entry : countMap.entrySet()) {
System.out.printf("%d in %s%n", entry.getValue(), entry.getKey());
}
} else {
System.out.printf("Got a %d status code%n", titleResponse.statusCode());
}
}
}</syntaxhighlight>
 
=={{header|jq}}==
{{works with|jq}}
'''Also works with gojq, the Go implementation of jq.'''
 
'''Adapted from [[#Wren|Wren]]'''
<syntaxhighlight lang=jq>
# Converts the input to a string and prepends ($len - length) spaces to it.
# NOTE(review): when the input is already $len or longer, this relies on jq
# yielding null for a non-positive repeat count so nothing is prepended -- confirm
# on the jq/gojq versions in use.
def lpad($len): tostring | ($len - length) as $l | (" " * $l)[:$l] + .;
 
# Removes leading and trailing runs of spaces and tabs from the input string.
def trim: sub("^[ \t]+";"") | sub("[ \t]+$";"");
 
# Insert into a sorted list using bsearch
# Input: a sorted array. Output: the array with $x spliced in at index $ix,
# where $ix is derived from bsearch's result as (-result - 1).
# When that index is negative (bsearch returned a non-negative position, i.e. $x
# was found), the input is returned unchanged, so no duplicate is added.
def binsert($x):
(-bsearch($x) - 1) as $ix
| if $ix < 0 then .
else .[:$ix] + [$x] + .[$ix:]
end;
 
# Reads every remaining input line (via `inputs`), counts the lines that start
# with a bare lang tag, and emits a summary: first the overall total, then one
# line per language giving its count and the names of the files it was seen in.
def report:
# Regex for a "=={{header|NAME}}==" line; NAME is captured as "title".
def header:
"==\\s*{{\\s*header\\s*\\|\\s*(?<title>[^\\s\\}]+)\\s*}}\\s*==";
 
# State: bareCount = overall total; bareLang = { language: [count, [file names]] }.
reduce inputs as $line ( { bareCount:0, bareLang: {} };
# On switching to a new input file, reset the current language.
if .fileName != input_filename
then .lastHeader = "No language"
| .fileName = input_filename
else .
end
| .line = ($line|trim)
# Empty (after trimming) lines are ignored.
| if .line | length == 0 then .
else .header = ((.line | capture(header)) // null)
| if .header
# A header line only changes the current language.
then .lastHeader = .header.title
elif .line|startswith("<lang>")
then .bareCount += 1
| .bareLang[.lastHeader][0] += 1
| .fileName as $fileName
# Record the file name once per language (binsert skips names already present).
| .bareLang[.lastHeader][1] |= binsert($fileName)
else .
end
end )
| "\(.bareCount) bare language tags:",
(.bareLang
| to_entries[] as {"key": $lang, "value": $value}
| $value[0] as $count
| $value[1] as $names
| ("\($count|lpad(3)) in \($lang|lpad(15))" + ": " + ($names | join(", ")) )) ;
 
report
</syntaxhighlight>
'''Invocation'''
 
Using the examples in the [[#Go|Go]] entry:
<pre>
jq -Rnr -f find-bare-lang-tags.jq rc-example1.txt rc-example2.txt
rc-example3.txt
</pre>
{{output}}
<pre>
5 bare language tags:
2 in No language: rc-example1.txt, rc-example3.txt
1 in Perl: rc-example1.txt
2 in C: rc-example2.txt, rc-example3.txt
</pre>
 
=={{header|Julia}}==
<langsyntaxhighlight lang=julia>using Gumbo, AbstractTrees, HTTP, JSON, Dates
 
rosorg = "http://rosettacode.org"
Line 429 ⟶ 716:
println("\nDraft programming tasks:")
qdURI |> getpages |> processtaskpages
</langsyntaxhighlight>{{out}}
<pre>
Programming examples at 2019-02-19T06:33:49.951:
Line 508 ⟶ 795:
Total bare <lang> for language PL/I: 15
Total bare <lang> for language Perl: 6
Total bare <lang> for language Raku: 1
Total bare <lang> for language PicoLisp: 4
Total bare <lang> for language PostScript: 13
Line 543 ⟶ 830:
Total bare <lang> for language zkl: 2
Total bare <lang> for language zonnon: 1
Total bare <lang> for language МК??-61/52: 62
 
Draft programming tasks:
Line 558 ⟶ 845:
Total bare <lang> for language Racket: 1
Total bare <lang> for language uBasic/4tH: 1
Total bare <lang> for language МК??-61/52: 2
</pre>
 
=={{header|Kotlin}}==
<syntaxhighlight lang=scala>import java.net.URI
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse
import java.util.regex.Pattern
import java.util.stream.Collectors
 
const val BASE = "http://rosettacode.org"
 
/**
 * Queries the MediaWiki API for the titles in Category:Programming_Tasks,
 * downloads the raw wiki text of each task, and prints one "count in language"
 * line per language for the lines that contain a bare lang tag.
 *
 * Lines seen before the first language header on a page are attributed to
 * "no language". A non-200 response is reported on standard output: a failed
 * task page is skipped, a failed title query ends the run.
 */
fun main() {
    val client = HttpClient.newBuilder().build()

    val titleUri = URI.create("$BASE/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks")
    val titleRequest = HttpRequest.newBuilder(titleUri).GET().build()

    val titleResponse = client.send(titleRequest, HttpResponse.BodyHandlers.ofString())
    if (titleResponse.statusCode() == 200) {
        val titleBody = titleResponse.body()

        val titlePattern = Pattern.compile("\"title\": \"([^\"]+)\"")
        val titleMatcher = titlePattern.matcher(titleBody)
        val titleList = titleMatcher.results().map { it.group(1) }.collect(Collectors.toList())

        val headerPattern = Pattern.compile("==\\{\\{header\\|([^}]+)}}==")
        val barePredicate = Pattern.compile("<lang>").asPredicate()

        val countMap = mutableMapOf<String, Int>()
        for (title in titleList) {
            val pageUri = URI("http", null, "//rosettacode.org/wiki", "action=raw&title=$title", null)
            val pageRequest = HttpRequest.newBuilder(pageUri).GET().build()
            val pageResponse = client.send(pageRequest, HttpResponse.BodyHandlers.ofString())
            if (pageResponse.statusCode() == 200) {
                val pageBody = pageResponse.body()

                //println("Title is $title")
                var language = "no language"
                for (line in pageBody.lineSequence()) {
                    val headerMatcher = headerPattern.matcher(line)
                    if (headerMatcher.matches()) {
                        // A header line only switches the current language.
                        language = headerMatcher.group(1)
                        continue
                    }

                    if (barePredicate.test(line)) {
                        countMap[language] = countMap.getOrDefault(language, 0) + 1
                    }
                }
            } else {
                // Report the status of the page request that failed; the title
                // response is known to be 200 in this branch.
                println("Got a ${pageResponse.statusCode()} status code")
            }
        }

        for (entry in countMap.entries) {
            println("${entry.value} in ${entry.key}")
        }
    } else {
        println("Got a ${titleResponse.statusCode()} status code")
    }
}</syntaxhighlight>
{{out}}
<pre>1 in 4DOS Batch
2 in Caché ObjectScript
3 in EasyLang
1 in PostScript
2 in Scilab
1 in uBasic/4tH
1 in Ursa
1 in Factor
1 in R</pre>
 
=={{header|Maple}}==
<langsyntaxhighlight lang=Maple>#Did not count the tasks where languages tasks are properly closed
add_lan := proc(language, n, existence, languages, pos)
if (assigned(existence[language])) then
Line 629 ⟶ 987:
end do:
printf("Total number %d", total);
</syntaxhighlight>
{{Out|Output}}
<pre>15 Puzzle Game is not counted since some language tags are not properly closed.
Line 644 ⟶ 1,002:
There are 3 bare lang tags in PicoLisp
There are 15 bare lang tags in CoffeeScript
There are 29 bare lang tags in МК??-61/52
There are 2 bare lang tags in APL
There are 10 bare lang tags in ERRE
Line 682 ⟶ 1,040:
There are 2 bare lang tags in GFA Basic
There are 1 bare lang tags in Visual Basic .NET
There are 3 bare lang tags in Raku
There are 3 bare lang tags in Swift
There are 1 bare lang tags in no languages
Line 738 ⟶ 1,096:
</pre>
 
=={{header|Mathematica}}/{{header|Wolfram Language}}==
<langsyntaxhighlight lang="Mathematica">tasks[page_: ""] :=
Module[{res =
Import["http://rosettacode.org/mw/api.php?format=xml&action=\
Line 769 ⟶ 1,127:
Function[task, MemberQ[task[[2]], # -> _Integer?Positive]]]],
"]], [["] <> "]])"] & @@@
Select[SortBy[langCounts, Keys], #[[2]] > 0 &];</langsyntaxhighlight>
This script looks for all of the pages in [[:Category:Programming Tasks]], downloads them, and gets the bare tags per language. Then, it gets the total of all of them, prints it, and sums up the bare tags by language. Note that it doesn't check if the tags are in a block or even closed, so it picks up false positives (especially on this page.)
{{out}}
Line 777 ⟶ 1,135:
1 in 360 Assembly ([[FizzBuzz]])
1 in 4DOS Batch ([[100 doors]])
71 in МК??-61/52 ([[Arithmetic-geometric mean]], [[Arithmetic-geometric mean/Calculate Pi]], [[Arithmetic/Complex]], [[Arithmetic/Integer]], [[Averages/Arithmetic mean]], [[Averages/Root mean square]], [[Balanced ternary]], [[Circles of given radius through two points]], [[Combinations and permutations]], [[Conditional structures]], [[Convert decimal number to rational]], [[Count in octal]], [[Day of the week]], [[Dot product]], [[Empty program]], [[Ethiopian multiplication]], [[Euler method]], [[Evaluate binomial coefficients]], [[Even or odd]], [[Execute a Markov algorithm]], [[Exponentiation operator]], [[Fibonacci sequence]], [[Find limit of recursion]], [[Greatest element of a list]], [[Haversine formula]], [[Higher-order functions]], [[Holidays related to Easter]], [[Horizontal sundial calculations]], [[Horner's rule for polynomial evaluation]], [[Integer comparison]], [[Integer sequence]], [[Jump anywhere]], [[Leap year]], [[Least common multiple]], [[Loops/Break]], [[Loops/Do-while]], [[Loops/Downward for]], [[Loops/For with a specified step]], [[Loops/Infinite]], [[Loops/While]], [[Main step of GOST 28147-89]], [[Middle three digits]], [[Modular inverse]], [[Monte Carlo methods]], [[Multifactorial]], [[Multiplication tables]], [[Nth root]], [[Pick random element]], [[Polynomial regression]], [[Primality by trial division]], [[Program termination]], [[Random numbers]], [[Real constants and functions]], [[Roots of a quadratic function]], [[Roots of unity]], [[Sequence of non-squares]], [[Standard deviation]], [[Sum and product of an array]], [[Sum digits of an integer]], [[Sum multiples of 3 and 5]], [[Sum of squares]], [[Ternary logic]], [[Towers of Hanoi]], [[Vector products]], [[Voronoi diagram]], [[Zero to the zero power]])
2 in 6502 Assembly ([[FizzBuzz]], [[String case]])
2 in 6800 Assembly ([[Hello world/Text]], [[Loops/Infinite]])
Line 855 ⟶ 1,213:
1 in PARI/GP ([[Conjugate transpose]])
18 in Perl ([[Box the compass]], [[Extend your language]], [[History variables]], [[Linear congruential generator]], [[Narcissist]], [[Numeric error propagation]], [[Parse an IP Address]], [[Priority queue]], [[Queue/Usage]], [[Rosetta Code/Find bare lang tags]], [[Set of real numbers]])
13 in Perl 6Raku ([[Concurrent computing]], [[Constrained genericity]], [[Create a two-dimensional array at runtime]], [[Dutch national flag problem]], [[Empty program]], [[Hofstadter-Conway $10,000 sequence]], [[Morse code]], [[Numerical integration]], [[Queue/Usage]], [[Rosetta Code/Find bare lang tags]], [[Write language name in 3D ASCII]])
1 in PHL ([[String concatenation]])
2 in PHP ([[Greatest subsequential sum]], [[Power set]])
Line 884 ⟶ 1,242:
5 in zkl ([[Carmichael 3 strong pseudoprimes]], [[Hamming numbers]], [[Introspection]], [[Lucas-Lehmer test]], [[Non-continuous subsequences]])</pre>
 
=={{header|Nim}}==
{{trans|Julia}}
<lang>print "Hello world!\n"</lang></nowiki>
<syntaxhighlight lang=Nim>import algorithm, htmlparser, httpclient, json
</pre>
import sequtils, strformat, strscans, tables, times, xmltree
import strutils except escape
 
const
should display something like
<pre>
2 bare language tags.
 
Rosorg = "http://rosettacode.org"
1 in perl
QUri = "/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=json"
1 in no language
QdUri = "/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Draft_Programming_Tasks&cmlimit=500&format=json"
</pre>
SqUri = "http://www.rosettacode.org/mw/index.php?title="
 
 
proc addPages(pages: var seq[string], fromJson: JsonNode) =
;Extra credit:
for d in fromJson{"query", "categorymembers"}:
Allow multiple files to be read. &nbsp; Summarize all results by language:
pages.add SqUri & d["title"].getStr().replace(" ", "_").escape() & "&action=raw"
<pre>
<nowiki>5 bare language tags.
 
2 in c ([[Foo]], [[Bar]])
1 in perl ([[Foo]])
2 in no language ([[Baz]])</nowiki>
</pre>
 
proc getPages(client: var HttpClient; uri: string): seq[string] =
let response = client.get(Rosorg & uri)
if response.status == $Http200:
var fromJson = response.body.parseJson()
result.addPages(fromJson)
while fromJson.hasKey("continue"):
let cmcont = fromJson{"continue", "cmcontinue"}.getStr()
let cont = fromJson{"continue", "continue"}.getStr()
let response = client.get(Rosorg & uri & fmt"&cmcontinue={cmcont}&continue={cont}")
fromJson = response.body.parseJson()
result.addPages(fromJson)
 
proc processTaskPages(client: var HttpClient; pages: seq[string]; verbose = false) =
;Extra extra credit:
var totalCount = 0
Use the &nbsp; [http://rosettacode.org/mw/api.php Media Wiki API] &nbsp; to test actual RC tasks.
var langCount: CountTable[string]
<br><br> =={{header|Perl}}==
<lang>print "Hello world!\n"</lang>
)
lang := "no language", out := Object(lang, 0), total := 0
Loop Parse, task, `r`n
If RegExMatch(A_LoopField, "==\s*{{\s*header\s*\|\s*([^\s\}]+)\s*}}\s*==", $)
lang := $1, out[lang] := 0
else if InStr(A_LoopField, "<lang>")
out[lang]++
For lang, num in Out
If num
total++, str .= "`n" num " in " lang
MsgBox % clipboard := total " bare lang tags.`n" . str</nowiki></pre>
Output:
<pre>2 bare lang tags.
 
for page in pages:
1 in no language
var count, checked = 0
1 in Perl</pre> =={{header|Perl}}==
try:
<lang>print "Hello world!\n"</lang>
let response = client.get(page)
if response.status == $Http200:
let doc = response.body.parseHtml()
if doc.kind != xnElement: continue
var lastText = ""
for elem in doc:
if elem.kind == xnElement and elem.tag == "lang":
if elem.attrs.isNil:
inc count
if lastText.len != 0:
if verbose:
echo "Missing lang attribute for lang ", lastText
langCount.inc lastText
else:
inc checked
elif elem.kind == xnText:
discard elem.text.scanf("=={{header|$+}}", lastText):
except CatchableError:
if verbose:
echo &"Page {page} is not loaded or found: {getCurrentExceptionMsg()}"
continue
 
if count > 0 and verbose:
echo &"Page {page} had {count} bare lang tags."
inc totalCount, count
 
echo &"Total bare tags: {totalCount}."
Contents of example2.txt: =={{header|Perl}}==
for k in sorted(toSeq(langCount.keys)):
<lang perl>print "Hello again world!\n"</lang>
echo &"Total bare <lang> for language {k}: ({langcount[k]})"
 
 
echo "Programming examples at ", now()
Contents of example3.txt:
var client = newHttpClient()
client.processTaskPages(client.getPages(QUri))
 
echo "\nDraft programming tasks:"
<lang>Some more pseudocode</lang> =={{header|Perl}}==
client.processTaskPages(client.getPages(QdUri))</syntaxhighlight>
<lang>print "Hello world!\n"</lang>
</nowiki></pre>
<pre><nowiki>
example2.wiki
-------------------------------------------------------------
Description
 
{{out}}
<lang>Pseudocode</lang> =={{header|Perl}}==
<pre>Programming examples at 2021-04-01T00:27:04+02:00
<lang>print "Hello world!\n"</lang>
Total bare tags: 792.
<lang Perl>print "Goodbye world!\n"</lang> =={{header|Perl}}==
Total bare <lang> for language 0815: (14)
Total bare <lang> for language 11l: (392)
Total bare <lang> for language 1C: (1)
Total bare <lang> for language 360 Assembly: (199)
Total bare <lang> for language 4D: (6)
Total bare <lang> for language 4DOS Batch: (9)
Total bare <lang> for language 6502 Assembly: (19)
Total bare <lang> for language 6800 Assembly: (1)
Total bare <lang> for language 68000 Assembly: (3)
Total bare <lang> for language 8080 Assembly: (58)
Total bare <lang> for language 8th: (7)
Total bare <lang> for language AArch64 Assembly: (1)
Total bare <lang> for language ALGOL 60: (1)
Total bare <lang> for language ALGOL 68: (1)
Total bare <lang> for language APL: (1)
Total bare <lang> for language ARM Assembly: (6)
Total bare <lang> for language ATS: (2)
Total bare <lang> for language AWK: (1)
Total bare <lang> for language Ada: (1)
Total bare <lang> for language AutoHotkey: (4)
Total bare <lang> for language Axe: (1)
Total bare <lang> for language BASIC256: (1)
Total bare <lang> for language BaCon: (1)
Total bare <lang> for language Befunge: (1)
Total bare <lang> for language Bracmat: (1)
Total bare <lang> for language C sharp: (1)
Total bare <lang> for language ChucK: (2)
Total bare <lang> for language Dart: (1)
Total bare <lang> for language E: (3)
Total bare <lang> for language EasyLang: (1)
Total bare <lang> for language Icon: (1)
Total bare <lang> for language JavaScript: (1)
Total bare <lang> for language Julia: (2)
Total bare <lang> for language REXX: (1)
Total bare <lang> for language Wart: (1)
 
Draft programming tasks:
Total bare tags: 45.
Total bare <lang> for language 11l: (24)
Total bare <lang> for language 1C: (1)
Total bare <lang> for language 360 Assembly: (1)
Total bare <lang> for language 8080 Assembly: (3)
Total bare <lang> for language APL: (1)
Total bare <lang> for language AppleScript: (1)
Total bare <lang> for language C: (1)
Total bare <lang> for language Fermat: (1)
Total bare <lang> for language J: (3)
Total bare <lang> for language Mathematica: (1)</pre>
 
=={{header|Objeck}}==
With extra credit
<syntaxhighlight lang=objeck>use Web.HTTP;
use Query.RegEx;
use Collection.Generic;
 
# Fetches a fixed list of Rosetta Code task pages and reports, per language,
# the tasks in which a bare lang tag was found under that language's header.
class Program {
# Entry point: processes the hard-coded task names, prints a "---" separator,
# then prints the per-language summary.
function : Main(args : String[]) ~ Nil {
master_tasks := ProcessTasks(["100_doors", "99_bottles_of_beer", "Filter", "Array_length", "Greatest_common_divisor", "Greatest_element_of_a_list", "Greatest_subsequential_sum"]);
"---"->PrintLine();
PrintTasks(master_tasks);
}
 
# Runs ProcessTask on each task name (printing a progress line for each) and
# returns a multimap in which every language found is inserted with the task name.
function : ProcessTasks(tasks : String[]) ~ MultiMap<String, String> {
master_tasks := MultiMap->New()<String, String>;
 
each(i : tasks) {
task := tasks[i];
"Processing '{$task}'..."->PrintLine();
matches := ProcessTask(task);
langs := matches->GetKeys()<String>;
each(j : langs) {
master_tasks->Insert(langs->Get(j), task);
};
};
 
return master_tasks;
}
 
# Downloads the raw wiki text of one task and returns the set of header names
# under which a bare lang tag appears; "no language" is used when the tag is
# seen before any header has matched.
# NOTE(review): the result is a Set, so presumably a language is recorded once
# per task however many bare tags it has -- confirm Set semantics.
function : ProcessTask(task : String) ~ Set<String> {
langs := Set->New()<String>;
 
header_regex := RegEx->New("==\\{\\{header\\|(\\w|/|-|_)+\\}\\}==");
lang_regex := RegEx->New("<(\\s)*lang(\\s)*>");
 
url := "http://rosettacode.org/mw/index.php?action=raw&title={$task}";
lines := HttpClient->New()->GetAll(url)->Split("\n");
 
# name of the most recent header matched; stays unset until one is seen
last_header : String;
each(i : lines) {
line := lines[i];
# get header
header := header_regex->FindFirst(line);
if(<>header->IsEmpty()) {
last_header := HeaderName(header);
};
 
# get language
lang := lang_regex->FindFirst(line);
if(lang->Size() > 0) {
if(last_header <> Nil) {
langs->Insert("{$last_header}");
}
else {
langs->Insert("no language");
};
};
};
 
return langs;
}
 
# Extracts the text between the first '|' and the next '}' of a matched header
# string; returns an empty string when there is no '|'.
function : HeaderName(lang_str : String) ~ String {
start := lang_str->Find('|');
if(start > -1) {
start += 1;
end := lang_str->Find(start, '}');
return lang_str->SubString(start, end - start);
};
 
return "";
}
 
# Prints one line per language key in the form
# "<count> in <language> ([[task]], [[task]], ...)", where count is the number
# of task entries stored for that key.
function : PrintTasks(tasks : MultiMap<String, String>) ~ Nil {
keys := tasks->GetKeys()<String>;
each(i : keys) {
buffer := "";
 
key := keys->Get(i);
values := tasks->Find(key)<String>;
count := values->Size();
buffer += "{$count} in {$key} (";
each(j : values) {
value := values->Get(j);
buffer += "[[{$value}]]";
# comma-separate every entry except the last
if(j + 1 < values->Size()) {
buffer += ", ";
};
};
buffer += ")";
 
buffer->PrintLine();
};
}
}
</syntaxhighlight>
{{out}}
<pre>
Processing '100_doors'...
Processing '99_bottles_of_beer'...
Processing 'Filter'...
Processing 'Array_length'...
Processing 'Greatest_common_divisor'...
Processing 'Greatest_element_of_a_list'...
Processing 'Greatest_subsequential_sum'...
---
1 in 11l ([[100_doors]])
1 in Bracmat ([[Greatest_element_of_a_list]])
1 in C1R ([[100_doors]])
3 in ERRE ([[Greatest_common_divisor]], [[Greatest_element_of_a_list]], [[Greatest_subsequential_sum]])
5 in EasyLang ([[100_doors]], [[Filter]], [[Array_length]], [[Greatest_common_divisor]], [[Greatest_element_of_a_list]])
1 in Forth ([[Array_length]])
1 in J ([[Array_length]])
1 in Klingphix ([[99_bottles_of_beer]])
1 in Mathprog ([[Greatest_subsequential_sum]])
1 in MiniScript ([[Greatest_element_of_a_list]])
1 in OCaml ([[99_bottles_of_beer]])
1 in PHP ([[Greatest_subsequential_sum]])
1 in PostScript ([[100_doors]])
2 in Ring ([[Filter]], [[Greatest_common_divisor]])
1 in Scilab ([[100_doors]])
2 in Ursa ([[100_doors]], [[99_bottles_of_beer]])
1 in gnuplot ([[Greatest_common_divisor]])
1 in ooRexx ([[Greatest_element_of_a_list]])
4 in uBasic/4tH ([[100_doors]], [[99_bottles_of_beer]], [[Greatest_common_divisor]], [[Greatest_element_of_a_list]])
</pre>
 
=={{header|Perl}}==
This is a simple implementation that does not attempt either extra credit.
<langsyntaxhighlight lang=perl>my $lang = 'no language';
my $total = 0;
my %blanks = ();
Line 970 ⟶ 1,521:
print "$k in $v\n"
}
}</syntaxhighlight>
}</lang> =={{header|Perl}}==
<lang>print "Hello world!\n"</lang>
"""
 
println(BareLangFinder(scala.io.Source.fromString(test)).summary)
 
// System.setProperty("http.agent", "RosettaCode/1.0")
val tasks = List("Greatest_common_divisor", "Greatest_element_of_a_list", "Greatest_subsequential_sum")
val inputs = for (task <- tasks; url = "http://rosettacode.org/wiki?action=raw&title=" + task)
yield BareLangFinder(scala.io.Source.fromURL(url), task)
val bare = mapReduce(inputs)
println
println(s"${count(bare)} bare language tags in ${tasks.size} tasks:")
println(format(bare) mkString "\n")</pre>
{{out}}
<pre>1 in Perl
1 in no langauge
 
10 bare language tags in 3 tasks:
2 in Mathprog (Greatest_subsequential_sum)
1 in gnuplot (Greatest_common_divisor)
2 in МК-61/52 (Greatest_element_of_a_list)
1 in Bracmat (Greatest_element_of_a_list)
1 in PHP (Greatest_subsequential_sum)
2 in Euler Math Toolbox (Greatest_common_divisor,Greatest_element_of_a_list)
1 in ooRexx (Greatest_element_of_a_list)</pre>
 
=={{header|Phix}}==
Both extra credits. Would probably benefit from excluding &lt;pre&gt;&lt;/pre&gt; sections first.
<!--<lang Phix>(notonline)-->
<lang Phix>-- demo\rosetta\Find_bare_lang_tags.exw
<span style="color: #000080;font-style:italic;">--
--
-- demo\rosetta\Find_bare_lang_tags.exw
-- Finds/counts no of "<lang>" as opposed to eg "<lang Phix>" tags.
-- ====================================
-- Since downloading all the pages can be very slow, this uses a cache.
--
-- (Uses '&' instead of/as well as 'a', for everyone's sanity..)
constant include_drafts = true,
-- Finds/counts no of "&lt;l&ng&gt;" as opposed to eg "&lt;l&ng Phix&gt;" tags.
sort_by_task = false,
-- Since downloading all the pages can be very slow, this uses a cache.
sort_by_lang = not sort_by_task -- (one or t'other)
--</span>
 
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- (fairly obviously this will never ever run in a browser!)</span>
integer lp = 0
<span style="color: #008080;">constant</span> <span style="color: #000000;">include_drafts</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">,</span>
procedure progress(string msg, sequence args = {})
<span style="color: #000000;">sort_by_task</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">,</span>
if length(args) then msg = sprintf(msg,args) end if
<span style="color: #000000;">sort_by_lang</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">not</span> <span style="color: #000000;">sort_by_task</span> <span style="color: #000080;font-style:italic;">-- (one or t'other)</span>
integer lm = length(msg)
if lm<lp then msg[$..$] = repeat(' ',lp-lm)&msg[$] end if
<span style="color: #008080;">include</span> <span style="color: #000000;">rosettacode_cache</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> <span style="color: #000080;font-style:italic;">-- see [[Rosetta_Code/Count_examples#Phix]]</span>
puts(1,msg)
lp = iff(msg[$]='\r'?lm:0)
<span style="color: #008080;">constant</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">utf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ansi</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">columnize</span><span style="color: #0000FF;">({{</span><span style="color: #008000;">x"E28093"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"-"</span><span style="color: #0000FF;">},</span>
end procedure
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"E28099"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"'"</span><span style="color: #0000FF;">},</span>
 
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"C3A8"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"e"</span><span style="color: #0000FF;">},</span>
include builtins\timedate.e
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"C3A9"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"e"</span><span style="color: #0000FF;">},</span>
integer refresh_cache = timedelta(days:=365) -- 0 for always
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"D09A"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"K"</span><span style="color: #0000FF;">},</span>
--integer refresh_cache = timedelta(days:=1) -- 0 for always
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"D09C"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"M"</span><span style="color: #0000FF;">}})</span>
 
include builtins\libcurl.e
<span style="color: #008080;">function</span> <span style="color: #000000;">utf8_clean</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span>
atom curl = NULL
<span style="color: #008080;">return</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #000000;">utf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ansi</span><span style="color: #0000FF;">)</span>
atom pErrorBuffer
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
 
function write_callback(atom pData, integer size, integer nmemb, integer fn)
<span style="color: #008080;">function</span> <span style="color: #000000;">multi_lang</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span>
integer bytes_written = size * nmemb
<span style="color: #000080;font-style:italic;">-- Convert eg {"Algol","Algol","C","C","C"} to "Algol[2],C[3]"</span>
puts(fn,peek({pData,bytes_written}))
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">2</span>
return bytes_written
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
end function
<span style="color: #008080;">if</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
constant write_cb = call_back({'+', routine_id("write_callback")})
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">+=</span><span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
 
<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s[%d]"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})}</span>
function open_download(string filename, url)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
bool refetch = true
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
if get_file_type("rc_cache")!=FILETYPE_DIRECTORY then
<span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
if not create_directory("rc_cache") then
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
crash("cannot create rc_cache directory")
<span style="color: #008080;">return</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">","</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
end if
filename = join_path({"rc_cache",filename})
<span style="color: #008080;">function</span> <span style="color: #000000;">multi_task</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span>
if file_exists(filename) then
<span style="color: #000080;font-style:italic;">-- Similar to multi_lang() but with task[indexes]</span>
-- use existing file if <= refresh_cache (365 days) old
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">2</span>
sequence last_mod = get_file_date(filename) -- (0.8.1+)
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
atom delta = timedate_diff(last_mod,date())
<span style="color: #004080;">integer</span> <span style="color: #000000;">si</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
refetch = (delta>refresh_cache) or get_file_size(filename)=0
<span style="color: #004080;">string</span> <span style="color: #000000;">tsi</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">si</span><span style="color: #0000FF;">])</span>
else
<span style="color: #008080;">if</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #000000;">si</span><span style="color: #0000FF;">=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
string directory = get_file_path(filename)
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #000000;">si</span><span style="color: #0000FF;">=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">+=</span><span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
if get_file_type(directory)!=FILETYPE_DIRECTORY then
<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s[%d]"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">tsi</span><span style="color: #0000FF;">,</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})}</span>
if not create_directory(directory,make_parent:=true) then
<span style="color: #008080;">else</span>
crash("cannot create %s directory",{directory})
<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tsi</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
end if
<span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
object text
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
if not refetch then
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)></span><span style="color: #000000;">8</span> <span style="color: #008080;">then</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">4</span><span style="color: #0000FF;">..-</span><span style="color: #000000;">4</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"..."</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
text = trim(get_text(filename))
<span style="color: #008080;">return</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">","</span><span style="color: #0000FF;">)</span>
refetch = (not sequence(text)) or (length(text)<10)
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
end if
if refetch then
<span style="color: #004080;">bool</span> <span style="color: #000000;">first</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
progress("Downloading %s...\r",{filename})
if curl=NULL then
<span style="color: #008080;">function</span> <span style="color: #000000;">find_bare_lang_tags</span><span style="color: #0000FF;">()</span>
curl_global_init()
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_file_type</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)!=</span><span style="color: #004600;">FILETYPE_DIRECTORY</span> <span style="color: #008080;">then</span>
curl = curl_easy_init()
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">create_directory</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
pErrorBuffer = allocate(CURL_ERROR_SIZE)
<span style="color: #7060A8;">crash</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"cannot create rc_cache directory"</span><span style="color: #0000FF;">)</span>
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #000080;font-style:italic;">-- note this lot use web scraping (as cribbed from a similar task) ...</span>
url = substitute(url,"%3A",":")
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Programming_Tasks"</span><span style="color: #0000FF;">))</span>
url = substitute(url,"%2A","*")
<span style="color: #008080;">if</span> <span style="color: #000000;">include_drafts</span> <span style="color: #008080;">then</span>
curl_easy_setopt(curl, CURLOPT_URL, url)
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Draft_Programming_Tasks"</span><span style="color: #0000FF;">))</span>
integer fn = open(filename,"wb")
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span>
if fn=-1 then ?9/0 end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn)
<span style="color: #004080;">integer</span> <span style="color: #000000;">blt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Rosetta_Code/Find_bare_lang_tags"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- not this one!</span>
while true do
<span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">blt</span><span style="color: #0000FF;">..</span><span style="color: #000000;">blt</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
CURLcode res = curl_easy_perform(curl)
if res=CURLE_OK then exit end if
<span style="color: #000080;font-style:italic;">-- ... whereas the individual tasks use the web api instead (3x smaller/faster)</span>
string error = sprintf("%d",res)
<span style="color: #004080;">integer</span> <span style="color: #000000;">total_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
if res=CURLE_COULDNT_RESOLVE_HOST then
<span style="color: #000000;">lt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">),</span>
error &= " [CURLE_COULDNT_RESOLVE_HOST]"
<span style="color: #000000;">kept</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
end if
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks found\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">})</span>
progress("Error %s downloading file, retry?(Y/N):",{error})
<span style="color: #004080;">sequence</span> <span style="color: #000000;">task_langs</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
if lower(wait_key())!='y' then abort(0) end if
<span style="color: #000000;">task_counts</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sort_by_task</span><span style="color: #0000FF;">?</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">):{}),</span>
printf(1,"Y\n")
<span style="color: #000000;">task_things</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sort_by_task</span><span style="color: #0000FF;">?</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">({},</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">):{})</span>
end while
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
close(fn)
<span style="color: #004080;">string</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span>
refresh_cache += timedelta(days:=1) -- did I mention it is slow?
<span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"http://rosettacode.org/mw/index.php?title=%s&action=raw"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">}),</span>
text = get_text(filename)
<span style="color: #000000;">contents</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">open_download</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">&</span><span style="color: #008000;">".raw"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">url</span><span style="color: #0000FF;">),</span>
end if
<span style="color: #000000;">curr</span>
return text
<span style="color: #004080;">integer</span> <span style="color: #000000;">count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">header</span>
end function
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
 
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`&lt;l`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ang&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span>
function open_category(string filename)
<span style="color: #008080;">if</span> <span style="color: #000000;">start</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
return open_download(filename&".htm","http://rosettacode.org/wiki/Category:"&filename)
<span style="color: #000080;font-style:italic;">-- look backward for the nearest header</span>
end function
<span style="color: #000000;">header</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">rmatch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`{`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`{he`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ader|`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span>
 
<span style="color: #008080;">if</span> <span style="color: #000000;">header</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
function dewiki(string s)
<span style="color: #000000;">curr</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"no language"</span>
-- extract tasks from eg `<li><a href="/wiki/100_doors"`
<span style="color: #008080;">else</span>
sequence tasks = {}
<span style="color: #000000;">header</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`{`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`{he`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ader|`</span><span style="color: #0000FF;">)</span>
integer start = 1, finish = match(`<div class="printfooter">`,s)
<span style="color: #000000;">curr</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">utf8_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">[</span><span style="color: #000000;">header</span><span style="color: #0000FF;">..</span><span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<nowiki>}}</nowiki>`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">header</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span>
s = s[1..finish-1]
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
while true do
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_lang</span> <span style="color: #008080;">then</span>
start = match("<li><a href=\"/wiki/",s,start)
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">curr</span><span style="color: #0000FF;">,</span><span style="color: #000000;">task_langs</span><span style="color: #0000FF;">)</span>
if start=0 then exit end if
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
start += length("<li><a href=\"/wiki/")
<span style="color: #000000;">task_langs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_langs</span><span style="color: #0000FF;">,</span><span style="color: #000000;">curr</span><span style="color: #0000FF;">)</span>
finish = find('"',s,start)
<span style="color: #000000;">task_things</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">})</span>
string task = s[start..finish-1]
<span style="color: #000000;">task_counts</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
task = substitute_all(task,{"*",":"},{"%2A","%3A"})
<span style="color: #008080;">else</span>
if task!="Rosetta_Code/Find_bare_lang_tags" then -- not this one!
<span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">],</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
tasks = append(tasks,task)
<span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
-- if length(tasks)>10 then exit end if -- (debug aid)
<span style="color: #008080;">else</span>
start = finish+1
<span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">curr</span><span style="color: #0000FF;">)</span>
end while
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
return tasks
<span style="color: #000000;">count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
end function
<span style="color: #000000;">start</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`&lt;l`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ang&gt;`</span><span style="color: #0000FF;">)</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
constant {html,ascii} = columnize({{"%2A","*"},
<span style="color: #008080;">if</span> <span style="color: #000000;">count</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
{"%3A",":"},
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_task</span> <span style="color: #008080;">then</span>
{"%27","'"},
<span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">count</span>
{"%2B","+"},
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
{"%22","\""},
<span style="color: #000000;">kept</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
{"%E2%80%93","-"},
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
{"%E2%80%99","'"},
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks kept, %d to go\r"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">kept</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">})</span>
{"%C3%A8","e"},
<span style="color: #000000;">total_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">count</span>
{"%C3%A9","e"}})
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_key</span><span style="color: #0000FF;">()=</span><span style="color: #000000;">#1B</span> <span style="color: #008080;">then</span> <span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"escape keyed\n"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
function html_clean(string s)
<span style="color: #000000;">curl_cleanup</span><span style="color: #0000FF;">()</span>
return substitute_all(s,html,ascii)
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks with bare lang tags\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">kept</span><span style="color: #0000FF;">})</span>
end function
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">custom_sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">)))</span>
 
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tags</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">to</span> <span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
constant {utf8,ansi} = columnize({{x"E28093","-"},
<span style="color: #004080;">integer</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span>
{x"E28099","'"},
<span style="color: #000000;">tc</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">]</span>
{x"C3A8","e"},
<span style="color: #008080;">if</span> <span style="color: #000000;">tc</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
{x"C3A9","e"},
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_task</span> <span style="color: #008080;">then</span>
{x"D09A","K"},
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s %d (%s)\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">]),</span><span style="color: #000000;">tc</span><span style="color: #0000FF;">,</span><span style="color: #000000;">multi_lang</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">])})</span>
{x"D09C","M"}})
<span style="color: #008080;">else</span> <span style="color: #000080;font-style:italic;">-- (sort_by_count)</span>
 
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s %d (%s)\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">task_langs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">],</span><span style="color: #000000;">tc</span><span style="color: #0000FF;">,</span><span style="color: #000000;">multi_task</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">],</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)})</span>
function utf8_clean(string s)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
return substitute_all(s,utf8,ansi)
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
end function
<span style="color: #008080;">return</span> <span style="color: #000000;">total_count</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
function multi_lang(sequence s)
-- Convert eg {"Algol","Algol","C","C","C"} to "Algol[2],C[3]"
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Total: %d\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">find_bare_lang_tags</span><span style="color: #0000FF;">()})</span>
integer i = 1, j = 2
while i<length(s) do
<span style="color: #0000FF;">?</span><span style="color: #008000;">"done"</span>
if s[i]=s[j] then
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span>
while j<length(s) and s[i]=s[j+1] do j+=1 end while
<!--</lang>-->
s[i..j] = {sprintf("%s[%d]",{s[i],j-i+1})}
end if
i += 1
j = i+1
end while
return join(s,",")
end function
 
function multi_task(sequence s, tasks)
-- Render a list of task indexes as a comma-separated string of task names.
-- Each index is looked up in tasks and passed through html_clean(); a run of
-- identical consecutive indexes collapses to a single "name[count]" entry.
-- When more than 8 entries remain, the middle ones are replaced by "...".
    sequence res = {}
    integer i = 1
    while i<=length(s) do
        integer si = s[i],
                n = 1
        -- measure the run of entries equal to s[i]
        while i+n<=length(s) and s[i+n]=si do
            n += 1
        end while
        string name = html_clean(tasks[si])
        if n>1 then
            name = sprintf("%s[%d]",{name,n})
        end if
        res = append(res,name)
        i += n
    end while
    if length(res)>8 then res[4..-4] = {"..."} end if
    return join(res,",")
end function
 
function find_bare_lang_tags()
-- Download every task page, count the bare <lang> tags in each, report the
-- results through progress() (grouped per task or per language, depending on
-- the sort_by_task flag) and return the overall number of bare tags found.
-- note this lot use web scraping (as cribbed from a similar task) ...
sequence tasks = dewiki(open_category("Programming_Tasks"))
if include_drafts then
tasks &= dewiki(open_category("Draft_Programming_Tasks"))
tasks = sort(tasks)
end if
-- ... whereas the individual tasks use the web api instead (3x smaller/faster)
integer total_count = 0,
lt = length(tasks),
kept = 0
progress("%d tasks found\n",{lt})
-- When sort_by_task is set, task_counts/task_things are pre-sized with one
-- slot per task; otherwise they start empty and grow in step with task_langs
-- (one slot per distinct language seen).
sequence task_langs = {},
task_counts = iff(sort_by_task?repeat(0,lt):{}),
task_things = iff(sort_by_task?repeat({},lt):{})
for i=1 to length(tasks) do
string ti = tasks[i],
url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=raw",{ti}),
contents = open_download(ti&".raw",url),
this
integer count = 0, start = 1, header
-- visit every literal `<lang>` occurrence in the page text
while true do
start = match(`<lang>`,contents,start)
if start=0 then exit end if
-- look backward for the nearest header
header = rmatch(`{{header|`,contents,start)
if header=0 then
-- this = ""
this = "no language"
else
-- language name is the text between `{{header|` and the next `}}`
header += length(`{{header|`)
this = utf8_clean(contents[header..match(`}}`,contents,header)-1])
end if
-- NOTE(review): this branch tests sort_by_lang while the rest of the routine
-- tests sort_by_task; presumably the two flags are always opposites - confirm
-- where they are defined.
if sort_by_lang then
-- per-language bookkeeping: record the task index under this language
integer k = find(this,task_langs)
if k=0 then
task_langs = append(task_langs,this)
task_things = append(task_things,{i})
task_counts = append(task_counts,1)
else
task_things[k] = append(task_things[k],i)
task_counts[k] += 1
end if
else
-- per-task bookkeeping: record the language under this task
task_things[i] = append(task_things[i],this)
end if
count += 1
-- resume the search just past the tag that was found
start += length(`<lang>`)
end while
if count!=0 then
if sort_by_task then
task_counts[i] = count
end if
-- kept = number of tasks containing at least one bare tag
kept += 1
end if
progress("%d tasks kept, %d to go\r",{kept,lt-i})
total_count += count
-- #1B is the escape key: lets the user abandon the scan early
if get_key()=#1B then progress("escape keyed\n") exit end if
end for
-- release the curl handle and its error buffer if one is still open
if curl!=NULL then
curl_easy_cleanup(curl)
free(pErrorBuffer)
curl = NULL
pErrorBuffer = NULL
end if
progress("%d tasks with bare lang tags\n",{kept})
-- tags holds the indexes of task_counts ordered by count; walking it from the
-- end reports the largest counts first, and the first zero ends the report
sequence tags = custom_sort(task_counts,tagset(length(task_counts)))
for i=length(tags) to 1 by -1 do
integer ti = tags[i],
tc = task_counts[ti]
if tc=0 then exit end if
--if tc>5 then
if sort_by_task then
progress("%s %d (%s)\n",{html_clean(tasks[ti]),tc,multi_lang(task_things[ti])})
else -- (sort_by_count)
progress("%s %d (%s)\n",{task_langs[ti],tc,multi_task(task_things[ti],tasks)})
end if
--end if
end for
return total_count
end function
 
progress("Total: %d\n",{find_bare_lang_tags()})</lang>
{{out}}
as of 26/7/19, sort_by_task:
Line 1,284 ⟶ 1,701:
4DOS Batch 1 (100_doors)
Total: 1094
</pre>
 
=={{header|Python}}==
<syntaxhighlight lang=python>
"""Count bare `lang` tags in wiki markup. Requires Python >=3.7 (the module
relies on `from __future__ import annotations`).

Uses the Python standard library `urllib` to make MediaWiki API requests.
"""
 
from __future__ import annotations
 
import functools
import gzip
import json
import logging
import platform
import re
 
from collections import Counter
from collections import defaultdict
 
from typing import Any
from typing import Iterator
from typing import Iterable
from typing import List
from typing import Mapping
from typing import NamedTuple
from typing import Optional
from typing import Tuple
 
from urllib.parse import urlencode
from urllib.parse import urlunparse
from urllib.parse import quote_plus
 
import urllib.error
import urllib.request
 
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
 
 
# Parse wiki markup with these regular expression patterns. Any headings and
# `lang` tags found inside `nowiki`, `pre` or other `lang` tags (bare or not)
# should not count as "bare".
#
# NOTE: The order of these patterns is significant.
# Each entry is (group name, pattern). The patterns are joined into a single
# alternation below, so at any position the first entry that matches wins.
RE_SPEC = [
    # Regions to skip: listed first so they are consumed whole and nothing
    # inside them is reported as a heading or a bare tag.
    ("NOWIKI", r"<\s*nowiki\s*>.*?</\s*nowiki\s*>"),
    ("PRE", r"<\s*pre\s*>.*?</\s*pre\s*>"),
    # A `lang` tag that has something after the tag name, i.e. not bare.
    ("LANG", r"<\s*lang\s+.+?>.*?</\s*lang\s*>"),
    # A section heading; the inner `header` group captures the language name.
    ("HEAD", r"==\{\{\s*header\s*\|\s*(?P<header>.+?)\s*}}=="),
    # A `lang` tag with nothing but optional whitespace after the tag name.
    ("BARE", r"<\s*lang\s*>.*?</\s*lang\s*>"),
]

# One named group per RE_SPEC entry; `match.lastgroup` then tells which kind
# of token was found. DOTALL lets tag bodies span multiple lines.
RE_BARE_LANG = re.compile(
    "|".join(rf"(?P<{name}>{pattern})" for name, pattern in RE_SPEC),
    re.DOTALL | re.IGNORECASE,
)
 
# Some wiki headings look like this "=={{header|Some}} / {{header|Other}}==".
# We'll use this regular expression to strip out the markup.
RE_MULTI_HEADER = re.compile(r"(}|(\{\{\s*header\s*\|\s*))", re.IGNORECASE)
 
 
def find_bare_lang_section_headers(wiki_text: str) -> Iterator[str]:
    """Yield the enclosing section heading for every bare `lang` tag.

    Headings are produced in document order, once per bare tag, so a
    section holding several bare tags is repeated. Tags that appear before
    the first heading are reported under "no language".
    """
    heading = "no language"

    for token in RE_BARE_LANG.finditer(wiki_text):
        token_kind = token.lastgroup

        if token_kind == "BARE":
            yield heading
        elif token_kind == "HEAD":
            # Strip leftover `}` / `{{header|` markup from combined headings.
            heading = RE_MULTI_HEADER.sub("", token.group("header"))
 
 
class Error(Exception):
    """Raised when a MediaWiki response is not what this module expects,
    e.g. a response header does not have the expected value."""
 
 
class TagCounter:
    """Count bare `lang` tags in wiki markup. Group them by heading and
    remember what page they're in."""

    def __init__(self):
        # section heading -> number of bare tags seen under that heading
        self.counter = Counter()
        # section heading -> set of page titles where those tags were seen
        self.pages = defaultdict(set)
        # running total of bare tags across all headings
        self.total = 0

    def __len__(self):
        """Return the number of distinct section headings recorded."""
        return len(self.counter)

    @classmethod
    def from_section_headers(
        cls, page_title: str, section_headers: Iterable[str]
    ) -> TagCounter:
        """Return a new `TagCounter` initialized with the given section
        headings, one count per heading occurrence."""
        counter = cls()

        for heading in section_headers:
            counter.add(page_title, heading)

        return counter

    @classmethod
    def from_wiki_text(cls, page_title: str, wiki_text: str) -> TagCounter:
        """Return a new `TagCounter` initialized with bare lang tags from the
        given wiki text."""
        return cls.from_section_headers(
            page_title,
            find_bare_lang_section_headers(wiki_text),
        )

    def add(self, page_title: str, section_heading: str):
        """Increment the counter by one for the given section heading and
        page."""
        self.counter[section_heading] += 1
        self.pages[section_heading].add(page_title)
        self.total += 1

    def update(self, other):
        """Merge the counts and page sets of `other`, another `TagCounter`,
        into this one."""
        assert isinstance(other, TagCounter)

        self.counter.update(other.counter)

        for section_heading, pages in other.pages.items():
            self.pages[section_heading].update(pages)

        self.total += other.total

    def most_common(self, n=None) -> str:
        """Return a formatted report of the wiki sections with the most bare
        lang tags.

        `n` limits the report to the `n` most common headings; `None` lists
        them all. Page titles are listed in sorted order so that the report
        is identical from run to run (set iteration order is not).
        """
        buf = [f"{sum(self.counter.values())} bare lang tags.\n"]

        for section_heading, count in self.counter.most_common(n=n):
            pages = sorted(self.pages[section_heading])
            buf.append(f"{count} in {section_heading} {pages}")

        return "\n".join(buf)
 
 
def quote_underscore(string, safe="", encoding=None, errors=None):
    """Percent-encode `string` as `urllib.parse.quote_plus` does, then turn
    the `+` that stands for each space into an underscore."""
    return quote_plus(string, safe, encoding, errors).replace("+", "_")
 
 
class URL(NamedTuple):
    """The six URL components in the order `urllib.parse.urlunparse` expects,
    plus helpers for rendering the URL and swapping its query string.
    Used to build and pass around the MediaWiki URLs.
    """

    scheme: str
    netloc: str
    path: str
    params: str
    query: str
    fragment: str

    def __str__(self):
        """Render the components as a single URL string."""
        return urlunparse(self)

    def with_query(self, query: Mapping[str, Any]) -> URL:
        """Return a copy of this URL whose query string encodes `query`.

        Spaces become underscores and `:` is left unescaped.
        """
        encoded = urlencode(query, safe=":", quote_via=quote_underscore)
        return self._replace(query=encoded)
 
 
# Base URL of the MediaWiki API endpoint; callers attach a query string with
# `with_query` before requesting it.
API_BASE_URL = URL(
    scheme="http",
    netloc="rosettacode.org",
    path="/mw/api.php",
    params="",
    query="",
    fragment="",
)

# Base URL of the wiki's `index.php` entry point, which is a separate endpoint
# from the API above.
UGLY_RAW_URL = URL(
    scheme="http",
    netloc="rosettacode.org",
    path="/mw/index.php",
    params="",
    query="",
    fragment="",
)

# Request headers sent when the caller does not supply its own.
# NOTE: Cloudflare was blocking requests with the default user agent.
DEFAULT_HEADERS = {
    "User-agent": f"python/{platform.python_version()}",
    "Accept-encoding": "gzip, deflate",
    "Accept": "*/*",
    "Connection": "keep-alive",
}
 
 
class Response(NamedTuple):
    """The parts of an HTTP response that this module uses."""

    # response header name -> value
    headers: Mapping[str, str]
    # response payload exactly as read from the connection (not decompressed)
    body: bytes
 
 
def get(url: URL, headers=DEFAULT_HEADERS) -> Response:
    """Make an HTTP GET request to the given URL.

    Returns the response headers and the raw (possibly compressed) body.

    Raises `urllib.error.HTTPError` for HTTP error statuses, after logging
    the status code and the error body at debug level.
    """
    import zlib  # local import: only needed to recognise a corrupt gzip body

    logger.debug(f"GET {url}")
    request = urllib.request.Request(str(url), headers=headers)

    try:
        with urllib.request.urlopen(request) as response:
            return Response(
                headers=dict(response.getheaders()),
                body=response.read(),
            )
    except urllib.error.HTTPError as e:
        logger.debug(e.code)

        # The error body is only sometimes gzip-compressed. Logging it is
        # best effort and must never replace the HTTPError being re-raised.
        error_body = e.read()
        try:
            error_body = gzip.decompress(error_body)
        except (OSError, EOFError, zlib.error):
            pass

        logger.debug(error_body)
        raise
 
 
def raise_for_header(headers: Mapping[str, str], header: str, expect: str):
    """Raise `Error` unless `headers[header]` is exactly `expect`.

    A missing header is treated as the value `None`.
    """
    got = headers.get(header)
    if got == expect:
        return
    raise Error(f"expected '{expect}', got '{got}'")
 
 
raise_for_content_type = functools.partial(raise_for_header, header="Content-Type")
 
 
class CMContinue(NamedTuple):
    """Continuation values taken from one `categorymembers` response and sent
    back with the next request to fetch the following chunk."""

    # value of the response's `continue.continue` field
    continue_: str
    # value of the response's `continue.cmcontinue` field
    cmcontinue: str
 
 
Pages = Tuple[List[str], Optional[CMContinue]]
 
 
def get_wiki_page_titles(chunk_size: int = 500, continue_: CMContinue = None) -> Pages:
    """Fetch one chunk of page titles from the Programming_Tasks category.

    Returns `(titles, continuation)`. `continuation` is a `CMContinue` to
    pass back in as `continue_` for the next chunk, or `None` when the
    response carries no `cmcontinue` value.

    Raises `Error` if the response is not gzip-encoded JSON.
    """
    params = {
        "action": "query",
        "list": "categorymembers",
        "cmtitle": "Category:Programming_Tasks",
        "cmlimit": chunk_size,
        "format": "json",
        "continue": continue_.continue_ if continue_ else "",
    }
    if continue_:
        params["cmcontinue"] = continue_.cmcontinue

    response = get(API_BASE_URL.with_query(params))

    # Fail early if the response is not what we are expecting.
    raise_for_content_type(response.headers, expect="application/json; charset=utf-8")
    raise_for_header(response.headers, "Content-Encoding", "gzip")

    payload = json.loads(gzip.decompress(response.body))
    titles = [member["title"] for member in payload["query"]["categorymembers"]]

    marker = payload.get("continue", {})
    if not marker.get("cmcontinue"):
        return (titles, None)

    return (titles, CMContinue(marker["continue"], marker["cmcontinue"]))
 
 
def get_wiki_page_markup(page_title: str) -> str:
    """Download the page `page_title` with `action=raw` and return its
    markup as text.

    Raises `Error` if the response content type is not wiki text.
    """
    raw_url = UGLY_RAW_URL.with_query({"action": "raw", "title": page_title})
    response = get(raw_url)

    # Fail early if the response is not what we are expecting.
    raise_for_content_type(response.headers, expect="text/x-wiki; charset=UTF-8")

    markup = response.body.decode()
    return markup
 
 
def example(limit=30):
    """Count bare lang tags in the first `limit` task pages and print a
    summary to stdout.

    `limit` is the maximum number of pages to download; `None` means every
    page in the category.
    """
    # Get the first chunk of wiki page titles from the MediaWiki API
    page_titles, continue_ = get_wiki_page_titles()

    # Get more chunks if there are any, but stop listing as soon as we hold
    # at least as many titles as we are going to process.
    while continue_ is not None and (limit is None or len(page_titles) < limit):
        more_page_titles, continue_ = get_wiki_page_titles(continue_=continue_)
        page_titles.extend(more_page_titles)

    # Aggregate counts from all pages.
    counter = TagCounter()

    for i, page_title in enumerate(page_titles):
        # `i` is zero-based, so `i >= limit` stops after exactly `limit` pages.
        if limit is not None and i >= limit:
            break

        # Read and parse raw wiki page markup.
        wiki_text = get_wiki_page_markup(page_title)
        counts = TagCounter.from_wiki_text(page_title, wiki_text)
        counter.update(counts)

    # Dump the results to stdout.
    print(counter.most_common())
 
 
if __name__ == "__main__":
logging.basicConfig(format="%(asctime)s %(message)s", level=logging.DEBUG)
example()
</syntaxhighlight>
 
{{out}}
Limited to the first 30 wiki pages.
<pre>
44 bare lang tags.
 
5 in EasyLang ['15 Puzzle Game', '100 doors', 'A+B', 'Ackermann function', '21 Game']
4 in Scilab ['15 Puzzle Game', '100 doors', 'Ackermann function', 'AKS test for primes']
4 in uBasic/4tH ['AKS test for primes', '100 doors', 'Abundant, deficient and perfect number classifications', '99 Bottles of Beer']
3 in Ursa ['100 doors', 'A+B', '99 Bottles of Beer']
2 in Caché ObjectScript ['100 doors']
2 in Klingphix ['Ackermann function', '99 Bottles of Beer']
2 in M2000 Interpreter ['A+B', 'Abstract type']
2 in PicoLisp ['AKS test for primes', 'ABC Problem']
2 in ERRE ['Address of a variable']
1 in 4DOS Batch ['100 doors']
1 in PostScript ['100 doors']
1 in Factor ['2048']
1 in R ['21 Game']
1 in OCaml ['99 Bottles of Beer']
1 in Excel ['A+B']
1 in Java ['A+B']
1 in Maxima ['A+B']
1 in Mercury ['A+B']
1 in J ['Abbreviations, automatic']
1 in Python ['Abelian sandpile model']
1 in GFA Basic ['Abundant, deficient and perfect number classifications']
1 in ??-61/52 ['Ackermann function']
1 in Nim ['Active object']
1 in Go ['Address of a variable']
1 in Smalltalk ['Address of a variable']
1 in COBOL ['Align columns']
1 in CoffeeScript ['Align columns']
</pre>
 
Line 1,289 ⟶ 2,056:
Note that this follows the task, but the output is completely bogus since the actual <tt>&lt;lang&gt;</tt> tags that it finds are in <tt>&lt;pre&gt;</tt> and in code...
 
<syntaxhighlight lang=racket>
#lang racket
 
Line 1,318 ⟶ 2,085:
 
(find-bare-tags "Rosetta Code/Find bare lang tags")
</syntaxhighlight>
{{out}}
<pre>
Line 1,330 ⟶ 2,097:
===More-extra credit===
Add the following code at the bottom, run, watch results.
<syntaxhighlight lang=racket>
(define (get-category cat)
(let loop ([c #f])
Line 1,347 ⟶ 2,114:
(printf "Page: ~a " page)
(find-bare-tags page))
</syntaxhighlight>
 
=={{header|Raku}}==
(formerly Perl 6)
{{trans|Perl}}
The only tricky thing here is the use of the <tt>ms</tt> form of match, short for <tt>m:sigspace</tt>. This causes whitespace in the regex to be considered "significant", that is, it matches optional whitespace at those positions, as if you'd put <tt>\s*</tt> there. Of course, the regexes themselves are in Raku syntax, which is quite different from Perl  5 regex syntax (and arguably much cleaner). Regex syntax is perhaps the area in which Raku diverges most from Perl  5.
<syntaxhighlight lang="raku" line>my $lang = '(no language)';
my $total = 0;
my %blanks;
Line 1,368 ⟶ 2,135:
 
say "$total bare language tag{ 's' if $total != 1 }\n";
say .value, ' in ', .key for %blanks.sort;</syntaxhighlight>
{{out}}
<pre>2 bare language tags
Line 1,374 ⟶ 2,141:
1 in (no language)
1 in perl</pre>
 
=={{header|REXX}}==
<syntaxhighlight lang=rexx>/*REXX pgm finds and displays bare language (<lang>) tags without a language specified. */
parse arg iFID .                                 /*obtain optional argument from the CL.*/
if iFID=='' | iFID=","  then iFID= 'BARELANG.HTM' /*Not specified?  Then assume default*/
call lineout iFID                                /*close the file, just in case its open*/
call linein  iFID,1,0                            /*point to the first record.           */
noLa= 0;   bare= 0;   header=;   heads=          /*initialize various REXX variables.   */
!.= 0                                            /*sparse array: bare tags per language.*/
    do recs=0  while lines(iFID)\==0             /*read all lines in the input file.    */
    $= linein(iFID)                              /*read a line (record) from the input. */
    $= space($)                                  /*elide superfluous blanks from record.*/
    if $==''  then iterate                       /*if a blank line, then skip any tests.*/
    call testHead                                /*process possible  =={{header|aaa}}== */
    call testLang                                /*   "       "      <lang aaa> or <lang>*/
    end   /*recs*/

call lineout iFID                                /*close the file, just in case its open*/
say recs ' records read from file: ' iFID;  say  /*show number of records read from file*/
if bare==0  then bare= 'no';   say right(bare, 9)  " bare language tags.";   say

    do #=1  for words(heads);  _= word(heads, #) /*walk through ALL the languages seen. */
    if !._\==0  then say right(!._, 9)  ' in'  _ /*show the count for a particular lang.*/
    end   /*#*/

if noLa==0  then noLa= 'no';   say right(noLa, 9)  " in no specified language."
exit 0
/*--------------------------------------------------------------------------------------*/
/*testHead:  if the record holds  =={{header|aaa}}==  remember  aaa  as the current     */
/*           header and add it to the list of languages seen  (heads).                  */
testHead: @head= '=={{header|';    @foot= "}}=="               /*define two literals. */
          hh= pos(@head, $     );  if hh==0  then return       /*get start of literal.*/
          or= hh + length(@head) - 1                           /*get position of  |   */
          hb= pos(@foot, $, or);   if hb==0  then return       /*get position of foot.*/
          head= substr($, or+1, hb-or-1)                       /*get the language name*/
          if head==''  then return                             /*No name?  Ignore it. */
          header= head                                         /*Header?  Then use it.*/
          if wordpos(head, heads)==0  then heads= heads head   /*New lang? Add to list*/
          return
/*--------------------------------------------------------------------------------------*/
/*testLang:  if the record holds a  <lang ···>  tag with nothing between  <lang  and    */
/*           >,  count it as bare and credit it to the current header  (if any).        */
/*           Only the first  <lang  on a record is examined.                            */
testLang: @lang= '<lang';          @back= ">"                  /*define two literals. */
          s1= pos(@lang, $     );  if s1==0  then return       /*get start of literal.*/
          gt= pos(@back, $, s1+1); if gt==0  then return       /*get position of  >   */
          at= s1 + length(@lang)                               /*1st char after <lang */
          lang= strip( substr($, at, gt-at) )                  /*text between the two.*/
          if lang\==''  then return                            /*Has a lang? Not bare.*/
          bare= bare + 1                                       /*bump the bare counter*/
          if header==''  then noLa= noLa + 1                   /*bump noLang counter. */
                         else !.header= !.header + 1           /*bump a lang     "    */
          return</syntaxhighlight>
{{out|output|text=&nbsp; when using the default input:}}
<pre>
9 records read from file: BARELANG.HTM
 
2 bare language tags.
 
1 in Perl
1 in no specified language.
</pre>
 
=={{header|Ruby}}==
Quoting from the FAQ: "If you just want the raw wikitext without any other information whatsoever, it's best to use index.php's action=raw mode instead of the API"
<syntaxhighlight lang=Ruby>require "open-uri"
require "cgi"
 
Line 1,399 ⟶ 2,221:
 
puts "\n#{result.values.map(&:count).inject(&:+)} bare language tags.\n\n"
result.each{|k,v| puts "#{v.count} in #{k} (#{v.tasks})"}</syntaxhighlight>
{{Output}}
<pre>
Line 1,411 ⟶ 2,233:
1 in gnuplot (["Greatest_common_divisor"])
1 in Bracmat (["Greatest_element_of_a_list"])
2 in МК??-61/52 (["Greatest_element_of_a_list", "Greatest_element_of_a_list"])
1 in ooRexx (["Greatest_element_of_a_list"])
2 in Mathprog (["Greatest_subsequential_sum", "Greatest_subsequential_sum"])
1 in PHP (["Greatest_subsequential_sum"])
</pre>
 
=={{header|Rust}}==
<syntaxhighlight lang=rust>
extern crate regex;
 
use std::io;
use std::io::prelude::*;
 
use regex::Regex;
 
fn find_bare_lang_tags(input: &str) -> Vec<(Option<String>, i32)> {
let mut language_pairs = vec![];
let mut language = None;
let mut counter = 0_i32;
 
let header_re = Regex::new(r"==\{\{header\|(?P<lang>[[:alpha:]]+)\}\}==").unwrap();
 
for line in input.lines() {
if let Some(captures) = header_re.captures(line) {
if let Some(header_lang) = captures.name("lang") {
language_pairs.push((language, counter));
language = Some(header_lang.as_str().to_owned());
counter = 0;
}
}
 
if line.contains("<lang>") {
counter += 1;
}
}
 
language_pairs.push((language, counter));
language_pairs
}
 
fn main() {
let stdin = io::stdin();
let mut buf = String::new();
stdin.lock().read_to_string(&mut buf).unwrap();
let results = find_bare_lang_tags(&buf);
let total_bare = results.iter().map(|r| r.1).sum::<i32>();
 
println!("{} bare language tags.\n", total_bare);
for result in &results {
let num_bare = result.1;
 
if num_bare > 0 {
println!(
"{} in {}",
result.1,
result
.0
.to_owned()
.unwrap_or_else(|| String::from("no language"))
);
}
}
}
</syntaxhighlight>
{{out}}
<pre>
</pre>
 
=={{header|Scala}}==
To analyse RosettaCode pages, invoke Java with <code>-Dhttp.agent=Anything</code> to work around CloudFlare blocking Java from accessing the RosettaCode site.
<syntaxhighlight lang=Scala>// Map lines to a list of Option(heading -> task) for each bare lang tag found.
val headerFormat = "==[{]+header[|]([^}]*)[}]+==".r
val langFormat = "<lang([^>]*)>".r
Line 1,451 ⟶ 2,335:
def summary = format(mapReduce) mkString "\n"
}
def mapReduce(inputs: Seq[BareLangFinder]) = reduced(inputs.flatMap(_.map))</syntaxhighlight>
'''Examples:'''
<pre>val test = """
Line 1,457 ⟶ 2,341:
 
<lang>Pseudocode</lang>
 
=={{header|C}}==
<lang C>printf("Hello world!\n");</lang>
 
=={{header|Perl}}==
<lang>print "Hello world!\n"</lang>
"""
 
println(BareLangFinder(scala.io.Source.fromString(test)).summary)
 
// System.setProperty("http.agent", "RosettaCode/1.0")
val tasks = List("Greatest_common_divisor", "Greatest_element_of_a_list", "Greatest_subsequential_sum")
val inputs = for (task <- tasks; url = "http://rosettacode.org/wiki?action=raw&title=" + task)
yield BareLangFinder(scala.io.Source.fromURL(url), task)
val bare = mapReduce(inputs)
println
println(s"${count(bare)} bare language tags in ${tasks.size} tasks:")
println(format(bare) mkString "\n")</pre>
{{out}}
<pre>1 in Perl
1 in no langauge
 
10 bare language tags in 3 tasks:
2 in Mathprog (Greatest_subsequential_sum)
1 in gnuplot (Greatest_common_divisor)
2 in ??-61/52 (Greatest_element_of_a_list)
1 in Bracmat (Greatest_element_of_a_list)
1 in PHP (Greatest_subsequential_sum)
2 in Euler Math Toolbox (Greatest_common_divisor,Greatest_element_of_a_list)
1 in ooRexx (Greatest_element_of_a_list)</pre>
 
=={{header|Tcl}}==
Line 1,463 ⟶ 2,377:
{{tcllib|textutil::split}}
{{tcllib|uri}}
<syntaxhighlight lang=tcl>package require Tcl 8.5
package require http
package require json
Line 1,567 ⟶ 2,481:
findBareTags $task [getTaskContent $task]
}
printResults</syntaxhighlight>
 
=={{header|Wren}}==
{{libheader|Wren-ioutil}}
{{libheader|Wren-pattern}}
{{libheader|Wren-set}}
{{libheader|Wren-sort}}
{{libheader|Wren-fmt}}
Uses Go's example files.
<syntaxhighlight lang=wren>import "./ioutil" for FileUtil
import "./pattern" for Pattern
import "./set" for Set
import "./sort" for Sort
import "./fmt" for Fmt

// Counts bare <lang> tags in three example files and reports, for each
// language heading, how many were found and in which files.

// NOTE(review): presumably matches a "=={{header|Name}}==" heading at the
// start of a line and captures Name - confirm against the Wren-pattern docs.
var p = Pattern.new("/=/={{header/|[+0/y]}}/=/=", Pattern.start)
// total number of bare tags over all files
var bareCount = 0
// heading -> [count, set of file names containing a bare tag under it]
var bareLang = {}
for (fileName in ["example.txt", "example2.txt", "example3.txt"]) {
var lines = FileUtil.readLines(fileName)
// heading in effect until the first header line of this file is seen
var lastHeader = "No language"
for (line in lines) {
line = line.trimStart()
if (line == "") continue
var m = p.find(line)
if (m) {
// a header line: remember the captured text and move to the next line
lastHeader = m.capsText[0]
continue
}
// only a tag at the very start of the (left-trimmed) line is counted
if (line.startsWith("<lang>")) {
bareCount = bareCount + 1
var value = bareLang[lastHeader]
if (value) {
value[0] = value[0] + 1
value[1].add(fileName)
} else {
bareLang[lastHeader] = [1, Set.new([fileName])]
}
}
}
}
System.print("%(bareCount) bare language tags:")
for (me in bareLang) {
var lang = me.key
var count = me.value[0]
var names = me.value[1].toList
// sort the file names before printing them
Sort.insertion(names)
Fmt.print(" $2d in $-11s $n", count, lang, names)
}</syntaxhighlight>
 
{{out}}
<pre>
5 bare language tags:
2 in C [example2.txt, example3.txt]
1 in Perl [example.txt]
2 in No language [example.txt, example3.txt]
</pre>
 
=={{header|zkl}}==
{{trans|Ruby}}
Uses shared library cURL.
<syntaxhighlight lang=zkl>var [const] CURL=Import("zklCurl"),
partURI="http://rosettacode.org/wiki?action=raw&title=%s",
langRE=RegExp(0'!\s*==\s*{{\s*header\s*\|(.+)}}!), // == {{ header | zkl }}
emptyRE=RegExp(0'!<lang\s*>!);
 
Line 1,599 ⟶ 2,569:
tasks:=results[lang];
println("%d in %s: %s".fmt(tasks.len(),lang,tasks.concat(",")));
}</syntaxhighlight>
{{out}}
<pre>
Line 1,616 ⟶ 2,586:
1 in ooRexx: Greatest_element_of_a_list
2 in uBasic/4tH: Greatest_common_divisor,Greatest_element_of_a_list
2 in МК??-61/52: Greatest_element_of_a_list,Greatest_element_of_a_list
</pre>
 
9,476

edits