Rosetta Code/Fix code tags: Difference between revisions
m (→{{header|Tcl}}: alternate method) |
|||
Line 88: | Line 88: | ||
slang = '/lang' |
slang = '/lang' |
||
code='code' |
|||
text = sys.stdin.read() |
text = sys.stdin.read() |
||
Line 95: | Line 96: | ||
text = text.replace("</%s>" % i, "<%s>" % slang) |
text = text.replace("</%s>" % i, "<%s>" % slang) |
||
text = re.sub("(?s)< |
text = re.sub("(?s)<%s (.+?)>(.*?)</%s>"%(code,code), r"<lang \1>\2<%s>" % slang, text) |
||
sys.stdout.write(text) |
sys.stdout.write(text) |
Revision as of 19:12, 11 December 2009
You are encouraged to solve this task according to the task description, using any language you may know.
Fix Rosetta Code deprecated code tags, with these rules:
Change <%s> to <lang %s>; change </%s> to </lang>; change <code %s> to <lang %s>; change </code> to </lang>.
Usage:
cat wikisource.txt | ./convert.py > converted.txt
AutoHotkey
<lang AutoHotkey>
; usage: > fixtags.ahk input.txt output.txt
;
; Reads the file named by the first argument, rewrites the deprecated
; language tags as lang tags, and appends the result to the file named by
; the second argument.
FileRead, text, %1%
langs = ada,awk,autohotkey,etc
; The closing tag is assembled at run time so that it never appears
; literally in this listing (it would terminate the listing on the wiki).
slang = /lang
slang := "<" . slang . ">"
; Same trick for the code tag name used in the last replacement.
code = code
Loop, Parse, langs, `,
{
    tag1 = <%A_LoopField%>
    tag2 = </%A_LoopField%>
    text := RegExReplace(text, tag1, "<lang " . A_LoopField . ">")
    text := RegExReplace(text, tag2, slang)
}
; Code tags do not depend on the loop variable, so convert them once, after
; the loop. The "s)" option lets "." match newlines, so multi-line code
; blocks are converted as well.
text := RegExReplace(text, "s)<" . code . " (.+?)>(.*?)</" . code . ">", "<lang $1>$2" . slang)
FileAppend, % text, %2% </lang>
Perl
<lang perl># Filter: reads wiki source on STDIN, rewrites the deprecated language tags
# as lang tags, and prints the result on STDOUT.
my @langs = qw(ada cpp-qt pascal lscript z80 visualprolog html4strict cil objc asm progress teraterm hq9plus genero tsql email pic16 tcl apt_sources io apache vhdl avisynth winbatch vbnet ini scilab ocaml-brief sas actionscript3 qbasic perl bnf cobol powershell php kixtart visualfoxpro mirc make javascript cpp sdlbasic cadlisp php-brief rails verilog xml csharp actionscript nsis bash typoscript freebasic dot applescript haskell dos oracle8 cfdg glsl lotusscript mpasm latex sql klonec ruby ocaml smarty python oracle11 caddcl robots groovy smalltalk diff fortran cfm lua modula3 vb autoit java text scala lotusformulas pixelbender reg _div whitespace providex asp css lolcode lisp inno mysql plsql matlab oobas vim delphi xorg_conf gml prolog bf per scheme mxml d basic4gl m68k gnuplot idl abap intercal c_mac thinbasic java5 xpp boo klonecpp blitzbasic eiffel povray c gettext);

my $text = join "", <STDIN>;

# Tag names are held in variables so that neither the closing lang tag nor
# the code tag appears literally in this listing.
my $slang = "/lang";
my $code  = "code";

for my $lang (@langs) {
    # \Q...\E: match the language name literally, not as a regex.
    $text =~ s|<\Q$lang\E>|<lang $lang>|g;
    $text =~ s|</\Q$lang\E>|<$slang>|g;
}

# /s lets "." cross newlines so multi-line code blocks are converted too.
$text =~ s|<$code (.+?)>(.*?)</$code>|<lang $1>$2<$slang>|sg;

print $text; </lang>
Python
<lang python>
# coding: utf-8
import sys
import re

langs = ['ada', 'cpp-qt', 'pascal', 'lscript', 'z80', 'visualprolog', 'html4strict', 'cil', 'objc', 'asm', 'progress', 'teraterm', 'hq9plus', 'genero', 'tsql', 'email', 'pic16', 'tcl', 'apt_sources', 'io', 'apache', 'vhdl', 'avisynth', 'winbatch', 'vbnet', 'ini', 'scilab', 'ocaml-brief', 'sas', 'actionscript3', 'qbasic', 'perl', 'bnf', 'cobol', 'powershell', 'php', 'kixtart', 'visualfoxpro', 'mirc', 'make', 'javascript', 'cpp', 'sdlbasic', 'cadlisp', 'php-brief', 'rails', 'verilog', 'xml', 'csharp', 'actionscript', 'nsis', 'bash', 'typoscript', 'freebasic', 'dot', 'applescript', 'haskell', 'dos', 'oracle8', 'cfdg', 'glsl', 'lotusscript', 'mpasm', 'latex', 'sql', 'klonec', 'ruby', 'ocaml', 'smarty', 'python', 'oracle11', 'caddcl', 'robots', 'groovy', 'smalltalk', 'diff', 'fortran', 'cfm', 'lua', 'modula3', 'vb', 'autoit', 'java', 'text', 'scala', 'lotusformulas', 'pixelbender', 'reg', '_div', 'whitespace', 'providex', 'asp', 'css', 'lolcode', 'lisp', 'inno', 'mysql', 'plsql', 'matlab', 'oobas', 'vim', 'delphi', 'xorg_conf', 'gml', 'prolog', 'bf', 'per', 'scheme', 'mxml', 'd', 'basic4gl', 'm68k', 'gnuplot', 'idl', 'abap', 'intercal', 'c_mac', 'thinbasic', 'java5', 'xpp', 'boo', 'klonecpp', 'blitzbasic', 'eiffel', 'povray', 'c', 'gettext']

# Tag names are kept in variables so that neither the closing lang tag nor
# the code tag is ever written literally in this listing.
slang = '/lang'
code = 'code'


def fix_tags(text, langs=langs):
    """Return `text` with the deprecated tags rewritten as lang tags.

    For every name in `langs`, the bare opening tag becomes a lang tag
    carrying that name and the bare closing tag becomes the closing lang
    tag.  Afterwards every paired code tag that carries a language name is
    rewritten the same way; the (?s) flag lets such a block span lines.
    """
    for name in langs:
        text = text.replace("<%s>" % name, "<lang %s>" % name)
        text = text.replace("</%s>" % name, "<%s>" % slang)
    return re.sub("(?s)<%s (.+?)>(.*?)</%s>" % (code, code),
                  r"<lang \1>\2<%s>" % slang, text)


if __name__ == "__main__":
    text = fix_tags(sys.stdin.read())
else:
    # Imported rather than run as a filter: leave stdin alone.
    text = ""
sys.stdout.write(text) </lang>
R
Note that the instances of ##### are to stop the wiki getting confused. Please remove them before running the code.
<lang R># Rewrite the deprecated language tags in the string `page` as lang tags
# and return the converted string.
fixtags <- function(page)
{
   langs <- c("c", "c-sharp", "r")   # a complete list is required, obviously
   # Collapse to a single regex alternation: "c|c-sharp|r".
   langs <- paste(langs, collapse="|")
   page <- gsub(paste("<(", langs, ")>", sep=""), "<lang \\1>", page)
   page <- gsub(paste("</(", langs, ")>", sep=""), "</#####lang>", page)
   # The space after the tag name is required: the tag to match has the
   # form "code LANG", not "codeLANG".
   page <- gsub(paste("<#####code (", langs, ")>", sep=""), "<lang \\1>", page)
   page <- gsub("</#####code>", "</#####lang>", page)
   page
}

page <- "lorem ipsum <c>some c code</c>dolor sit amet,<c-sharp>some c-sharp code</c-sharp>
consectetur adipisicing elit,<#####code r>some r code</#####code>
sed do eiusmod tempor incididunt"

fixtags(page)
</lang>
Ruby
<lang ruby># get all stdin in one string
- text = $stdin.read
- for testing, use
text = DATA.read slash_lang = '/lang' langs = %w(foo bar baz) # actual list of languages declared here for lang in langs
text.gsub!(Regexp.new("<(#{lang})>")) {"<lang #$1>"} text.gsub!(Regexp.new("</#{lang}>"), "<#{slash_lang}>")
end
text.gsub!(//, '<lang \1>')
text.gsub!(/<\/code>/, "<#{slash_lang}>")
print text
__END__
Lorem ipsum
saepe audire
elaboraret ne quo, id equidem
atomorum inciderint usu. <foo>In sit inermis deleniti percipit</foo>,
ius ex tale civibus omittam. <barf>Vix ut doctus cetero invenire</barf>, his eu
altera electram. Tota adhuc altera te sea, soluta appetere ut mel</bar>.
Quo quis graecis vivendo te, <baz>posse nullam lobortis ex usu
. Eam volumus perpetua
constituto id, mea an omittam fierent vituperatoribus. </lang>
Lorem ipsum <lang foo>saepe audire</lang> elaboraret ne quo, id equidem
atomorum inciderint usu. <lang foo>In sit inermis deleniti percipit</lang>,
ius ex tale civibus omittam. <barf>Vix ut doctus cetero invenire</barf>, his eu
altera electram. Tota adhuc altera te sea, <lang bar>soluta appetere ut mel</lang>.
Quo quis graecis vivendo te, <lang baz>posse nullam lobortis ex usu</lang>. Eam volumus perpetua
constituto id, mea an omittam fierent vituperatoribus.
Tcl
<lang tcl># Language names whose bare tags are to be rewritten as lang tags.
set langs {
ada cpp-qt pascal lscript z80 visualprolog html4strict cil objc asm progress teraterm
hq9plus genero tsql email pic16 tcl apt_sources io apache vhdl avisynth winbatch vbnet
ini scilab ocaml-brief sas actionscript3 qbasic perl bnf cobol powershell php kixtart
visualfoxpro mirc make javascript cpp sdlbasic cadlisp php-brief rails verilog xml
csharp actionscript nsis bash typoscript freebasic dot applescript haskell dos oracle8
cfdg glsl lotusscript mpasm latex sql klonec ruby ocaml smarty python oracle11 caddcl
robots groovy smalltalk diff fortran cfm lua modula3 vb autoit java text scala lotusformulas
pixelbender reg _div whitespace providex asp css lolcode lisp inno mysql plsql matlab
oobas vim delphi xorg_conf gml prolog bf per scheme mxml d basic4gl m68k gnuplot idl
abap intercal c_mac thinbasic java5 xpp boo klonecpp blitzbasic eiffel povray c gettext
}
set text [read stdin]
# Tag names are held in variables so that neither the closing lang tag nor
# the code tag appears literally in this listing.
set slang /lang
set code code
foreach lang $langs {
    set text [regsub -all "<$lang>" $text "<lang $lang>"]
    set text [regsub -all "</$lang>" $text "<$slang>"]
}
# Paired code tags carrying a language name: \1 is the language, \2 the body.
set text [regsub -all "<$code (.+?)>(.*?)</$code>" $text "<lang \\1>\\2<$slang>"]</lang>
Alternatively, the foreach loop may be replaced with:
<lang tcl># One alternation over all language names replaces the per-language loop.
# regsub (not regexp) is required here: regexp -all returns only the number
# of matches, whereas regsub -all returns the substituted string.
set text [regsub -all "<([join $langs |])>" $text {<lang \1>}]
set text [regsub -all "</(?:[join $langs |])>" $text "<$slang>"]</lang>
This task does not require regular expressions at all:
<lang tcl># Build a flat {from to from to ...} list for [string map]; no regular
# expressions are involved. The code tag name is held in a variable so the
# tag is never written literally in this listing.
set code code
set replacements [list "</$code>" "<$slang>"]
foreach lang $langs {
    lappend replacements "<$lang>" "<lang $lang>"
    lappend replacements "</$lang>" "<$slang>"
    lappend replacements "<$code $lang>" "<lang $lang>"
}
set text [string map $replacements $text]</lang>