Web scraping: Difference between revisions
m (Retro: omit) |
m (→{{header|Ada}}: Reformatting) |
||
Line 6: | Line 6: | ||
=={{header|Ada}}== |
=={{header|Ada}}== |
||
{{libheader|AWS}} |
{{libheader|AWS}} |
||
<lang Ada>with AWS.Client |
<lang Ada>with AWS.Client, AWS.Response, AWS.Resources, AWS.Messages; |
||
with AWS.Response; use AWS.Response; |
|||
with AWS.Resources; use AWS.Resources; |
|||
with AWS.Messages; use AWS.Messages; |
|||
with Ada.Text_IO, Ada.Strings.Fixed; |
with Ada.Text_IO, Ada.Strings.Fixed; |
||
use Ada, AWS; |
use Ada, AWS, AWS.Resources, AWS.Messages; |
||
procedure Get_UTC_Time is |
procedure Get_UTC_Time is |
||
Page : |
Page : Response.Data; |
||
File : |
File : Resources.File_Type; |
||
Buffer : String (1 .. 1024); |
Buffer : String (1 .. 1024); |
||
Position, Last : Natural := 0; |
Position, Last : Natural := 0; |
||
S : Messages.Status_Code; |
S : Messages.Status_Code; |
||
begin |
begin |
||
Page := |
Page := Client.Get ("http://tycho.usno.navy.mil/cgi-bin/timer.pl"); |
||
S := Response.Status_Code (Page); |
S := Response.Status_Code (Page); |
||
if S not in Success then |
if S not in Success then |
||
Text_IO.Put_Line |
Text_IO.Put_Line |
||
("Unable to retrieve data => Status Code :" & |
("Unable to retrieve data => Status Code :" & Image (S) & |
||
" Reason :" & Reason_Phrase (S)); |
|||
" Reason :" & |
|||
Reason_Phrase (S)); |
|||
return; |
return; |
||
end if; |
end if; |
||
Response.Message_Body (Page, File); |
|||
while not End_Of_File (File) loop |
while not End_Of_File (File) loop |
||
Resources.Get_Line (File, Buffer, Last); |
|||
Position := |
Position := |
||
Strings.Fixed.Index |
Strings.Fixed.Index |
||
Line 44: | Line 39: | ||
end if; |
end if; |
||
end loop; |
end loop; |
||
end Get_UTC_Time; |
end Get_UTC_Time;</lang> |
||
</lang> |
|||
=={{header|AutoHotkey}}== |
=={{header|AutoHotkey}}== |
Revision as of 18:05, 4 April 2011
You are encouraged to solve this task according to the task description, using any language you may know.
Create a program that downloads the time from this URL: http://tycho.usno.navy.mil/cgi-bin/timer.pl and then prints the current UTC time by extracting just the UTC time from the web page's HTML.
If possible, only use libraries that come at no extra monetary cost with the programming language and that are widely available and popular such as CPAN for Perl or Boost for C++.
Ada
<lang Ada>with AWS.Client, AWS.Response, AWS.Resources, AWS.Messages; with Ada.Text_IO, Ada.Strings.Fixed; use Ada, AWS, AWS.Resources, AWS.Messages;
procedure Get_UTC_Time is
Page : Response.Data; File : Resources.File_Type; Buffer : String (1 .. 1024); Position, Last : Natural := 0; S : Messages.Status_Code;
begin
Page := Client.Get ("http://tycho.usno.navy.mil/cgi-bin/timer.pl"); S := Response.Status_Code (Page); if S not in Success then Text_IO.Put_Line ("Unable to retrieve data => Status Code :" & Image (S) & " Reason :" & Reason_Phrase (S)); return; end if;
Response.Message_Body (Page, File); while not End_Of_File (File) loop Resources.Get_Line (File, Buffer, Last); Position := Strings.Fixed.Index (Source => Buffer (Buffer'First .. Last), Pattern => "UTC"); if Position > 0 then Text_IO.Put_Line (Buffer (5 .. Position + 2)); return; end if; end loop;
end Get_UTC_Time;</lang>
AutoHotkey
<lang AutoHotkey>UrlDownloadToFile, http://tycho.usno.navy.mil/cgi-bin/timer.pl, time.html FileRead, timefile, time.html pos := InStr(timefile, "UTC") msgbox % time := SubStr(timefile, pos - 9, 8)</lang>
AWK
This is inspired by GETURL example in the manual for gawk.
<lang awk>#! /usr/bin/awk -f
BEGIN {
purl = "/inet/tcp/0/tycho.usno.navy.mil/80" ORS = RS = "\r\n\r\n" print "GET /cgi-bin/timer.pl HTTP/1.0" |& purl purl |& getline header while ( (purl |& getline ) > 0 ) { split($0, a, "\n") for(i=1; i <= length(a); i++) { if ( a[i] ~ /UTC/ ) { sub(/^<BR>/, "", a[i]) printf "%s\n", a[i] } } } close(purl)
}</lang>
ALGOL 68
<lang algol68>STRING
domain="tycho.usno.navy.mil", page="cgi-bin/timer.pl";
STRING # search for the needle in the haystack #
needle = "UTC", hay stack = "http://"+domain+"/"+page,
re success="^HTTP/[0-9.]* 200", re result description="^HTTP/[0-9.]* [0-9]+ [a-zA-Z ]*", re doctype ="\s\s<![Dd][Oo][Cc][Tt][Yy][Pp][Ee] [^>]+>\s+";
PROC raise error = (STRING msg)VOID: ( put(stand error, (msg, new line)); stop);
PROC is html page = (REF STRING page) BOOL: (
BOOL out=grep in string(re success, page, NIL, NIL) = 0; IF INT start, end; grep in string(re result description, page, start, end) = 0 THEN page:=page[end+1:]; IF grep in string(re doctype, page, start, end) = 0 THEN page:=page[start+2:] ELSE raise error("unknown format retrieving page") FI ELSE raise error("unknown error retrieving page") FI; out
);
STRING reply; INT rc = http content (reply, domain, haystack, 0); IF rc = 0 AND is html page (reply) THEN
STRING line; FILE freply; associate(freply, reply); on logical file end(freply, (REF FILE freply)BOOL: (done; SKIP)); DO get(freply,(line, new line)); IF string in string(needle, NIL, line) THEN print((line, new line)) FI OD; done: SKIP
ELSE raise error (strerror (rc)) FI</lang> Sample output:
<BR>Sep. 26, 21:51:17 UTC Universal Time
C
There's no any proper error handling.
<lang c>#include <stdio.h>
#include <string.h>
#include <curl/curl.h>
#include <sys/types.h>
#include <regex.h>
#define BUFSIZE 16384
size_t lr = 0;
size_t filterit(void *ptr, size_t size, size_t nmemb, void *stream) {
if ( (lr + size*nmemb) > BUFSIZE ) return BUFSIZE; memcpy(stream+lr, ptr, size*nmemb); lr += size*nmemb; return size*nmemb;
}
int main() {
CURL *curlHandle; char buffer[BUFSIZE]; regmatch_t amatch; regex_t cregex;
curlHandle = curl_easy_init(); curl_easy_setopt(curlHandle, CURLOPT_URL, "http://tycho.usno.navy.mil/cgi-bin/timer.pl"); curl_easy_setopt(curlHandle, CURLOPT_FOLLOWLOCATION, 1); curl_easy_setopt(curlHandle, CURLOPT_WRITEFUNCTION, filterit); curl_easy_setopt(curlHandle, CURLOPT_WRITEDATA, buffer); int success = curl_easy_perform(curlHandle); curl_easy_cleanup(curlHandle);
buffer[lr] = 0; regcomp(&cregex, " UTC", REG_NEWLINE); regexec(&cregex, buffer, 1, &amatch, 0); int bi = amatch.rm_so; while ( bi-- > 0 ) if ( memcmp(&buffer[bi], "<BR>", 4) == 0 ) break;
buffer[amatch.rm_eo] = 0;
printf("%s\n", &buffer[bi+4]);
regfree(&cregex); return 0;
}</lang>
C++
Library: Boost
to be compiled under Linux with g++ -lboost_regex -lboost_system -lboost_thread
<lang cpp>#include <iostream>
#include <istream>
#include <ostream>
#include <string>
#include <boost/asio.hpp>
#include <boost/regex.hpp>
#include <sstream>
int main( ) {
try { boost::asio::io_service io_service ; boost::asio::ip::tcp::resolver resolver ( io_service ) ; //we now try to get a list of endpoints to the server boost::asio::ip::tcp::resolver::query query( "tycho.usno.navy.mil" ,
"http" ) ;
boost::asio::ip::tcp::resolver::iterator endpoint_iterator =
resolver.resolve( query ) ;
boost::asio::ip::tcp::resolver::iterator end ; //looking for a successful endpoint connection boost::asio::ip::tcp::socket socket ( io_service ) ; boost::system::error_code error =
boost::asio::error::host_not_found ;
while ( error && endpoint_iterator != end ) {
socket.close( ) ; socket.connect ( *endpoint_iterator++ , error ) ;
} if ( error )
throw boost::system::system_error ( error ) ;
boost::asio::streambuf request ; //we now write the request std::ostream request_stream( &request ) ; request_stream << "GET /cgi-bin/timer.pl HTTP/1.0\r\n" ; request_stream << "Host: " << "tycho.usno.navy.mil" << "\r\n" ; request_stream << "Accept: */*\r\n" ; request_stream << "Connection: close\r\n\r\n" ; boost::asio::write ( socket , request ) ; //we write the request boost::asio::streambuf response ; boost::asio::read_until( socket , response , "\r\n\r\n" ) ; std::ostringstream line ; //reading to end , disregarding possible error messages boost::regex e ( "<BR>(.+\\s+UTC)" ) ; while ( boost::asio::read( socket , response,
boost::asio::transfer_at_least( 1 ) , error )) { line << &response ; boost::smatch matches ; if ( boost::regex_search( line.str( ) , matches, e ) ) std::cout << matches[ 1 ] << std::endl ; line.str( "" ) ;
} if ( error != boost::asio::error::eof )
throw boost::system::system_error( error ) ;
} catch ( std::exception & ex ) {
std::cout << "Exception: " << ex.what( ) << "\n" ;
} return 0 ;
}</lang>
C#
<lang csharp>class Program
{ static void Main(string[] args) { WebClient wc = new WebClient(); Stream myStream = wc.OpenRead("http://tycho.usno.navy.mil/cgi-bin/timer.pl"); string html = ""; using (StreamReader sr = new StreamReader(myStream)) { while (sr.Peek() >= 0) { html = sr.ReadLine(); if (html.Contains("UTC")) { break; } } } Console.WriteLine(html.Remove(0, 4));
Console.ReadLine(); } }
</lang>
Clojure
Clojure 1.2:
<lang clojure> (second (re-find #" (\d{1,2}:\d{1,2}:\d{1,2}) UTC" (slurp "http://tycho.usno.navy.mil/cgi-bin/timer.pl"))) </lang>
Common Lisp
<lang lisp>BOA> (let* ((url "http://tycho.usno.navy.mil/cgi-bin/timer.pl")
(regexp (load-time-value (cl-ppcre:create-scanner "(?m)^.{4}(.+? UTC)"))) (data (drakma:http-request url))) (multiple-value-bind (start end start-regs end-regs) (cl-ppcre:scan regexp data) (declare (ignore end)) (when start (subseq data (aref start-regs 0) (aref end-regs 0)))))
"Aug. 12, 04:29:51 UTC"</lang>
D
<lang d>import std.stdio, std.string, std.socket;
string loadPage(string url, string host) {
InternetAddress addr = new InternetAddress(url, 80); TcpSocket sock = new TcpSocket(addr);
sock.send("GET " ~ host ~ r" HTTP/1.0\r\n"); sock.send("Host: " ~ host ~ r"\r\n"); sock.send("Connection: close\r\n"); sock.send("\r\n");
char[1024] buffer; string msg; uint buflen = 1024; while (buflen == 1024) { buflen = sock.receive(buffer); msg ~= buffer[0 .. buflen]; }
sock.shutdown(SocketShutdown.BOTH); sock.close(); return msg;
}
void main() {
string page = loadPage(r"tycho.usno.navy.mil", r"/cgi-bin/timer.pl"); foreach (line; page.splitlines()) if (line.length > 4 && indexOf(line, " UTC") != -1) writeln(line[4 .. $]);
}</lang>
E
<lang e>interp.waitAtTop(when (def html := <http://tycho.usno.navy.mil/cgi-bin/timer.pl>.getText()) -> {
def rx`(?s).*>(@time.*? UTC).*` := html println(time)
})</lang>
Erlang
Using regular expressions:
<lang erlang>-module(scraping).
-export([main/0]).
-define(Url, "http://tycho.usno.navy.mil/cgi-bin/timer.pl").
-define(Match, "<BR>(.+ UTC)").
main() -> inets:start(), {ok, {_Status, _Header, HTML}} = http:request(?Url), {match, [Time]} = re:run(HTML, ?Match, [{capture, all_but_first, binary}]), io:format("~s~n",[Time]).</lang>
F#
This code is asynchronous - it will not block any threads while it waits on a response from the remote server. <lang fsharp> open System open System.Net open System.Text.RegularExpressions
async {
use wc = new WebClient() let! html = wc.AsyncDownloadString(Uri("http://tycho.usno.navy.mil/cgi-bin/timer.pl")) return Regex.Match(html, @"<BR>(.+ UTC)").Groups.[1].Value
} |> Async.RunSynchronously |> printfn "%s" </lang>
Factor
<lang factor>USING: http.get io sequences ;
"http://tycho.usno.navy.mil/cgi-bin/timer.pl" http-get nip [ "UTC" swap start [ 9 - ] [ 1 - ] bi ] keep subseq print</lang>
Forth
<lang forth>include unix/socket.fs
: extract-time ( addr len type len -- time len )
dup >r search 0= abort" that time not present!" dup >r begin -1 /string over 1- c@ [char] > = until \ seek back to <BR> at start of line
r> - r> + ;
s" tycho.usno.navy.mil" 80 open-socket dup s\" GET /cgi-bin/timer.pl HTTP/1.0\n\n" rot write-socket dup pad 4096 read-socket s\" \r\n\r\n" search 0= abort" can't find headers!" \ skip headers s" UTC" extract-time type cr close-socket</lang>
Haskell
Using package HTTP-4000.0.8 from HackgageDB <lang Haskell>import Data.List import Network.HTTP (simpleHTTP, getResponseBody, getRequest)
tyd = "http://tycho.usno.navy.mil/cgi-bin/timer.pl"
readUTC = simpleHTTP (getRequest tyd)>>=
fmap ((!!2).head.dropWhile ("UTC"`notElem`).map words.lines). getResponseBody>>=putStrLn</lang>
Usage in GHCi: <lang Haskell>*Main> readUTC 08:30:23</lang>
Icon and Unicon
Icon
Icon has capability to read web pages using the external function cfunc. The Unicon messaging extensions are more succinct.
Unicon
<lang Unicon>procedure main() m := open(url := "http://tycho.usno.navy.mil/cgi-bin/timer.pl","m") | stop("Unable to open ",url) every (p := "") ||:= |read(m) # read the page into a single string close(m)
map(p) ? ( tab(find("<br>")), ="<br>", write("UTC time=",p[&pos:find(" utc")])) # scrape and show
end</lang>
J
<lang j> require 'web/gethttp'
_8{. ' UTC' taketo gethttp 'http://tycho.usno.navy.mil/cgi-bin/timer.pl'
04:32:44</lang>
The web/gethttp
addon uses Wget on Linux or Windows (J ships with Wget on Windows) and cURL on the Mac.
(A sockets solution is also possible. But, while basic HTTP support is trivial to implement, a full standards compliant implementation and can involve a lot of code to deal with rare corner cases, and the time required to complete a web request is often significantly longer than the time to invoke an external program. This would imply a fair bit of maintenance and testing overhead to deal with issues which rarely matter, if a direct sockets implementation were used.)
Java
<lang java>import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection;
public class WebTime{
public static void main(String[] args){
try{
URL address = new URL(
"http://tycho.usno.navy.mil/cgi-bin/timer.pl");
URLConnection conn = address.openConnection();
BufferedReader in = new BufferedReader(
new InputStreamReader(conn.getInputStream()));
String line;
while(!(line = in.readLine()).contains("UTC"));
System.out.println(line.substring(4));
}catch(IOException e){
System.err.println("error connecting to server.");
e.printStackTrace();
}
}
}</lang>
Liberty BASIC
<lang lb>if DownloadToFile("http://tycho.usno.navy.mil/cgi-bin/timer.pl", DefaultDir$ + "\timer.htm") = 0 then
open DefaultDir$ + "\timer.htm" for input as #f html$ = lower$(input$(#f, LOF(#f))) close #f
a= instr( html$, "utc" )-1 print "UTC";mid$( html$, a-9,9)
end if
end
function DownloadToFile(urlfile$, localfile$)
open "URLmon" for dll as #url calldll #url, "URLDownloadToFileA",_ 0 as long,_ 'null urlfile$ as ptr,_ 'url to download localfile$ as ptr,_ 'save file name 0 as long,_ 'reserved, must be 0 0 as long,_ 'callback address, can be 0 DownloadToFile as ulong '0=success close #url
end function</lang>
Mathematica
<lang mathematica> test = StringSplit[Import["http://tycho.usno.navy.mil/cgi-bin/timer.pl"], "\n"]; Extract[test, Flatten@Position[StringFreeQ[test, "UTC"], False]] </lang>
Objeck
<lang objeck> use Net; use IO; use Structure;
bundle Default {
class Scrape { function : Main(args : String[]) ~ Nil { client := HttpClient->New("http://tycho.usno.navy.mil/cgi-bin/timer.pl", 80); lines := client->Get(); i := 0; found := false; while(found <> true & i < lines->Size()) { line := lines->Get(i)->As(String); index := line->Find("UTC"); if(index > -1) { time := line->SubString(index - 9, 9)->Trim(); time->PrintLine(); found := true; }; i += 1; }; } }
} </lang>
OCaml
<lang ocaml>let () =
let _,_, page_content = make_request ~url:Sys.argv.(1) ~kind:GET () in
let lines = Str.split (Str.regexp "\n") page_content in let str = List.find (fun line -> try ignore(Str.search_forward (Str.regexp "UTC") line 0); true with Not_found -> false) lines in let str = Str.global_replace (Str.regexp "<BR>") "" str in print_endline str;
- </lang>
There are libraries for this, but it's rather interesting to see how to use a socket to achieve this, so see the implementation of the above function make_request on this page.
Oz
<lang oz>declare
[Regex] = {Module.link ['x-oz://contrib/regex']}
fun {GetPage Url} F = {New Open.file init(url:Url)} Contents = {F read(list:$ size:all)} in {F close} Contents end
fun {GetDateString Doc} case {Regex.search "<BR>([A-Za-z0-9:., ]+ UTC)" Doc} of match(1:S#E ...) then {List.take {List.drop Doc S} E-S+1} end end
Url = "http://tycho.usno.navy.mil/cgi-bin/timer.pl"
in
{System.showInfo {GetDateString {GetPage Url}}}</lang>
Perl
<lang perl>use LWP::Simple;
my $url = 'http://tycho.usno.navy.mil/cgi-bin/timer.pl';
get($url) =~ /<BR>(.+? UTC)/
and print "$1\n";</lang>
Perl 6
<lang perl6>use HTTP::Client; # http://github.com/carlins/http-client/
my $site = "http://tycho.usno.navy.mil/cgi-bin/timer.pl";
HTTP::Client.new.get($site).match(/'<BR>'( .+? <ws> UTC )/)[0].say
Note that the string between '<' and '>' refers to regex tokens, so to match a literal '<BR>' you need to quote it, while <ws> refers to the built-in token whitespace. Also, whitespace is ignored by default in Perl 6 regexes.
PHP
By iterating over each line:
<lang PHP><?
$contents = file('http://tycho.usno.navy.mil/cgi-bin/timer.pl'); foreach ($contents as $line){
if (($pos = strpos($line, ' UTC')) === false) continue; echo subStr($line, 4, $pos - 4); //Prints something like "Dec. 06, 16:18:03" break;
}</lang>
By regular expressions (
):
<lang PHP><?
echo preg_replace(
"/^.*<BR>(.*) UTC.*$/su",
"\\1",
file_get_contents('http://tycho.usno.navy.mil/cgi-bin/timer.pl')
);
</lang>
PicoLisp
<lang PicoLisp>(load "@lib/http.l")
(client "tycho.usno.navy.mil" 80 "cgi-bin/timer.pl"
(when (from "<BR>") (pack (trim (till "U"))) ) )</lang>
Output:
-> "Feb. 19, 18:11:37"
PowerShell
<lang powershell>$wc = New-Object Net.WebClient $html = $wc.DownloadString('http://tycho.usno.navy.mil/cgi-bin/timer.pl') $html -match ', (.*) UTC' | Out-Null Write-Host $Matches[1]</lang>
Protium
English dialect, short form, using integrated Rexx pattern matcher:
<lang html><@ DEFAREPRS>Rexx Parse</@>
<@ DEFPRSLIT>Rexx Parse|'<BR>' UTCtime 'UTC'</@>
<@ LETVARURL>timer|http://tycho.usno.navy.mil/cgi-bin/timer.pl</@>
<@ ACTRPNPRSVAR>Rexx Parse|timer</@>
<@ SAYVAR>UTCtime</@></lang>
English dialect, padded variable-length form:
<lang html><# DEFINE WORKAREA PARSEVALUES>Rexx Parse</#>
<# DEFINE PARSEVALUES LITERAL>Rexx Parse|'<BR>' UTCtime 'UTC'</#>
<# LET VARIABLE URLSOURCE>timer|http://tycho.usno.navy.mil/cgi-bin/timer.pl</#>
<# ACT REPLACEBYPATTERN PARSEVALUES VARIABLE>Rexx Parse|timer</#>
<# SAY VARIABLE>UTCtime</#></lang>
PureBasic
<lang Purebasic>URLDownloadToFile_( #Null, "http://tycho.usno.navy.mil/cgi-bin/timer.pl", "timer.htm", 0, #Null) ReadFile(0, "timer.htm") While Not Eof(0) : Text$ + ReadString(0) : Wend MessageRequester("Time", Mid(Text$, FindString(Text$, "UTC", 1) - 9 , 8))</lang>
Python
<lang python>import urllib page = urllib.urlopen('http://tycho.usno.navy.mil/cgi-bin/timer.pl') for line in page:
if ' UTC' in line: print line.strip()[4:] break
page.close()</lang> Sample output:
Aug. 12, 15:22:08 UTC Universal Time
R
First, retrieve the web page. See HTTP_Request for more options with this. <lang R>library(RCurl) webpage <- getURL("http://tycho.usno.navy.mil/cgi-bin/timer.pl")</lang> Now parse the html code into a tree and retrieve the interesting bit <lang R>library(XML) pagetree <- htmlTreeParse(webpage ) timesnode <- pagetree$children$html$children$body$children$h3$children$pre$children timesnode <- timesnode[names(timesnode)=="text"]</lang> Finally, find the line with universal time and parse it <lang R>timestrings <- sapply(timesnode, function(x) x$value) index <- grep("Universal Time", timestrings) utctimestr <- strsplit(timestrings[index], "\t")$text[1] utctime <- strptime(utctimestr, "%b. %d, %H:%M:%S UTC")
# Print the date in any format you desire.
strftime(utctime, "%A, %d %B %Y, %H:%M:%S")</lang>
Monday, 03 August 2009, 16:15:37
Solution with base R.
<lang R>line = grep("UTC", readLines("http://tycho.usno.navy.mil/cgi-bin/timer.pl"), value = TRUE) sub(".*(\\d{2}:\\d{2}:\\d{2}).*", "\\1", line)</lang>
REBOL
<lang REBOL>REBOL [ Title: "Web Scraping" Author: oofoe Date: 2009-12-07 URL: http://rosettacode.org/wiki/Web_Scraping ]
; Notice that REBOL understands unquoted URL's
service: http://tycho.usno.navy.mil/cgi-bin/timer.pl
; The 'read' function can read from any data scheme that REBOL knows
; about, which includes web URLs. NOTE: Depending on your security
; settings, REBOL may ask you for permission to contact the service.
html: read service
; I parse the HTML to find the first <br> (note the unquoted HTML tag
; -- REBOL understands those too), then copy the current time from
; there to the "UTC" terminator.
; I have the "to end" in the parse rule so the parse will succeed.
; Not strictly necessary once I've got the time, but good practice.
parse html [thru <br> copy current thru "UTC" to end]
print ["Current UTC time:" current]</lang>
Ruby
A verbose example for comparison
<lang ruby>require "open-uri"
open('http://tycho.usno.navy.mil/cgi-bin/timer.pl') do |p|
p.each_line do |line| if line =~ /UTC/ puts line.match(/ (\d{1,2}:\d{1,2}:\d{1,2}) /) break end end
end </lang>
A more concise example
<lang ruby>require 'open-uri' puts URI.parse('http://tycho.usno.navy.mil/cgi-bin/timer.pl').read.match(/ (\d{1,2}:\d{1,2}:\d{1,2}) UTC/)[1] </lang>
Scala
<lang scala> import scala.io.Source
object WebTime extends Application { val text = Source.fromURL("http://tycho.usno.navy.mil/cgi-bin/timer.pl") val utc = text.getLines.find(_.contains("UTC")) utc match { case Some(s) => println(s.substring(4)) case _ => println("error") } } </lang>
Tcl
<lang tcl>package require http
set request [http::geturl "http://tycho.usno.navy.mil/cgi-bin/timer.pl"]
if {[regexp -line {<BR>(.* UTC)} [http::data $request] --> utc]} {
puts $utc
}</lang>
TUSCRIPT
<lang tuscript> $$ MODE TUSCRIPT SET time = REQUEST ("http://tycho.usno.navy.mil/cgi-bin/timer.pl") SET utc = FILTER (time,":*UTC*:",-) </lang>
UNIX Shell
This solution uses curl, which can be downloaded for free, and the popular (at least in the unix world) utilities programs grep and sed.
<lang bash>#!/bin/sh curl -s http://tycho.usno.navy.mil/cgi-bin/timer.pl |
grep ' UTC' | sed -e 's/^<BR>//;s/ UTC.*$//'</lang>
Ursala
This works by launching the wget command in a separate process and capturing its output. The program is compiled to an executable command. <lang Ursala>#import std
#import cli
#executable ('parameterized',)
whatime =
<.file$[contents: --<>]>+ -+
@hm skip/*4+ ~=(9%cOi&)-~l*+ *~ ~&K3/'UTC', (ask bash)/0+ -[wget -O - http://tycho.usno.navy.mil/cgi-bin/timer.pl]-!+-</lang>
Here is a bash session.
$ whatime Jun. 26, 20:49:52 UTC
Visual Basic .NET
New, .NET way with StringReader: <lang vbnet>Imports System.Net Imports System.IO
Dim client As WebClient = New WebClient() Dim content As String = client.DownloadString("http://tycho.usno.navy.mil/cgi-bin/timer.pl") Dim sr As New StringReader(content) While sr.peek <> -1 Dim s As String = sr.ReadLine If s.Contains("UTC") Then Dim time As String() = s.Substring(4).Split(vbTab) Console.WriteLine(time(0)) End If End While</lang>
Alternative, old fashioned way using VB "Split" function: <lang vbnet>Imports System.Net
Dim client As WebClient = New WebClient() Dim content As String = client.DownloadString("http://tycho.usno.navy.mil/cgi-bin/timer.pl") Dim lines() As String = Split(content, vbLf) 'may need vbCrLf For Each line In lines If line.Contains("UTC") Then Dim time As String() = line.Substring(4).Split(vbTab) Console.WriteLine(time(0)) End If Next</lang>
- Programming Tasks
- Networking and Web Interaction
- Ada
- AWS
- AutoHotkey
- AWK
- ALGOL 68
- C
- Libcurl
- C++
- C sharp
- Clojure
- Common Lisp
- Cl-ppcre
- DRAKMA
- D
- E
- Erlang
- F Sharp
- Factor
- Forth
- Haskell
- Icon
- Unicon
- J
- Java
- Liberty BASIC
- Mathematica
- Objeck
- OCaml
- Oz
- Perl
- LWP
- Perl 6
- PHP
- PicoLisp
- PowerShell
- Protium
- PureBasic
- Python
- R
- RCurl
- XML
- REBOL
- Ruby
- Scala
- Tcl
- TUSCRIPT
- UNIX Shell
- Ursala
- TI-83 BASIC/Omit
- TI-89 BASIC/Omit
- Visual Basic .NET
- Input Output
- Batch File/Omit
- M4/Omit
- PARI/GP/Omit
- PostScript/Omit
- Retro/Omit
- ZX Spectrum Basic/Omit