HTTP

From Rosetta Code
Revision as of 20:19, 11 January 2011

Task
HTTP
You are encouraged to solve this task according to the task description, using any language you may know.

Print a URL's content (source code) to the console. There is a separate task for HTTPS Requests.
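Broadly, the solutions below take one of two approaches: call a high-level HTTP client, or write the GET request to a TCP socket by hand. As a language-neutral illustration of the raw-socket approach, here is a minimal Python sketch (host and path are placeholders; no error handling, redirects, or HTTPS):

```python
import socket

def build_request(host, path="/"):
    """Build a minimal HTTP/1.0 GET request for the given host and path."""
    return ("GET %s HTTP/1.0\r\nHost: %s\r\n\r\n" % (path, host)).encode("ascii")

def fetch(host, path="/", port=80):
    """Return the raw response (status line, headers and body) as bytes."""
    with socket.create_connection((host, port)) as s:
        s.sendall(build_request(host, path))
        chunks = []
        while True:
            data = s.recv(4096)
            if not data:          # server closed the connection: response complete
                break
            chunks.append(data)
    return b"".join(chunks)

if __name__ == "__main__":
    print(fetch("www.rosettacode.org").decode("utf-8", "replace"))
```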

ActionScript

<lang actionscript>package {

   import flash.display.Sprite;
   import flash.events.Event;
   import flash.net.*;
   public class RequestExample extends Sprite
   {
       public function RequestExample()
       {
           var loader:URLLoader = new URLLoader();
           loader.addEventListener(Event.COMPLETE, loadComplete);
           loader.load(new URLRequest("http://www.rosettacode.org"));
       }
       private function loadComplete(evt:Event):void
       {
           trace(evt.target.data);
       }
   }

}</lang>

Ada

Library: AWS

<lang ada>with Ada.Text_IO;  use Ada.Text_IO;
with AWS.Client;
with AWS.Response;

procedure HTTP_Request is
begin
   Put_Line (AWS.Response.Message_Body (AWS.Client.Get (URL => "http://www.rosettacode.org")));
end HTTP_Request;</lang>

ALGOL 68

Works with: ALGOL 68 version Revision 1 - however grep in string, http content and str error are from a non-standard library
Works with: ALGOL 68G version Any - tested with release 1.18.0-9h.tiny

<lang algol68>STRING domain="rosettacode.org";
STRING page="wiki/Main_Page";

STRING re success="^HTTP/[0-9.]* 200";
STRING re result description="^HTTP/[0-9.]* [0-9]+ [a-zA-Z ]*";
STRING re doctype ="\s\s<!DOCTYPE html PUBLIC ""[^>]+"">\s+";

PROC html page = (REF STRING page) BOOL: (

    BOOL out=grep in string(re success, page, NIL, NIL) = 0;
    IF INT start, end;
       grep in string(re result description, page, start, end) = 0
    THEN
       page:=page[end+1:];
       IF grep in string(re doctype, page, start, end) = 0
       THEN page:=page[start+2:]
       ELSE print ("unknown format retrieving page")
       FI
    ELSE print ("unknown error retrieving page")
    FI;
    out

);

IF STRING reply;

  INT rc =
     http content (reply, domain, "http://"+domain+"/"+page, 0);
  rc = 0 AND html page (reply)

THEN print (reply) ELSE print (strerror (rc)) FI</lang>

AutoHotkey

<lang AutoHotkey>UrlDownloadToFile, http://rosettacode.org, url.html
Run, cmd /k type url.html</lang>

C

Library: libcurl

<lang c>#include <stdio.h>
#include <stdlib.h>
#include <curl/curl.h>

int main(void) {

       CURL *curl;
       char buffer[CURL_ERROR_SIZE];
       if ((curl = curl_easy_init()) != NULL) {
               curl_easy_setopt(curl, CURLOPT_URL, "http://www.rosettacode.org/");
               curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
               curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, buffer);
               if (curl_easy_perform(curl) != CURLE_OK) {
                       fprintf(stderr, "%s\n", buffer);
                       return EXIT_FAILURE;
               }
               curl_easy_cleanup(curl);
       }
       return EXIT_SUCCESS;

}</lang>

C++

<lang cpp>#include <winsock2.h>
#include <ws2tcpip.h>
#include <iostream>

int main() {
	WSADATA wsaData;
	WSAStartup( MAKEWORD( 2, 2 ), &wsaData );

	addrinfo *result = NULL;
	addrinfo hints;

	ZeroMemory( &hints, sizeof( hints ) );
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = IPPROTO_TCP;

	getaddrinfo( "74.125.45.100", "80", &hints, &result ); // http://www.google.com

	SOCKET s = socket( result->ai_family, result->ai_socktype, result->ai_protocol );
	connect( s, result->ai_addr, (int)result->ai_addrlen );
	freeaddrinfo( result );

	send( s, "GET / HTTP/1.0\n\n", 16, 0 );

	char buffer[512];
	int bytes;
	do {
		bytes = recv( s, buffer, 512, 0 );
		if ( bytes > 0 )
			std::cout.write( buffer, bytes );
	} while ( bytes > 0 );

	return 0;
}</lang>

C#

<lang csharp>using System;
using System.Text;
using System.Net;

class Program {

   static void Main(string[] args)
   {
       WebClient wc = new WebClient();
       string content = wc.DownloadString("http://www.google.com");
       Console.WriteLine(content);
   }

}</lang>

Clojure

Using the Java API:

<lang clojure>(defn get-http [url]
  (let [sc (java.util.Scanner.
             (.openStream (java.net.URL. url)))]
    (while (.hasNext sc)
      (println (.nextLine sc)))))

(get-http "http://www.rosettacode.org")</lang>

Using clojure.contrib.http.agent:

<lang clojure>(ns example

 (:use [clojure.contrib.http.agent :only (string http-agent)]))

(println (string (http-agent "http://www.rosettacode.org/")))</lang>

Works with: Clojure version 1.2

<lang clojure>(print (slurp "http://www.rosettacode.org/"))</lang>

Common Lisp

CLISP provides an extension function to read http sources. Other implementations may do this differently.

Works with: CLISP

<lang lisp>(defun wget-clisp (url)

   (ext:with-http-input (stream url)
       (loop for line = (read-line stream nil nil)
           while line
           do (format t "~a~%" line))))</lang>
Library: DRAKMA

First by grabbing the entire body as a string, and then by pulling from a stream (as in the CLISP example).

<lang lisp>(defun wget-drakma-string (url &optional (out *standard-output*))

 "Grab the body as a string, and write it to out."
 (write-string (drakma:http-request url) out))

(defun wget-drakma-stream (url &optional (out *standard-output*))

 "Grab the body as a stream, and write it to out."
 (loop with body = (drakma:http-request url :want-stream t)
       for line = (read-line body nil nil)
       while line do (write-line line out)
       finally (close body)))</lang>

D

Library: tango

<lang D>import tango.io.Console;
import tango.net.http.HttpGet;

void main() {

 Cout.stream.copy( (new HttpGet("http://google.com")).open );

} </lang>

Or, operating more directly on the socket:

<lang D>import tango.io.Console;
import tango.net.InternetAddress;
import tango.net.device.Socket;

void main() {

 auto site = new Socket;
 site.connect (new InternetAddress("google.com",80)).write ("GET / HTTP/1.0\n\n");
 Cout.stream.copy (site);

}</lang>

E

<lang e>when (def t := <http://www.rosettacode.org> <- getText()) -> {

   println(t)

}</lang>

Erlang

Synchronous

<lang erlang>-module(main).
-export([main/1]).

main([Url|[]]) ->

  inets:start(),
  case http:request(Url) of
      {ok, {_V, _H, Body}} -> io:fwrite("~p~n",[Body]);
      {error, Res} -> io:fwrite("~p~n", [Res])
  end.</lang>

Asynchronous

<lang erlang>-module(main).
-export([main/1]).

main([Url|[]]) ->

  inets:start(),
  http:request(get, {Url, [] }, [], [{sync, false}]),
  receive
      {http, {_ReqId, Res}} -> io:fwrite("~p~n",[Res]);
      _Any -> io:fwrite("Error: ~p~n",[_Any])
      after 10000 -> io:fwrite("Timed out.~n",[])
  end.</lang>

Using it:
<lang erlang>escript ./req.erl http://www.rosettacode.org</lang>

F#

In F# we can just use the .NET library to do this, so it's the same as the C# example.

<lang fsharp>let wget (url : string) =

   use c = new System.Net.WebClient()
   c.DownloadString(url)

printfn "%s" (wget "http://www.rosettacode.com/")</lang>

However, unlike C#, F# can use an asynchronous workflow to avoid blocking any threads while waiting for a response from the server. To asynchronously download three URLs at once:

<lang fsharp>open System.IO
open System.Net

let wgetAsync url =
    async { let request = WebRequest.Create (url:string)
            use! response = request.AsyncGetResponse()
            use responseStream = response.GetResponseStream()
            use reader = new StreamReader(responseStream)
            return reader.ReadToEnd() }

let urls = ["http://www.rosettacode.com/"; "http://www.yahoo.com/"; "http://www.google.com/"]
let content = urls
              |> List.map wgetAsync
              |> Async.Parallel
              |> Async.RunSynchronously</lang>

Factor

<lang factor>USE: http.client "http://www.rosettacode.org" http-get nip print</lang>

Forth

Works with: GNU Forth version 0.7.0

This works at the socket level, returning both the HTTP headers and page contents.

<lang forth>include unix/socket.fs

s" localhost" 80 open-socket
dup s\" GET / HTTP/1.0\n\n" rot write-socket
dup pad 8092 read-socket type
close-socket</lang>

Groovy

<lang groovy>new URL("http://www.rosettacode.org").eachLine { println it }</lang>

Haskell

Using

Library: HTTP

from HackageDB

<lang haskell>import Network.Browser
import Network.HTTP
import Network.URI

httpreq = do
      rsp <- Network.Browser.browse $ do
                 setAllowRedirects True
                 setOutHandler $ const (return ())
                 request $ getRequest "http://www.rosettacode.org/"
      putStrLn $ rspBody $ snd rsp</lang>

Icon and Unicon

Icon

<lang icon>link cfunc

procedure main(arglist)

  get(arglist[1])

end

procedure get(url)

  local f, host, port, path
  url ? {
        ="http://" | ="HTTP://"
        host := tab(upto(':/') | 0)
        if not (=":" & (port := integer(tab(upto('/'))))) then port := 80
        if pos(0) then path := "/" else path := tab(0)
  }
  write(host)
  write(path)
  f := tconnect(host, port) | stop("Unable to connect")
  writes(f, "GET ", path | "/" ," HTTP/1.0\r\n\r\n")
  while write(read(f))

end</lang>
Using it:
<lang icon>icon req.icn http://www.rosettacode.org</lang>

Unicon

Unicon provides improved socket and messaging support without the need for the external function cfunc:
<lang unicon>procedure main(arglist)
  m := open(arglist[1],"m")
  while write(read(m))
end</lang>

J

Using gethttp from Web Scraping

<lang j>gethttp 'http://www.rosettacode.org'</lang>

Java

<lang java5>import java.util.Scanner;
import java.net.URL;

public class Main {

    public static void main(String[] args) throws Exception {         
        URL url = new URL("http://www.rosettacode.org");         
        Scanner sc = new Scanner(url.openStream());
        while( sc.hasNext() ) System.out.println(sc.nextLine());         
    }

}</lang>

Apache Commons IO

<lang java5>import org.apache.commons.io.IOUtils;
import java.net.*;

public class Main {

   public static void main(String[] args) throws Exception {
   	IOUtils.copy(new URL("http://rosettacode.org").openStream(),System.out);    	    	    		    
   }

}</lang>

Lua

<lang Lua>local http = require("socket.http")

function url_encode(str)
  if (str) then
    str = string.gsub (str, "\n", "\r\n")
    str = string.gsub (str, "([^%w ])",
        function (c) return string.format ("%%%02X", string.byte(c)) end)
    str = string.gsub (str, " ", "+")
  end
  return str
end

function url_decode(str)
  str = string.gsub (str, "+", " ")
  str = string.gsub (str, "%%(%x%x)",
      function(h) return string.char(tonumber(h,16)) end)
  str = string.gsub (str, "\r\n", "\n")
  return str
end

local page = http.request( 'http://www.google.com/m/search?q=' .. url_encode("lua") )
print( page )</lang>

Mathematica

<lang Mathematica>Print[Import["http://www.google.com/webhp?complete=1&hl=en", "Source"]]</lang>

MIRC Scripting Language

See HTTP/MIRC Scripting Language


Objeck

<lang objeck>use Net;
use Structure;

bundle Default {

 class HttpTest {
   function : Main(args : String[]) ~ Nil {
     client := HttpClient->New("http://rosettacode.org", 80);
     lines := client->Get();
     each(i : lines) {
       lines->GetValue(i)->As(String)->PrintLine();
     };
   }
 }

} </lang>

Objective-C

<lang objc>#import <Foundation/Foundation.h>

int main (int argc, const char * argv[]) {

   NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
   NSError        *error;
   NSURLResponse *response;
   NSData *data = [NSURLConnection sendSynchronousRequest:[NSURLRequest requestWithURL:[NSURL URLWithString:@"http://rosettacode.org"]]
                                           returningResponse:&response
                                                       error:&error];
                                                    
   NSLog(@"%@", [[[NSString alloc] initWithData:data
                                         encoding:NSUTF8StringEncoding] autorelease]);
   
   [pool drain];
   return 0;

} </lang>

OCaml

<lang ocaml>let () =

 let url = "http://www.rosettacode.org" in
 let _,_, page_content = make_request ~url ~kind:GET () in
 print_endline page_content;
</lang>

The source code of the function make_request is here.

Oz

When creating a file object, it is possible to specify a URL instead of a filename:

<lang oz>declare

 fun {GetPage Url}
    F = {New Open.file init(url:Url)}
    Contents = {F read(list:$ size:all)}
 in
    {F close}
    Contents
 end

in

 {System.showInfo {GetPage "http://www.rosettacode.org"}}</lang>
Library: OzHttpClient

If you need more fine-grained control of the request, you could use a custom library:

<lang oz>declare

 [HTTPClient] = {Module.link ['x-ozlib://mesaros/net/HTTPClient.ozf']}
 fun {GetPage Url}
     Client = {New HTTPClient.urlGET
               init(inPrms(toFile:false toStrm:true)
                    httpReqPrms)}

    OutParams
    HttpResponseParams
 in
    {Client getService(Url ?OutParams ?HttpResponseParams)}
    {Client closeAll(true)}
    OutParams.sOut
 end

in

 {System.showInfo {GetPage "http://www.rosettacode.org"}}</lang>

Perl

Library: LWP

<lang perl>use LWP::Simple;
print get("http://www.rosettacode.org");</lang>

or with more error-checking

<lang perl>use strict;
use LWP::UserAgent;

my $url = 'http://www.rosettacode.org';
my $response = LWP::UserAgent->new->get( $url );

$response->is_success or die "Failed to GET '$url': ", $response->status_line;

print $response->as_string;</lang>

PHP

<lang php>print(file_get_contents("http://www.rosettacode.org"));</lang>

PicoLisp

<lang PicoLisp>(load "@lib/http.l")

(client "rosettacode.org" 80 NIL # Connect to rosettacode

  (out NIL (echo)) )                  # Echo to standard output</lang>


Pike

<lang pike>write("%s",Protocols.HTTP.get_url_data("http://www.rosettacode.org"));</lang>

PowerShell

<lang powershell>$wc = New-Object Net.WebClient
$wc.DownloadString('http://www.rosettacode.org')</lang>

Protium

English dialect, short form: <lang html><@ SAYURLLIT>http://rosettacode.org/wiki/Main_Page</@></lang>

English dialect, padded variable-length form: <lang html><# SAY URLSOURCE LITERAL>http://rosettacode.org/wiki/Main_Page</#></lang>

PureBasic

<lang PureBasic>InitNetwork()
OpenConsole()

tmpdir$ = GetTemporaryDirectory()
filename$ = tmpdir$ + "PB_tempfile" + Str(Random(200000)) + ".html"

If ReceiveHTTPFile("http://rosettacode.org/wiki/Main_Page", filename$)

 If ReadFile(1, filename$)
   Repeat
     PrintN(ReadString(1))
   Until Eof(1)
   Input()
   ; to prevent console from closing if on windows
   CloseFile(1)
 EndIf
 DeleteFile(filename$)

EndIf</lang>

Another solution using general networking commands:
<lang PureBasic>InitNetwork()
OpenConsole()
id = OpenNetworkConnection("rosettacode.org", 80)
SendNetworkString(id, "GET /wiki/Main_Page HTTP/1.1" + Chr(10) + "Host: rosettacode.org" + Chr(10) + Chr(10))
Repeat

 If NetworkClientEvent(id) = 2
   a$ = Space(1000)
   ReceiveNetworkData(id, @a$, 1000)
   out$ + a$
 EndIf

Until FindString(out$, "</html>", 0)
PrintN(out$)

; next line only to prevent console from closing on Windows
Input()</lang>

Of course you could use wget too.

Python

<lang python>import urllib
url = urllib.urlopen("http://www.rosettacode.org")
print url.read()
url.close()</lang>

<lang python>import urllib
print urllib.urlopen("http://rosettacode.org").read()</lang>

<lang python>import urllib2
print urllib2.urlopen("http://rosettacode.org").read()</lang>

Python 3
<lang python>import urllib.request
print(urllib.request.urlopen("http://rosettacode.org").read())</lang>

R

Library: RCurl
Library: XML

First, retrieve the webpage.

<lang R>library(RCurl)
webpage <- getURL("http://rosettacode.org")

# If you are linking to a page that no longer exists and need to follow the redirect, use followlocation=TRUE
webpage <- getURL("http://www.rosettacode.org", .opts=list(followlocation=TRUE))

# If you are behind a proxy server, you will need to use something like:
webpage <- getURL("http://rosettacode.org",
   .opts=list(proxy="123.123.123.123", proxyusername="domain\\username", proxypassword="mypassword", proxyport=8080))
# Don't forget that backslashes in your username or password need to be escaped!</lang>

Now parse the html code into a tree and print the html.

<lang R>library(XML)
pagetree <- htmlTreeParse(webpage)
pagetree$children$html</lang>

REBOL

<lang REBOL>print read http://rosettacode.org</lang>

RLaB

RLaB supports HTTP/FTP through its Read/Write facilities, which are organized around the concept of the Uniform Resource Locator (URL),

protocol://address

RLaB accepts the following values for protocol:

1. file, or omitted, for generic text files or files in native binary format (partially compatible with the matlab binary format);
2. h5 or hdf5 for files that use the Hierarchical Data Format 5 (HDF5), version 1.8.0 and later. Here protocol can be omitted as long as address ends with .h5 (file extension);
3. http, https, or ftp for accessing data and files on web and ftp servers;
4. tcp, for accessing sockets over the tcp/ip protocol;
5. serial, for accessing the serial port on Un*x type systems.

For these URLs RLaB provides internal book-keeping: it keeps track of the open ones and, upon quitting, closes them and releases the internal resources allocated for managing them.

For accessing URLs on the world wide web, RLaB uses the cURL library (libcurl) [1] and its "easy" interface.

This said, this is how one would download financial data for Pfizer from Yahoo [2].

<lang RLaB>// get csv data from Yahoo for Pfizer (PFE)
url="http://ichart.finance.yahoo.com/table.csv?s=PFE&a=00&b=4&c=1982&d=00&e=10&f=2010&g=d&ignore=.csv";

opt = <<>>;
// opt.CURLOPT_PROXY = "your.proxy.here";
// opt.CURLOPT_PROXYPORT = YOURPROXYPORT;
// opt.CURLOPT_PROXYTYPE = "http";
open(url, opt);
x = readm(url);
close (url);</lang>

Ruby

<lang ruby>require 'open-uri'
require 'kconv'

puts open("http://rosettacode.org").read</lang>

Scala

<lang scala>import scala.io._

object HttpTest {

  def main(args: Array[String]): Unit = {
     //if you are behind a firewall you can configure your proxy
     System.setProperty("http.proxySet", "true")
     System.setProperty("http.proxyHost", "0.0.0.0")
     System.setProperty("http.proxyPort", "8080")
     Source.fromURL("http://www.rosettacode.org").getLines.foreach(println)
  }

}</lang>

SNOBOL4

Works with: Macro SNOBOL4 in C

<lang snobol>-include "tcp.sno"
	tcp.open(.conn,'www.rosettacode.org','http')	:s(cont1)
	terminal = "cannot open"	:(end)
cont1	conn = "GET http://rosettacode.org/wiki/Main_Page HTTP/1.0" char(10) char(10)
while	output = conn	:s(while)
	tcp.close(.conn)
end</lang>

Tcl

Note that the http package is distributed as part of Tcl.
<lang tcl>package require http
set request [http::geturl "http://www.rosettacode.org"]
puts [http::data $request]
http::cleanup $request</lang>

TUSCRIPT

<lang tuscript>
$$ MODE TUSCRIPT
SET DATEN = REQUEST ("http://www.rosettacode.org")
*{daten}
</lang>

UNIX Shell

<lang bash>curl -s -L http://rosettacode.org/</lang>
<lang bash>lynx -source http://rosettacode.org/</lang>
<lang bash>(sleep 2 && echo "GET / HTTP/1.0" && echo && sleep 2) | nc rosettacode.org 80</lang>
<lang bash>(sleep 2 && echo "GET / HTTP/1.0" && echo && sleep 2) | telnet rosettacode.org 80</lang>
<lang bash>wget -O - -q http://rosettacode.org/</lang>
<lang bash>lftp -c "cat http://rosettacode.org/"</lang>

VBScript

Based on code at How to retrieve HTML web pages with VBScript via the Microsoft.XmlHttp object:

<lang vb>option explicit

dim sURL
dim oHTTP

sURL = "http://rosettacode.org/"
set oHTTP = CreateObject("Microsoft.XmlHTTP")

On Error Resume Next
oHTTP.open "GET", sURL, False
oHTTP.send ""
if Err.Number = 0 Then
    WScript.Echo oHTTP.responseText
Else
    Wscript.Echo "error " & Err.Number & ": " & Err.Description
End If

Set oHTTP = Nothing</lang>

Visual Basic .NET

<lang vbnet>Imports System.Net

Dim client As WebClient = New WebClient()
Dim content As String = client.DownloadString("http://www.google.com")
Console.WriteLine(content)</lang>

Zsh

<lang zsh>zmodload zsh/net/tcp
ztcp example.com 80
fd=$REPLY
print -l -u $fd -- 'GET / HTTP/1.1' 'Host: example.com'
while read -u $fd -r -e -t 1; do; :; done
ztcp -c $fd</lang>