URL parser: Difference between revisions
No edit summary |
Thundergnat (talk | contribs) m (syntax highlighting fixup automation) |
||
Line 60: | Line 60: | ||
=={{header|Ada}}== |
=={{header|Ada}}== |
||
{{libheader|AWS}} |
{{libheader|AWS}} |
||
< |
<syntaxhighlight lang="ada">with Ada.Text_IO; |
||
with AWS.URL; |
with AWS.URL; |
||
Line 126: | Line 126: | ||
Parse ("https://bob:pass@example.com/place"); |
Parse ("https://bob:pass@example.com/place"); |
||
Parse ("http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"); |
Parse ("http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"); |
||
end URL_Parser;</ |
end URL_Parser;</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 232: | Line 232: | ||
Uses the URI parser here: [[URL_parser/URI_parser_ALGOL68]]. |
Uses the URI parser here: [[URL_parser/URI_parser_ALGOL68]]. |
||
< |
<syntaxhighlight lang="algol68">PR read "uriParser.a68" PR |
||
PROC test uri parser = ( STRING uri )VOID: |
PROC test uri parser = ( STRING uri )VOID: |
||
Line 269: | Line 269: | ||
; test uri parser( "https://bob:pass@example.com/place" ) |
; test uri parser( "https://bob:pass@example.com/place" ) |
||
; test uri parser( "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64" ) |
; test uri parser( "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64" ) |
||
END</ |
END</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 351: | Line 351: | ||
Thanks to AppleScript's ability to interface with some of macOS's ObjectiveC API, the various components can simply be read off from an ''NSURLComponents'' object set from the URL text. |
Thanks to AppleScript's ability to interface with some of macOS's ObjectiveC API, the various components can simply be read off from an ''NSURLComponents'' object set from the URL text. |
||
< |
<syntaxhighlight lang="applescript">use AppleScript version "2.4" -- OS X 10.10 (Yosemite) or later |
||
use framework "Foundation" |
use framework "Foundation" |
||
Line 392: | Line 392: | ||
end repeat |
end repeat |
||
return join(output, linefeed & linefeed)</ |
return join(output, linefeed & linefeed)</syntaxhighlight> |
||
{{output}} |
{{output}} |
||
< |
<syntaxhighlight lang="applescript">"foo://example.com:8042/over/there?name=ferret#nose |
||
• scheme = foo |
• scheme = foo |
||
• host = example.com |
• host = example.com |
||
Line 455: | Line 455: | ||
• host = example.com |
• host = example.com |
||
• path = / |
• path = / |
||
• query = a=1&b=2+2&c=3&c=4&d=encoded"</ |
• query = a=1&b=2+2&c=3&c=4&d=encoded"</syntaxhighlight> |
||
=={{header|C sharp|C#}}== |
=={{header|C sharp|C#}}== |
||
< |
<syntaxhighlight lang="csharp">using System; |
||
namespace RosettaUrlParse |
namespace RosettaUrlParse |
||
Line 493: | Line 493: | ||
} |
} |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
=={{header|Crystal}}== |
=={{header|Crystal}}== |
||
Line 499: | Line 499: | ||
This example demonstrates use of the Crystal standard library's <code>URI</code> class. |
This example demonstrates use of the Crystal standard library's <code>URI</code> class. |
||
< |
<syntaxhighlight lang="crystal">require "uri" |
||
examples = ["foo://example.com:8042/over/there?name=ferret#nose", |
examples = ["foo://example.com:8042/over/there?name=ferret#nose", |
||
Line 530: | Line 530: | ||
end |
end |
||
puts |
puts |
||
end</ |
end</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 615: | Line 615: | ||
=={{header|Elixir}}== |
=={{header|Elixir}}== |
||
< |
<syntaxhighlight lang="elixir">test_cases = [ |
||
"foo://example.com:8042/over/there?name=ferret#nose", |
"foo://example.com:8042/over/there?name=ferret#nose", |
||
"urn:example:animal:ferret:nose", |
"urn:example:animal:ferret:nose", |
||
Line 635: | Line 635: | ||
IO.puts "\n#{str}" |
IO.puts "\n#{str}" |
||
IO.inspect URI.parse(str) |
IO.inspect URI.parse(str) |
||
end)</ |
end)</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 713: | Line 713: | ||
<li>The System.URI class does some "Scheme-Based Normalization" (c/f rfc3986 section 6.2.3), i. e. it knows about certain |
<li>The System.URI class does some "Scheme-Based Normalization" (c/f rfc3986 section 6.2.3), i. e. it knows about certain |
||
defaults for some schemes. With the test data this shows with the port numbers for http, ftp, ldap, mailto.</li></ul> |
defaults for some schemes. With the test data this shows with the port numbers for http, ftp, ldap, mailto.</li></ul> |
||
< |
<syntaxhighlight lang="fsharp">open System |
||
open System.Text.RegularExpressions |
open System.Text.RegularExpressions |
||
Line 742: | Line 742: | ||
writeline " query:" (if u.Query.Length > 0 then u.Query.Substring(1) else "") |
writeline " query:" (if u.Query.Length > 0 then u.Query.Substring(1) else "") |
||
writeline " fragment:" (if u.Fragment.Length > 0 then u.Fragment.Substring(1) else "") |
writeline " fragment:" (if u.Fragment.Length > 0 then u.Fragment.Substring(1) else "") |
||
)</ |
)</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre style="height:3cm"> |
<pre style="height:3cm"> |
||
Line 808: | Line 808: | ||
This uses Go's standard [https://golang.org/pkg/net/url/ <tt>net/url</tt>] package. |
This uses Go's standard [https://golang.org/pkg/net/url/ <tt>net/url</tt>] package. |
||
The [https://golang.org/src/net/url/url.go source code] for this package (excluding tests) is in a single file of ~720 lines. |
The [https://golang.org/src/net/url/url.go source code] for this package (excluding tests) is in a single file of ~720 lines. |
||
< |
<syntaxhighlight lang="go">package main |
||
import ( |
import ( |
||
Line 882: | Line 882: | ||
fmt.Println(" Fragment:", u.Fragment) |
fmt.Println(" Fragment:", u.Fragment) |
||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 958: | Line 958: | ||
Test: |
Test: |
||
< |
<syntaxhighlight lang="groovy">import java.net.URI |
||
[ |
[ |
||
Line 986: | Line 986: | ||
| query = ${u.query} |
| query = ${u.query} |
||
| fragment = ${u.fragment}""".stripMargin() |
| fragment = ${u.fragment}""".stripMargin() |
||
}</ |
}</syntaxhighlight> |
||
Output: |
Output: |
||
Line 1,082: | Line 1,082: | ||
Example uses [https://hackage.haskell.org/package/network-uri <tt>network-uri</tt>] package: |
Example uses [https://hackage.haskell.org/package/network-uri <tt>network-uri</tt>] package: |
||
< |
<syntaxhighlight lang="haskell">module Main (main) where |
||
import Data.Foldable (for_) |
import Data.Foldable (for_) |
||
Line 1,156: | Line 1,156: | ||
Nothing -> return () |
Nothing -> return () |
||
Just fragment -> putStrLn $ " fragment = " ++ fragment |
Just fragment -> putStrLn $ " fragment = " ++ fragment |
||
putStrLn ""</ |
putStrLn ""</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 1,234: | Line 1,234: | ||
Implementation: |
Implementation: |
||
< |
<syntaxhighlight lang="j">split=:1 :0 |
||
({. ; ] }.~ 1+[)~ i.&m |
({. ; ] }.~ 1+[)~ i.&m |
||
) |
) |
||
Line 1,280: | Line 1,280: | ||
export=. ;:'scheme user creds host port path query fragment' |
export=. ;:'scheme user creds host port path query fragment' |
||
(#~ 0<#@>@{:"1) (,. do each) export |
(#~ 0<#@>@{:"1) (,. do each) export |
||
)</ |
)</syntaxhighlight> |
||
Task examples: |
Task examples: |
||
< |
<syntaxhighlight lang="j"> taskparts 'foo://example.com:8042/over/there?name=ferret#nose' |
||
┌────────┬─────────────┐ |
┌────────┬─────────────┐ |
||
│scheme │foo │ |
│scheme │foo │ |
||
Line 1,379: | Line 1,379: | ||
├──────┼───────────────────────────────────────────────┤ |
├──────┼───────────────────────────────────────────────┤ |
||
│path │oasis:names:specification:docbook:dtd:xml:4.1.2│ |
│path │oasis:names:specification:docbook:dtd:xml:4.1.2│ |
||
└──────┴───────────────────────────────────────────────┘</ |
└──────┴───────────────────────────────────────────────┘</syntaxhighlight> |
||
Note that the <code>path</code> of the example <code>jdbc</code> uri is itself a uri which may be parsed: |
Note that the <code>path</code> of the example <code>jdbc</code> uri is itself a uri which may be parsed: |
||
< |
<syntaxhighlight lang="j"> taskparts 'mysql://test_user:ouupppssss@localhost:3306/sakila' |
||
┌──────┬──────────┐ |
┌──────┬──────────┐ |
||
│scheme│mysql │ |
│scheme│mysql │ |
||
Line 1,396: | Line 1,396: | ||
├──────┼──────────┤ |
├──────┼──────────┤ |
||
│path │/sakila │ |
│path │/sakila │ |
||
└──────┴──────────┘</ |
└──────┴──────────┘</syntaxhighlight> |
||
Also, examples borrowed from the <code>go</code> implementation: |
Also, examples borrowed from the <code>go</code> implementation: |
||
< |
<syntaxhighlight lang="j"> taskparts 'ssh://alice@example.com' |
||
┌──────┬───────────┐ |
┌──────┬───────────┐ |
||
│scheme│ssh │ |
│scheme│ssh │ |
||
Line 1,431: | Line 1,431: | ||
│ ││a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64││ |
│ ││a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64││ |
||
│ │└─────────────────────────────────────────┘│ |
│ │└─────────────────────────────────────────┘│ |
||
└──────┴───────────────────────────────────────────┘</ |
└──────┴───────────────────────────────────────────┘</syntaxhighlight> |
||
Note that escape decoding is left to the consumer (as well as decoding things like '+' as a replacement for the space character and determining the absolute significance of relative paths and the details of ip address parsing and so on...). This seems like a good match to the hierarchical nature of uri parsing. See [[URL_decoding#J|URL decoding]] for an implementation of escape decoding. |
Note that escape decoding is left to the consumer (as well as decoding things like '+' as a replacement for the space character and determining the absolute significance of relative paths and the details of ip address parsing and so on...). This seems like a good match to the hierarchical nature of uri parsing. See [[URL_decoding#J|URL decoding]] for an implementation of escape decoding. |
||
Line 1,440: | Line 1,440: | ||
=={{header|Java}}== |
=={{header|Java}}== |
||
In Java, you can use the <code>URI</code> class for this, so it's pretty straightforward. I just did a bit of tweaking to output.< |
In Java, you can use the <code>URI</code> class for this, so it's pretty straightforward. I just did a bit of tweaking to output.<syntaxhighlight lang="java">import java.net.URI; |
||
import java.net.URISyntaxException; |
import java.net.URISyntaxException; |
||
public class WebAddressParser{ |
public class WebAddressParser{ |
||
Line 1,467: | Line 1,467: | ||
} |
} |
||
} |
} |
||
</ |
</syntaxhighlight>I'm only showing two examples, but the others work too, honest. |
||
{{Out}} |
{{Out}} |
||
<pre>Parsing foo://example.com:8042/over/there?name=ferret#nose |
<pre>Parsing foo://example.com:8042/over/there?name=ferret#nose |
||
Line 1,490: | Line 1,490: | ||
Here is an example, tested against the JavaScript engines of current versions of Chrome and Safari, of taking this 'Gordian knot' approach to the task: |
Here is an example, tested against the JavaScript engines of current versions of Chrome and Safari, of taking this 'Gordian knot' approach to the task: |
||
< |
<syntaxhighlight lang="javascript">(function (lstURL) { |
||
var e = document.createElement('a'), |
var e = document.createElement('a'), |
||
Line 1,535: | Line 1,535: | ||
"https://bob:pass@example.com/place", |
"https://bob:pass@example.com/place", |
||
"http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64" |
"http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64" |
||
]);</ |
]);</syntaxhighlight> |
||
Results of applying this approach in the JavaScript of Safari 8 |
Results of applying this approach in the JavaScript of Safari 8 |
||
<syntaxhighlight lang="json">[ |
|||
<lang JSON>[ |
|||
{ |
{ |
||
"hash": "#nose", |
"hash": "#nose", |
||
Line 1,679: | Line 1,679: | ||
"search": "?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64" |
"search": "?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64" |
||
} |
} |
||
]</ |
]</syntaxhighlight> |
||
=={{header|Julia}}== |
=={{header|Julia}}== |
||
This solution uses Julia's [https://github.com/JuliaWeb/URIParser.jl URIParser] package. The <code>detailview</code> function shows all of the non-empty components of the <code>URI</code> object created by this parser. No attempt is made to further parse more complex components, e.g. query or userinfo. Error detection is limited to indicating whether a string is parsable as a URI and providing a hint as to whether the <code>URI</code> is valid (according to this package's <code>isvalid</code> function). |
This solution uses Julia's [https://github.com/JuliaWeb/URIParser.jl URIParser] package. The <code>detailview</code> function shows all of the non-empty components of the <code>URI</code> object created by this parser. No attempt is made to further parse more complex components, e.g. query or userinfo. Error detection is limited to indicating whether a string is parsable as a URI and providing a hint as to whether the <code>URI</code> is valid (according to this package's <code>isvalid</code> function). |
||
< |
<syntaxhighlight lang="julia">using Printf, URIParser |
||
const FIELDS = names(URI) |
const FIELDS = names(URI) |
||
Line 1,737: | Line 1,737: | ||
println(detailview(uri)) |
println(detailview(uri)) |
||
end |
end |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
Line 1,863: | Line 1,863: | ||
=={{header|Kotlin}}== |
=={{header|Kotlin}}== |
||
Although the java.net.URL class can parse urls just fine, unfortunately (as far as this task is concerned) the constructor throws an exception if it does not recognize the scheme (or 'protocol' as it calls it). To deal with unrecognized protocols such as 'foo', we therefore need to replace them with a valid protocol such as 'http' to trick the URL class into parsing them properly: |
Although the java.net.URL class can parse urls just fine, unfortunately (as far as this task is concerned) the constructor throws an exception if it does not recognize the scheme (or 'protocol' as it calls it). To deal with unrecognized protocols such as 'foo', we therefore need to replace them with a valid protocol such as 'http' to trick the URL class into parsing them properly: |
||
< |
<syntaxhighlight lang="scala">// version 1.1.2 |
||
import java.net.URL |
import java.net.URL |
||
Line 1,912: | Line 1,912: | ||
) |
) |
||
for (url in urls) parseUrl(url) |
for (url in urls) parseUrl(url) |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 1,994: | Line 1,994: | ||
{{libheader|LuaSocket}} |
{{libheader|LuaSocket}} |
||
< |
<syntaxhighlight lang="lua">local url = require('socket.url') |
||
local tests = { |
local tests = { |
||
Line 2,021: | Line 2,021: | ||
io.write('\n') |
io.write('\n') |
||
end |
end |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 2,088: | Line 2,088: | ||
=={{header|M2000 Interpreter}}== |
=={{header|M2000 Interpreter}}== |
||
===Using M2000 script to parse URL=== |
===Using M2000 script to parse URL=== |
||
<syntaxhighlight lang="m2000 interpreter"> |
|||
<lang M2000 Interpreter> |
|||
Module checkit { |
Module checkit { |
||
any=lambda (z$)->{=lambda z$ (a$)->instr(z$,a$)>0} |
any=lambda (z$)->{=lambda z$ (a$)->instr(z$,a$)>0} |
||
Line 2,255: | Line 2,255: | ||
} |
} |
||
Checkit |
Checkit |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre style="height:30ex;overflow:scroll"> |
<pre style="height:30ex;overflow:scroll"> |
||
Line 2,299: | Line 2,299: | ||
===Using an internal function (variation of String$())=== |
===Using an internal function (variation of String$())=== |
||
<syntaxhighlight lang="m2000 interpreter"> |
|||
<lang M2000 Interpreter> |
|||
module Checkit { |
module Checkit { |
||
Stack New { |
Stack New { |
||
Line 2,341: | Line 2,341: | ||
} |
} |
||
Checkit |
Checkit |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre style="height:30ex;overflow:scroll"> |
<pre style="height:30ex;overflow:scroll"> |
||
Line 2,503: | Line 2,503: | ||
=={{header|Mathematica}}/{{header|Wolfram Language}}== |
=={{header|Mathematica}}/{{header|Wolfram Language}}== |
||
< |
<syntaxhighlight lang="mathematica">URLParse["foo://example.com:8042/over/there?name=ferret#nose"]</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre><|"Scheme" -> "foo", "User" -> None, "Domain" -> "example.com", |
<pre><|"Scheme" -> "foo", "User" -> None, "Domain" -> "example.com", |
||
Line 2,512: | Line 2,512: | ||
The <code>uri</code> module provides a <code>parseUri</code> proc. |
The <code>uri</code> module provides a <code>parseUri</code> proc. |
||
< |
<syntaxhighlight lang="nim">import uri, strformat |
||
proc printUri(url: string) = |
proc printUri(url: string) = |
||
Line 2,552: | Line 2,552: | ||
for url in urls: |
for url in urls: |
||
printUri(url)</ |
printUri(url)</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>foo://example.com:8042/over/there?name=ferret#nose |
<pre>foo://example.com:8042/over/there?name=ferret#nose |
||
Line 2,624: | Line 2,624: | ||
=={{header|Objeck}}== |
=={{header|Objeck}}== |
||
< |
<syntaxhighlight lang="objeck">use Web.HTTP; |
||
class Test { |
class Test { |
||
Line 2,649: | Line 2,649: | ||
}; |
}; |
||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 2,715: | Line 2,715: | ||
=={{header|Perl}}== |
=={{header|Perl}}== |
||
You can use the URI module from CPAN to parse URIs. Note that the output is a bit different: for example, you don't get the host from the <code>foo://</code> scheme, as host is only valid for schemes that define it. |
You can use the URI module from CPAN to parse URIs. Note that the output is a bit different: for example, you don't get the host from the <code>foo://</code> scheme, as host is only valid for schemes that define it. |
||
< |
<syntaxhighlight lang="perl">#!/usr/bin/perl |
||
use warnings; |
use warnings; |
||
use strict; |
use strict; |
||
Line 2,742: | Line 2,742: | ||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>foo://example.com:8042/over/there?name=ferret#nose |
<pre>foo://example.com:8042/over/there?name=ferret#nose |
||
Line 2,799: | Line 2,799: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
There are some fairly rudimentary and lightly documented routines in builtins/url.e. |
There are some fairly rudimentary and lightly documented routines in builtins/url.e. |
||
<!--< |
<!--<syntaxhighlight lang="phix">(phixonline)--> |
||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
||
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">/</span><span style="color: #000000;">url</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> |
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">/</span><span style="color: #000000;">url</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> |
||
Line 2,835: | Line 2,835: | ||
<span style="color: #000000;">show_url_details</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span> |
<span style="color: #000000;">show_url_details</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span> |
||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
||
<!--</ |
<!--</syntaxhighlight>--> |
||
{{Out}} |
{{Out}} |
||
<pre> |
<pre> |
||
Line 2,917: | Line 2,917: | ||
Using the parse_url function (Parse a URL and return its components) |
Using the parse_url function (Parse a URL and return its components) |
||
< |
<syntaxhighlight lang="php"><?php |
||
$urls = array( |
$urls = array( |
||
Line 2,938: | Line 2,938: | ||
print_r($p); |
print_r($p); |
||
echo PHP_EOL; |
echo PHP_EOL; |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 3,032: | Line 3,032: | ||
=={{header|PowerShell}}== |
=={{header|PowerShell}}== |
||
I was confused about the '''Path''' parameter. PowerShell returns '''LocalPath''', '''AbsolutePath''' and '''AbsoluteUri'''; I defaulted to '''LocalPath''', but all properties are returned in the <code>$parsedUrls</code> variable. |
I was confused about the '''Path''' parameter. PowerShell returns '''LocalPath''', '''AbsolutePath''' and '''AbsoluteUri'''; I defaulted to '''LocalPath''', but all properties are returned in the <code>$parsedUrls</code> variable. |
||
<syntaxhighlight lang="powershell"> |
|||
<lang PowerShell> |
|||
function Get-ParsedUrl |
function Get-ParsedUrl |
||
{ |
{ |
||
Line 3,076: | Line 3,076: | ||
} |
} |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
<syntaxhighlight lang="powershell"> |
|||
<lang PowerShell> |
|||
[string[]]$urls = @' |
[string[]]$urls = @' |
||
foo://example.com:8042/over/there?name=ferret#nose |
foo://example.com:8042/over/there?name=ferret#nose |
||
Line 3,095: | Line 3,095: | ||
$parsedUrls | Select-Object -Property Scheme, Port, Domain, Path, Query, Fragment | Format-Table |
$parsedUrls | Select-Object -Property Scheme, Port, Domain, Path, Query, Fragment | Format-Table |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{Out}} |
{{Out}} |
||
<pre> |
<pre> |
||
Line 3,115: | Line 3,115: | ||
=={{header|Python}}== |
=={{header|Python}}== |
||
Links to Python Documentation: v2: [https://docs.python.org/2/library/urlparse.html#module-urlparse], v3: [https://docs.python.org/3.4/library/urllib.parse.html] |
Links to Python Documentation: v2: [https://docs.python.org/2/library/urlparse.html#module-urlparse], v3: [https://docs.python.org/3.4/library/urllib.parse.html] |
||
< |
<syntaxhighlight lang="python">import urllib.parse as up # urlparse for Python v2 |
||
url = up.urlparse('http://user:pass@example.com:8081/path/file.html;params?query1=1#fragment') |
url = up.urlparse('http://user:pass@example.com:8081/path/file.html;params?query1=1#fragment') |
||
Line 3,129: | Line 3,129: | ||
print('url.username = ', url.username) |
print('url.username = ', url.username) |
||
print('url.password = ', url.password) |
print('url.password = ', url.password) |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 3,146: | Line 3,146: | ||
=={{header|R}}== |
=={{header|R}}== |
||
urltools::url_parse() does all the actual work. The rest is just for nice output. |
urltools::url_parse() does all the actual work. The rest is just for nice output. |
||
< |
<syntaxhighlight lang="rsplus"> |
||
library(urltools) |
library(urltools) |
||
Line 3,174: | Line 3,174: | ||
cat("\n") |
cat("\n") |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 3,250: | Line 3,250: | ||
Links: [http://docs.racket-lang.org/net/url.html?q=url#%28def._%28%28lib._net%2Furl-structs..rkt%29._url%29%29 <code>url</code> structure in Racket documentation]. |
Links: [http://docs.racket-lang.org/net/url.html?q=url#%28def._%28%28lib._net%2Furl-structs..rkt%29._url%29%29 <code>url</code> structure in Racket documentation]. |
||
< |
<syntaxhighlight lang="racket">#lang racket/base |
||
(require racket/match net/url) |
(require racket/match net/url) |
||
(define (debug-url-string U) |
(define (debug-url-string U) |
||
Line 3,285: | Line 3,285: | ||
"tel:+1-816-555-1212" |
"tel:+1-816-555-1212" |
||
"telnet://192.0.2.16:80/" |
"telnet://192.0.2.16:80/" |
||
"urn:oasis:names:specification:docbook:dtd:xml:4.1.2"))</ |
"urn:oasis:names:specification:docbook:dtd:xml:4.1.2"))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 3,370: | Line 3,370: | ||
{{works with|rakudo|2015-11-02}} |
{{works with|rakudo|2015-11-02}} |
||
Uses the URI library which implements a Raku grammar based on the RFC 3986 BNF grammar. |
Uses the URI library which implements a Raku grammar based on the RFC 3986 BNF grammar. |
||
<lang |
<syntaxhighlight lang="raku" line>use URI; |
||
my @test-uris = < |
my @test-uris = < |
||
Line 3,396: | Line 3,396: | ||
} |
} |
||
say ''; |
say ''; |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>URI: foo://example.com:8042/over/there?name=ferret#nose |
<pre>URI: foo://example.com:8042/over/there?name=ferret#nose |
||
Line 3,463: | Line 3,463: | ||
As you can see in the output below, the URI library doesn't parse all of these as recommended. |
As you can see in the output below, the URI library doesn't parse all of these as recommended. |
||
< |
<syntaxhighlight lang="ruby">require 'uri' |
||
test_cases = [ |
test_cases = [ |
||
Line 3,490: | Line 3,490: | ||
puts " #{attr.rjust(8)} = #{uri.send(attr)}" if uri.send(attr) |
puts " #{attr.rjust(8)} = #{uri.send(attr)}" if uri.send(attr) |
||
end |
end |
||
end</ |
end</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>foo://example.com:8042/over/there?name=ferret#nose |
<pre>foo://example.com:8042/over/there?name=ferret#nose |
||
Line 3,553: | Line 3,553: | ||
=={{header|Rust}}== |
=={{header|Rust}}== |
||
< |
<syntaxhighlight lang="rust">use url::Url; |
||
fn print_fields(url: Url) -> () { |
fn print_fields(url: Url) -> () { |
||
Line 3,609: | Line 3,609: | ||
} |
} |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
Output: |
Output: |
||
<pre> |
<pre> |
||
Line 3,700: | Line 3,700: | ||
=={{header|Scala}}== |
=={{header|Scala}}== |
||
< |
<syntaxhighlight lang="scala">import java.net.URI |
||
object WebAddressParser extends App { |
object WebAddressParser extends App { |
||
Line 3,733: | Line 3,733: | ||
} catch { case ex: Throwable => println('\u2718') } |
} catch { case ex: Throwable => println('\u2718') } |
||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{Out}}See it running in your browser with [https://scastie.scala-lang.org/GZdtfkhfRsa9QPKZQ7W4XQ Scastie (JVM)]. |
{{Out}}See it running in your browser with [https://scastie.scala-lang.org/GZdtfkhfRsa9QPKZQ7W4XQ Scastie (JVM)]. |
||
Line 3,748: | Line 3,748: | ||
The <tt>uri</tt> package doesn't presently handle IPv6 syntax as used in the example: a bug and patch will be submitted presently .. |
The <tt>uri</tt> package doesn't presently handle IPv6 syntax as used in the example: a bug and patch will be submitted presently .. |
||
< |
<syntaxhighlight lang="tcl">package require uri |
||
package require uri::urn |
package require uri::urn |
||
Line 3,787: | Line 3,787: | ||
puts \n$uri |
puts \n$uri |
||
pdict [parse_uri $uri] |
pdict [parse_uri $uri] |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 3,848: | Line 3,848: | ||
=={{header|VBScript}}== |
=={{header|VBScript}}== |
||
<syntaxhighlight lang="vb"> |
|||
<lang vb> |
|||
Function parse_url(url) |
Function parse_url(url) |
||
parse_url = "URL: " & url |
parse_url = "URL: " & url |
||
Line 3,945: | Line 3,945: | ||
WScript.StdOut.WriteLine "-------------------------------" |
WScript.StdOut.WriteLine "-------------------------------" |
||
WScript.StdOut.WriteLine parse_url("this code is messy, long, and needs a makeover!!!") |
WScript.StdOut.WriteLine parse_url("this code is messy, long, and needs a makeover!!!") |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{Out}} |
{{Out}} |
||
Line 4,011: | Line 4,011: | ||
=={{header|Vlang}}== |
=={{header|Vlang}}== |
||
{{trans|Go}} |
{{trans|Go}} |
||
<lang>import net.urllib |
<syntaxhighlight lang="text">import net.urllib |
||
const urls = ['jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true', |
const urls = ['jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true', |
||
Line 4,065: | Line 4,065: | ||
println(" Fragment: $u.fragment") |
println(" Fragment: $u.fragment") |
||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true |
<pre>jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true |
||
Line 4,118: | Line 4,118: | ||
{{trans|VBScript}} |
{{trans|VBScript}} |
||
... though modified quite a bit. |
... though modified quite a bit. |
||
< |
<syntaxhighlight lang="ecmascript">var urlParse = Fn.new { |url| |
||
var parseUrl = "URL = " + url |
var parseUrl = "URL = " + url |
||
var index |
var index |
||
Line 4,232: | Line 4,232: | ||
"http://example.com/?a=1&b=2+2&c=3&c=4&d=\%65\%6e\%63\%6F\%64\%65\%64" |
"http://example.com/?a=1&b=2+2&c=3&c=4&d=\%65\%6e\%63\%6F\%64\%65\%64" |
||
] |
] |
||
for (url in urls) urlParse.call(url)</ |
for (url in urls) urlParse.call(url)</syntaxhighlight> |
||
{{out}} |
{{out}} |
Revision as of 19:22, 28 August 2022
You are encouraged to solve this task according to the task description, using any language you may know.
URLs are strings with a simple syntax:
scheme://[username:password@]domain[:port]/path?query_string#fragment_id
- Task
Parse a well-formed URL to retrieve the relevant information: scheme, domain, path, ...
Note: this task has nothing to do with URL encoding or URL decoding.
According to the standards, the characters:
- ! * ' ( ) ; : @ & = + $ , / ? % # [ ]
only need to be percent-encoded (%) in case of possible confusion.
Also note that the path, query and fragment are case sensitive, even if the scheme and domain are not.
The way the returned information is provided (set of variables, array, structured, record, object,...) is language-dependent and left to the programmer, but the code should be clear enough to reuse.
Extra credit is given for clear error diagnostics.
- Here is the official standard: https://tools.ietf.org/html/rfc3986,
- and here is a simpler BNF: http://www.w3.org/Addressing/URL/5_URI_BNF.html.
- Test cases
According to T. Berners-Lee
foo://example.com:8042/over/there?name=ferret#nose should parse into:
- scheme = foo
- domain = example.com
- port = :8042
- path = over/there
- query = name=ferret
- fragment = nose
urn:example:animal:ferret:nose should parse into:
- scheme = urn
- path = example:animal:ferret:nose
other URLs that must be parsed include:
- jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true
- ftp://ftp.is.co.za/rfc/rfc1808.txt
- http://www.ietf.org/rfc/rfc2396.txt#header1
- ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two
- mailto:John.Doe@example.com
- news:comp.infosystems.www.servers.unix
- tel:+1-816-555-1212
- telnet://192.0.2.16:80/
- urn:oasis:names:specification:docbook:dtd:xml:4.1.2
Ada
with Ada.Exceptions;
with Ada.Text_IO;
with AWS.URL;
with AWS.Parameters;
with AWS.Containers.Tables;
--  Parses a fixed list of sample URLs with the AWS library and prints the
--  components of each one (scheme, domain, port, path, file, query,
--  fragment, user, password and the individual query parameters).
procedure URL_Parser is

   --  Parse URL with AWS.URL.Parse and print every component that is
   --  present, then the query parameters (if any), then a blank line.
   --
   --  If AWS.URL.Parse rejects the URL by raising AWS.URL.URL_Error, the
   --  error is reported on standard output and the procedure returns
   --  normally, so the caller can carry on with the next URL.
   procedure Parse (URL : in String) is
      use AWS.URL, Ada.Text_IO;
      use AWS.Containers.Tables;

      --  Print Item immediately followed by Value on one line, unless
      --  Value equals When_Not (by default the empty string), in which
      --  case nothing is printed.
      procedure Put_Cond (Item     : in String;
                          Value    : in String;
                          When_Not : in String := "") is
      begin
         if Value /= When_Not then
            Put (" "); Put (Item); Put_Line (Value);
         end if;
      end Put_Cond;

      Obj  : Object;
      List : Table_Type;
   begin
      Put_Line ("Parsing " & URL);

      --  This is a function call, so it resolves to the AWS.URL function
      --  rather than to the enclosing procedure of the same name.
      Obj  := Parse (URL);
      List := Table_Type (AWS.Parameters.List'(AWS.URL.Parameters (Obj)));

      Put_Cond ("Scheme: ", Protocol_Name (Obj));
      Put_Cond ("Domain: ", Host (Obj));
      --  The port line is suppressed when the port text is "0"
      --  (presumably what AWS reports when no port applies - confirm
      --  against the AWS.URL documentation).
      Put_Cond ("Port: ", Port (Obj), When_Not => "0");
      Put_Cond ("Path: ", Path (Obj));
      Put_Cond ("File: ", File (Obj));
      Put_Cond ("Query: ", Query (Obj));
      Put_Cond ("Fragment: ", Fragment (Obj));
      Put_Cond ("User: ", User (Obj));
      Put_Cond ("Password: ", Password (Obj));

      if List.Count /= 0 then
         Put_Line (" Parameters:");
      end if;

      --  One line per query parameter: name followed by its quoted value.
      for Index in 1 .. List.Count loop
         Put (" "); Put (Get_Name (List, N => Index));
         Put (" "); Put ("'" & Get_Value (List, N => Index) & "'");
         New_Line;
      end loop;

      New_Line;
   exception
      --  Report a rejected URL instead of letting the exception terminate
      --  the whole program; keep the blank separator line so the output
      --  layout stays the same for the URLs that follow.
      when E : AWS.URL.URL_Error =>
         Put_Line (" Error: " & Ada.Exceptions.Exception_Message (E));
         New_Line;
   end Parse;

begin
   --  Test cases from the task description.
   Parse ("foo://example.com:8042/over/there?name=ferret#nose");
   Parse ("urn:example:animal:ferret:nose");
   Parse ("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true");
   Parse ("ftp://ftp.is.co.za/rfc/rfc1808.txt");
   Parse ("http://www.ietf.org/rfc/rfc2396.txt#header1");
   Parse ("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two");
   Parse ("mailto:John.Doe@example.com");
   Parse ("news:comp.infosystems.www.servers.unix");
   Parse ("tel:+1-816-555-1212");
   Parse ("telnet://192.0.2.16:80/");
   Parse ("urn:oasis:names:specification:docbook:dtd:xml:4.1.2");

   --  Additional cases: user info, credentials, repeated and
   --  percent-encoded query parameters.
   Parse ("ssh://alice@example.com");
   Parse ("https://bob:pass@example.com/place");
   Parse ("http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64");
end URL_Parser;
- Output:
Parsing foo://example.com:8042/over/there?name=ferret#nose Scheme: foo Domain: example.com Port: 8042 Path: /over/ File: there Query: name=ferret Fragment: #nose Parameters: name 'ferret' Parsing urn:example:animal:ferret:nose Scheme: urn File: example:animal:ferret:nose Parsing jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true Scheme: jdbc Path: mysql://test_user:ouupppssss@localhost:3306/ File: sakila Query: profileSQL=true Parameters: profileSQL 'true' Parsing ftp://ftp.is.co.za/rfc/rfc1808.txt Scheme: ftp Domain: ftp.is.co.za Port: 21 Path: /rfc/ File: rfc1808.txt Parsing http://www.ietf.org/rfc/rfc2396.txt#header1 Scheme: http Domain: www.ietf.org Port: 80 Path: /rfc/ File: rfc2396.txt Fragment: #header1 Parsing ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two Scheme: ldap Domain: 2001:db8::7 Path: / File: c=GB Query: objectClass=one&objectClass=two Parameters: objectClass 'one' objectClass 'two' Parsing mailto:John.Doe@example.com Scheme: mailto File: John.Doe@example.com Parsing news:comp.infosystems.www.servers.unix Scheme: news File: comp.infosystems.www.servers.unix Parsing tel:+1-816-555-1212 Scheme: tel File: 1-816-555-1212 Parsing telnet://192.0.2.16:80/ Scheme: telnet Domain: 192.0.2.16 Port: 80 Path: / Parsing urn:oasis:names:specification:docbook:dtd:xml:4.1.2 Scheme: urn File: oasis:names:specification:docbook:dtd:xml:4.1.2 Parsing ssh://alice@example.com Scheme: ssh Domain: alice@example.com Path: / Parsing https://bob:pass@example.com/place Scheme: https Domain: example.com Port: 443 Path: / File: place User: bob Password: pass Parsing http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 Scheme: http Domain: example.com Port: 80 Path: / Query: a=1&b=2%202&c=3&c=4&d=encoded Parameters: a '1' b '2 2' c '3' c '4' d 'encoded'
ALGOL 68
Uses the URI parser here: URL_parser/URI_parser_ALGOL68.
PR read "uriParser.a68" PR
# Parses one URI with parse uri and prints the outcome.                     #
# The URI itself is printed first, followed by a colon.  When the ok field  #
# of the result is FALSE the error field is printed; otherwise the scheme   #
# is printed, followed by each remaining component that is not the empty    #
# string.  A blank line ends the report.                                    #
PROC test uri parser = ( STRING uri )VOID:
BEGIN
URI result := parse uri( uri );
print( ( uri, ":", newline ) );
IF NOT ok OF result
THEN
# the parse failed #
print( ( " ", error OF result, newline ) )
ELSE
# parsed OK #
# the scheme is always printed; every other component only when non-empty #
print( ( " scheme: ", scheme OF result, newline ) );
IF userinfo OF result /= "" THEN print( ( " userinfo: ", userinfo OF result, newline ) ) FI;
IF host OF result /= "" THEN print( ( " host: ", host OF result, newline ) ) FI;
IF port OF result /= "" THEN print( ( " port: ", port OF result, newline ) ) FI;
IF path OF result /= "" THEN print( ( " path: ", path OF result, newline ) ) FI;
IF query OF result /= "" THEN print( ( " query: ", query OF result, newline ) ) FI;
IF fragment id OF result /= "" THEN print( ( " fragment id: ", fragment id OF result, newline ) ) FI
FI;
print( ( newline ) )
END # test uri parser # ;
# Driver: run the report for each of the sample URIs in turn. #
BEGIN test uri parser( "foo://example.com:8042/over/there?name=ferret#nose" )
; test uri parser( "urn:example:animal:ferret:nose" )
; test uri parser( "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true" )
; test uri parser( "ftp://ftp.is.co.za/rfc/rfc1808.txt" )
; test uri parser( "http://www.ietf.org/rfc/rfc2396.txt#header1" )
; test uri parser( "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two" )
; test uri parser( "mailto:John.Doe@example.com" )
; test uri parser( "news:comp.infosystems.www.servers.unix" )
; test uri parser( "tel:+1-816-555-1212" )
; test uri parser( "telnet://192.0.2.16:80/" )
; test uri parser( "urn:oasis:names:specification:docbook:dtd:xml:4.1.2" )
; test uri parser( "ssh://alice@example.com" )
; test uri parser( "https://bob:pass@example.com/place" )
; test uri parser( "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64" )
END
- Output:
foo://example.com:8042/over/there?name=ferret#nose: scheme: foo host: example.com port: 8042 path: /over/there query: name=ferret fragment id: nose urn:example:animal:ferret:nose: scheme: urn path: example:animal:ferret:nose jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true: scheme: jdbc path: mysql://test_user:ouupppssss@localhost:3306/sakila query: profileSQL=true ftp://ftp.is.co.za/rfc/rfc1808.txt: scheme: ftp host: ftp.is.co.za path: /rfc/rfc1808.txt http://www.ietf.org/rfc/rfc2396.txt#header1: scheme: http host: www.ietf.org path: /rfc/rfc2396.txt fragment id: header1 ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two: scheme: ldap host: 2001:db8::7 path: /c=GB query: objectClass=one&objectClass=two mailto:John.Doe@example.com: scheme: mailto path: John.Doe@example.com news:comp.infosystems.www.servers.unix: scheme: news path: comp.infosystems.www.servers.unix tel:+1-816-555-1212: scheme: tel path: +1-816-555-1212 telnet://192.0.2.16:80/: scheme: telnet host: 192.0.2.16 port: 80 path: / urn:oasis:names:specification:docbook:dtd:xml:4.1.2: scheme: urn path: oasis:names:specification:docbook:dtd:xml:4.1.2 ssh://alice@example.com: scheme: ssh userinfo: alice host: example.com https://bob:pass@example.com/place: scheme: https userinfo: bob:pass host: example.com path: /place http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64: scheme: http host: example.com path: / query: a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64
AppleScript
Thanks to AppleScript's ability to interface with some of macOS's ObjectiveC API, the various components can simply be read off from an NSURLComponents object set from the URL text.
use AppleScript version "2.4" -- OS X 10.10 (Yosemite) or later
use framework "Foundation"
-- Build a multi-line text report for one URL.
-- The first line is the URL text itself; each following line is an indented
-- "key = value" entry for every NSURLComponents property that has a value.
-- If the text cannot be parsed as a URL, a single "error" entry is reported
-- instead of the component lines.
on parseURLString(URLString)
	set output to {URLString}
	set indent to tab & "• "
	set componentsObject to current application's class "NSURLComponents"'s componentsWithString:(URLString)
	-- componentsWithString: yields nil (missing value here) for text that is not
	-- a valid URL; report that clearly rather than failing on valueForKey: below.
	if (componentsObject is missing value) then
		set end of output to indent & "error = not a valid URL"
		return join(output, linefeed)
	end if
	repeat with thisKey in {"scheme", "user", "password", "host", "port", "path", "query", "fragment"}
		set thisValue to (componentsObject's valueForKey:(thisKey))
		-- Components absent from the URL come back as missing value and are skipped.
		if (thisValue is not missing value) then set end of output to indent & thisKey & (" = " & thisValue)
	end repeat
	return join(output, linefeed)
end parseURLString
-- Concatenate the items of listOfText into one text, placing delimiter between
-- consecutive items. The previous text item delimiters are restored afterwards.
on join(listOfText, delimiter)
	-- The right-hand list is evaluated first, so the old delimiters are saved
	-- in the same statement that installs the new one.
	set {savedDelimiters, AppleScript's text item delimiters} to {AppleScript's text item delimiters, delimiter}
	set joinedText to listOfText as text
	set AppleScript's text item delimiters to savedDelimiters
	return joinedText
end join
-- Test code:
-- Builds one report per sample URL with parseURLString and returns all the
-- reports as a single text, separated by blank lines.
local output, URLString
set output to {}
repeat with URLString in {"foo://example.com:8042/over/there?name=ferret#nose", ¬
"urn:example:animal:ferret:nose", ¬
"jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true", ¬
"ftp://ftp.is.co.za/rfc/rfc1808.txt", ¬
"http://www.ietf.org/rfc/rfc2396.txt#header1", ¬
"ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two", ¬
"mailto:John.Doe@example.com", ¬
"news:comp.infosystems.www.servers.unix", ¬
"tel:+1-816-555-1212", ¬
"telnet://192.0.2.16:80/", ¬
"urn:oasis:names:specification:docbook:dtd:xml:4.1.2", ¬
"http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"}
-- URLString is a reference to the list item; "contents" dereferences it to the text.
set end of output to parseURLString(URLString's contents)
end repeat
return join(output, linefeed & linefeed)
- Output:
"foo://example.com:8042/over/there?name=ferret#nose
• scheme = foo
• host = example.com
• port = 8042
• path = /over/there
• query = name=ferret
• fragment = nose
urn:example:animal:ferret:nose
• scheme = urn
• path = example:animal:ferret:nose
jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true
• scheme = jdbc
• path = mysql://test_user:ouupppssss@localhost:3306/sakila
• query = profileSQL=true
ftp://ftp.is.co.za/rfc/rfc1808.txt
• scheme = ftp
• host = ftp.is.co.za
• path = /rfc/rfc1808.txt
http://www.ietf.org/rfc/rfc2396.txt#header1
• scheme = http
• host = www.ietf.org
• path = /rfc/rfc2396.txt
• fragment = header1
ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two
• scheme = ldap
• host = [2001:db8::7]
• path = /c=GB
• query = objectClass=one&objectClass=two
mailto:John.Doe@example.com
• scheme = mailto
• path = John.Doe@example.com
news:comp.infosystems.www.servers.unix
• scheme = news
• path = comp.infosystems.www.servers.unix
tel:+1-816-555-1212
• scheme = tel
• path = +1-816-555-1212
telnet://192.0.2.16:80/
• scheme = telnet
• host = 192.0.2.16
• port = 80
• path = /
urn:oasis:names:specification:docbook:dtd:xml:4.1.2
• scheme = urn
• path = oasis:names:specification:docbook:dtd:xml:4.1.2
http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64
• scheme = http
• host = example.com
• path = /
• query = a=1&b=2+2&c=3&c=4&d=encoded"
C#
using System;
namespace RosettaUrlParse
{
    /// <summary>
    /// Console demo that splits a set of sample URLs into their components
    /// using <see cref="System.Uri"/>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses <paramref name="url"/> with <see cref="Uri"/> and writes the
        /// absolute URI, scheme, host, port, local path, query and fragment to
        /// the console, one per line, followed by an empty line.
        /// </summary>
        /// <param name="url">The URL text to parse.</param>
        static void ParseUrl(string url)
        {
            Uri parsed = new Uri(url);

            Console.WriteLine("URL: " + parsed.AbsoluteUri);
            Console.WriteLine("Scheme: " + parsed.Scheme);
            Console.WriteLine("Host: " + parsed.DnsSafeHost);
            Console.WriteLine("Port: " + parsed.Port);
            Console.WriteLine("Path: " + parsed.LocalPath);
            Console.WriteLine("Query: " + parsed.Query);
            Console.WriteLine("Fragment: " + parsed.Fragment);
            Console.WriteLine();
        }

        /// <summary>Runs <see cref="ParseUrl"/> over every sample URL.</summary>
        static void Main(string[] args)
        {
            string[] samples =
            {
                "foo://example.com:8042/over/there?name=ferret#nose",
                "urn:example:animal:ferret:nose",
                "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
                "ftp://ftp.is.co.za/rfc/rfc1808.txt",
                "http://www.ietf.org/rfc/rfc2396.txt#header1",
                "ldap://[2001:db8::7]/c=GB?objectClass?one",
                "mailto:John.Doe@example.com",
                "news:comp.infosystems.www.servers.unix",
                "tel:+1-816-555-1212",
                "telnet://192.0.2.16:80/",
                "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"
            };

            foreach (string sample in samples)
            {
                ParseUrl(sample);
            }
        }
    }
}
Crystal
This example demonstrates use of the Crystal standard library's URI class.
require "uri"
# Sample URLs to run through URI.parse.
examples = ["foo://example.com:8042/over/there?name=ferret#nose",
"urn:example:animal:ferret:nose",
"jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
"ftp://ftp.is.co.za/rfc/rfc1808.txt",
"http://www.ietf.org/rfc/rfc2396.txt#header1",
"ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
"mailto:John.Doe@example.com",
"news:comp.infosystems.www.servers.unix",
"tel:+1-816-555-1212",
"telnet://192.0.2.16:80/",
"urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
"https://bob:password@[::1]/place?a=1&b=2%202"]
# For each sample: print the URL, then every listed component that is not nil,
# then the query parameters (when there are any), then a blank line.
examples.each do |example|
puts "Parsing \"#{example}\":"
url = URI.parse example
# Compile-time macro loop: it expands to one `unless url.<name>.nil?` block
# per name in the list, so each accessor is called by its literal name.
{% for name in ["scheme", "host", "hostname", "port", "path", "userinfo",
"user", "password", "fragment", "query"] %}
unless url.{{name.id}}.nil?
puts " {{name.id}}: \"#{url.{{name.id}}}\""
end
{% end %}
# query_params is iterated as key/value pairs, one output line per pair.
unless url.query_params.empty?
puts " query_params:"
url.query_params.each do |k, v|
puts " #{k}: \"#{v}\""
end
end
puts
end
- Output:
Parsing "foo://example.com:8042/over/there?name=ferret#nose": scheme: "foo" host: "example.com" hostname: "example.com" port: "8042" path: "/over/there" fragment: "nose" query: "name=ferret" query_params: name: "ferret" Parsing "urn:example:animal:ferret:nose": scheme: "urn" path: "example:animal:ferret:nose" Parsing "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true": scheme: "jdbc" path: "mysql://test_user:ouupppssss@localhost:3306/sakila" query: "profileSQL=true" query_params: profileSQL: "true" Parsing "ftp://ftp.is.co.za/rfc/rfc1808.txt": scheme: "ftp" host: "ftp.is.co.za" hostname: "ftp.is.co.za" path: "/rfc/rfc1808.txt" Parsing "http://www.ietf.org/rfc/rfc2396.txt#header1": scheme: "http" host: "www.ietf.org" hostname: "www.ietf.org" path: "/rfc/rfc2396.txt" fragment: "header1" Parsing "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two": scheme: "ldap" host: "[2001:db8::7]" hostname: "2001:db8::7" path: "/c=GB" query: "objectClass=one&objectClass=two" query_params: objectClass: "one" objectClass: "two" Parsing "mailto:John.Doe@example.com": scheme: "mailto" path: "John.Doe@example.com" Parsing "news:comp.infosystems.www.servers.unix": scheme: "news" path: "comp.infosystems.www.servers.unix" Parsing "tel:+1-816-555-1212": scheme: "tel" path: "+1-816-555-1212" Parsing "telnet://192.0.2.16:80/": scheme: "telnet" host: "192.0.2.16" hostname: "192.0.2.16" port: "80" path: "/" Parsing "urn:oasis:names:specification:docbook:dtd:xml:4.1.2": scheme: "urn" path: "oasis:names:specification:docbook:dtd:xml:4.1.2" Parsing "https://bob:password@[::1]/place?a=1&b=2%202": scheme: "https" host: "[::1]" hostname: "::1" path: "/place" userinfo: "bob:password" user: "bob" password: "password" query: "a=1&b=2%202" query_params: a: "1" b: "2 2"
Elixir
# Sample URLs to feed through URI.parse/1.
test_cases = [
  "foo://example.com:8042/over/there?name=ferret#nose",
  "urn:example:animal:ferret:nose",
  "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
  "ftp://ftp.is.co.za/rfc/rfc1808.txt",
  "http://www.ietf.org/rfc/rfc2396.txt#header1",
  "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
  "mailto:John.Doe@example.com",
  "news:comp.infosystems.www.servers.unix",
  "tel:+1-816-555-1212",
  "telnet://192.0.2.16:80/",
  "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
  "ssh://alice@example.com",
  "https://bob:pass@example.com/place",
  "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
]

# For every sample: print a blank line and the URL, then inspect the
# struct that URI.parse/1 builds from it.
for url <- test_cases do
  IO.puts("\n#{url}")

  url
  |> URI.parse()
  |> IO.inspect()
end
- Output:
foo://example.com:8042/over/there?name=ferret#nose %URI{authority: "example.com:8042", fragment: "nose", host: "example.com", path: "/over/there", port: 8042, query: "name=ferret", scheme: "foo", userinfo: nil} urn:example:animal:ferret:nose %URI{authority: nil, fragment: nil, host: nil, path: "example:animal:ferret:nose", port: nil, query: nil, scheme: "urn", userinfo: nil} jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true %URI{authority: nil, fragment: nil, host: nil, path: "mysql://test_user:ouupppssss@localhost:3306/sakila", port: nil, query: "profileSQL=true", scheme: "jdbc", userinfo: nil} ftp://ftp.is.co.za/rfc/rfc1808.txt %URI{authority: "ftp.is.co.za", fragment: nil, host: "ftp.is.co.za", path: "/rfc/rfc1808.txt", port: 21, query: nil, scheme: "ftp", userinfo: nil} http://www.ietf.org/rfc/rfc2396.txt#header1 %URI{authority: "www.ietf.org", fragment: "header1", host: "www.ietf.org", path: "/rfc/rfc2396.txt", port: 80, query: nil, scheme: "http", userinfo: nil} ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two %URI{authority: "2001:db8::7", fragment: nil, host: "2001:db8::7", path: "/c=GB", port: 389, query: "objectClass=one&objectClass=two", scheme: "ldap", userinfo: nil} mailto:John.Doe@example.com %URI{authority: nil, fragment: nil, host: nil, path: "John.Doe@example.com", port: nil, query: nil, scheme: "mailto", userinfo: nil} news:comp.infosystems.www.servers.unix %URI{authority: nil, fragment: nil, host: nil, path: "comp.infosystems.www.servers.unix", port: nil, query: nil, scheme: "news", userinfo: nil} tel:+1-816-555-1212 %URI{authority: nil, fragment: nil, host: nil, path: "+1-816-555-1212", port: nil, query: nil, scheme: "tel", userinfo: nil} telnet://192.0.2.16:80/ %URI{authority: "192.0.2.16:80", fragment: nil, host: "192.0.2.16", path: "/", port: 80, query: nil, scheme: "telnet", userinfo: nil} urn:oasis:names:specification:docbook:dtd:xml:4.1.2 %URI{authority: nil, fragment: nil, host: nil, path: 
"oasis:names:specification:docbook:dtd:xml:4.1.2", port: nil, query: nil, scheme: "urn", userinfo: nil} ssh://alice@example.com %URI{authority: "alice@example.com", fragment: nil, host: "example.com", path: nil, port: nil, query: nil, scheme: "ssh", userinfo: "alice"} https://bob:pass@example.com/place %URI{authority: "bob:pass@example.com", fragment: nil, host: "example.com", path: "/place", port: 443, query: nil, scheme: "https", userinfo: "bob:pass"} http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 %URI{authority: "example.com", fragment: nil, host: "example.com", path: "/", port: 80, query: "a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64", scheme: "http", userinfo: nil}
F#
DotNet has the builtin class System.URI which parses URI strings.
Some points of interest:
- The Query and Fragment properties also include the separators '?' and '#' respectively, if those parts are present in the URI being parsed. This makes it possible to distinguish between a missing query/fragment and an empty one. To align with the output shown for other languages, the separators are removed here.
- The Port property is typed as an int. Therefore "not given" is generally shown as -1 (but see the following point).
- The System.URI class does some "Scheme-Based Normalization" (cf. RFC 3986, section 6.2.3), i.e. it knows about certain defaults for some schemes. With the test data this shows in the port numbers for http, ftp, ldap and mailto.
open System
open System.Text.RegularExpressions
// Prints label n left-justified in a 15-character column followed by value v.
// Prints nothing at all when v is null or empty, so absent components are skipped.
let writeline n v = if String.IsNullOrEmpty(v) then () else printfn "%-15s %s" n v
// Builds a Uri from the text representation (ToString) of its argument.
let toUri = fun s -> Uri(s.ToString())
// Splits a block of text into runs of non-whitespace characters (regex \S+)
// and converts each run into a Uri via toUri.
let urisFromString = (Regex(@"\S+").Matches) >> Seq.cast >> (Seq.map toUri)
// Sample URIs, one per line; the surrounding whitespace is ignored by the regex.
urisFromString """
foo://example.com:8042/over/there?name=ferret#nose
urn:example:animal:ferret:nose
jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true
ftp://ftp.is.co.za/rfc/rfc1808.txt
http://www.ietf.org/rfc/rfc2396.txt#header1
ldap://[2001:db8::7]/c=GB?objectClass?one
mailto:John.Doe@example.com
news:comp.infosystems.www.servers.unix
tel:+1-816-555-1212
telnet://192.0.2.16:80/
urn:oasis:names:specification:docbook:dtd:xml:4.1.2
"""
// Report each Uri. A negative port is mapped to "" so writeline skips it;
// Query and Fragment have their first character dropped when non-empty.
|> Seq.iter (fun u ->
writeline "\nURI:" (u.ToString())
writeline " scheme:" (u.Scheme)
writeline " host:" (u.Host)
writeline " port:" (if u.Port < 0 then "" else u.Port.ToString())
writeline " path:" (u.AbsolutePath)
writeline " query:" (if u.Query.Length > 0 then u.Query.Substring(1) else "")
writeline " fragment:" (if u.Fragment.Length > 0 then u.Fragment.Substring(1) else "")
)
- Output:
URI: foo://example.com:8042/over/there?name=ferret#nose scheme: foo host: example.com port: 8042 path: /over/there query: name=ferret fragment: nose URI: urn:example:animal:ferret:nose scheme: urn path: example:animal:ferret:nose URI: jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme: jdbc path: mysql://test_user:ouupppssss@localhost:3306/sakila query: profileSQL=true URI: ftp://ftp.is.co.za/rfc/rfc1808.txt scheme: ftp host: ftp.is.co.za port: 21 path: /rfc/rfc1808.txt URI: http://www.ietf.org/rfc/rfc2396.txt#header1 scheme: http host: www.ietf.org port: 80 path: /rfc/rfc2396.txt fragment: header1 URI: ldap://[2001:db8::7]/c=GB?objectClass?one scheme: ldap host: [2001:db8::7] port: 389 path: /c=GB query: objectClass?one URI: mailto:John.Doe@example.com scheme: mailto host: example.com port: 25 URI: news:comp.infosystems.www.servers.unix scheme: news path: comp.infosystems.www.servers.unix URI: tel:+1-816-555-1212 scheme: tel path: +1-816-555-1212 URI: telnet://192.0.2.16:80/ scheme: telnet host: 192.0.2.16 port: 80 path: / URI: urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme: urn path: oasis:names:specification:docbook:dtd:xml:4.1.2
Go
This uses Go's standard net/url package. The source code for this package (excluding tests) is in a single file of ~720 lines.
package main
import (
	"fmt"
	"log"
	"net"
	"net/url"
	"sort"
)
// main parses each sample URL with url.Parse and prints its components.
// A URL that fails to parse is logged and skipped. When the parsed URL does
// not serialise back to the exact input text, a note showing the
// reassembled form is printed before the components.
func main() {
	for _, in := range []string{
		"foo://example.com:8042/over/there?name=ferret#nose",
		"urn:example:animal:ferret:nose",
		"jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
		"ftp://ftp.is.co.za/rfc/rfc1808.txt",
		"http://www.ietf.org/rfc/rfc2396.txt#header1",
		"ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
		"mailto:John.Doe@example.com",
		"news:comp.infosystems.www.servers.unix",
		"tel:+1-816-555-1212",
		"telnet://192.0.2.16:80/",
		"urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
		"ssh://alice@example.com",
		"https://bob:pass@example.com/place",
		"http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64",
	} {
		fmt.Println(in)
		u, err := url.Parse(in)
		if err != nil {
			log.Println(err)
			continue
		}
		if in != u.String() {
			// Fixed spelling of "reassembles" in the user-visible note.
			fmt.Printf("Note: reassembles as %q\n", u)
		}
		printURL(u)
	}
}
// printURL writes the non-empty components of u to standard output, one per
// line: scheme, opaque part, user name and password, host (split into host
// and port when net.SplitHostPort succeeds), path, raw query with its decoded
// key/value pairs, and fragment.
func printURL(u *url.URL) {
	fmt.Println(" Scheme:", u.Scheme)
	if u.Opaque != "" {
		fmt.Println(" Opaque:", u.Opaque)
	}
	if u.User != nil {
		fmt.Println(" Username:", u.User.Username())
		if pwd, ok := u.User.Password(); ok {
			fmt.Println(" Password:", pwd)
		}
	}
	if u.Host != "" {
		// SplitHostPort fails when there is no port; fall back to the raw host.
		if host, port, err := net.SplitHostPort(u.Host); err == nil {
			fmt.Println(" Host:", host)
			fmt.Println(" Port:", port)
		} else {
			fmt.Println(" Host:", u.Host)
		}
	}
	if u.Path != "" {
		fmt.Println(" Path:", u.Path)
	}
	if u.RawQuery != "" {
		fmt.Println(" RawQuery:", u.RawQuery)
		m, err := url.ParseQuery(u.RawQuery)
		if err == nil {
			// Map iteration order is unspecified in Go, so sort the keys to
			// make the output identical from run to run.
			keys := make([]string, 0, len(m))
			for k := range m {
				keys = append(keys, k)
			}
			sort.Strings(keys)
			for _, k := range keys {
				fmt.Printf(" Key: %q Values: %q\n", k, m[k])
			}
		}
	}
	if u.Fragment != "" {
		fmt.Println(" Fragment:", u.Fragment)
	}
}
- Output:
foo://example.com:8042/over/there?name=ferret#nose Scheme: foo Host: example.com Port: 8042 Path: /over/there RawQuery: name=ferret Key: "name" Values: ["ferret"] Fragment: nose urn:example:animal:ferret:nose Scheme: urn Opaque: example:animal:ferret:nose jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true Scheme: jdbc Opaque: mysql://test_user:ouupppssss@localhost:3306/sakila RawQuery: profileSQL=true Key: "profileSQL" Values: ["true"] ftp://ftp.is.co.za/rfc/rfc1808.txt Scheme: ftp Host: ftp.is.co.za Path: /rfc/rfc1808.txt http://www.ietf.org/rfc/rfc2396.txt#header1 Scheme: http Host: www.ietf.org Path: /rfc/rfc2396.txt Fragment: header1 ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two Scheme: ldap Host: [2001:db8::7] Path: /c=GB RawQuery: objectClass=one&objectClass=two Key: "objectClass" Values: ["one" "two"] mailto:John.Doe@example.com Scheme: mailto Opaque: John.Doe@example.com news:comp.infosystems.www.servers.unix Scheme: news Opaque: comp.infosystems.www.servers.unix tel:+1-816-555-1212 Scheme: tel Opaque: +1-816-555-1212 telnet://192.0.2.16:80/ Scheme: telnet Host: 192.0.2.16 Port: 80 Path: / urn:oasis:names:specification:docbook:dtd:xml:4.1.2 Scheme: urn Opaque: oasis:names:specification:docbook:dtd:xml:4.1.2 ssh://alice@example.com Scheme: ssh Username: alice Host: example.com https://bob:pass@example.com/place Scheme: https Username: bob Password: pass Host: example.com Path: /place http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 Scheme: http Host: example.com Path: / RawQuery: a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 Key: "a" Values: ["1"] Key: "b" Values: ["2 2"] Key: "c" Values: ["3" "4"] Key: "d" Values: ["encoded"]
Groovy
Test:
import java.net.URI
// Sample URLs to run through java.net.URI.
def samples = [
    "foo://example.com:8042/over/there?name=ferret#nose",
    "urn:example:animal:ferret:nose",
    "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
    "ftp://ftp.is.co.za/rfc/rfc1808.txt",
    "http://www.ietf.org/rfc/rfc2396.txt#header1",
    "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
    "mailto:John.Doe@example.com",
    "news:comp.infosystems.www.servers.unix",
    "tel:+1-816-555-1212",
    "telnet://192.0.2.16:80/",
    "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"
]

for (String url in samples) {
    // The parsing itself is done by the URI constructor.
    URI u = new URI(url)

    // java.net.URI reports -1 when no port is given; show that as 'default'.
    def portText = (u.port == -1) ? 'default' : u.port

    // stripMargin() removes everything up to and including the '|' on each line.
    println """
        |Parsing $url
        | scheme = ${u.scheme}
        | domain = ${u.host}
        | port = ${portText}
        | path = ${u.path ?: u.schemeSpecificPart}
        | query = ${u.query}
        | fragment = ${u.fragment}""".stripMargin()
}
Output:
Parsing foo://example.com:8042/over/there?name=ferret#nose scheme = foo domain = example.com port = 8042 path = /over/there query = name=ferret fragment = nose Parsing urn:example:animal:ferret:nose scheme = urn domain = null port = default path = example:animal:ferret:nose query = null fragment = null Parsing jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme = jdbc domain = null port = default path = mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true query = null fragment = null Parsing ftp://ftp.is.co.za/rfc/rfc1808.txt scheme = ftp domain = ftp.is.co.za port = default path = /rfc/rfc1808.txt query = null fragment = null Parsing http://www.ietf.org/rfc/rfc2396.txt#header1 scheme = http domain = www.ietf.org port = default path = /rfc/rfc2396.txt query = null fragment = header1 Parsing ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two scheme = ldap domain = [2001:db8::7] port = default path = /c=GB query = objectClass=one&objectClass=two fragment = null Parsing mailto:John.Doe@example.com scheme = mailto domain = null port = default path = John.Doe@example.com query = null fragment = null Parsing news:comp.infosystems.www.servers.unix scheme = news domain = null port = default path = comp.infosystems.www.servers.unix query = null fragment = null Parsing tel:+1-816-555-1212 scheme = tel domain = null port = default path = +1-816-555-1212 query = null fragment = null Parsing telnet://192.0.2.16:80/ scheme = telnet domain = 192.0.2.16 port = 80 path = / query = null fragment = null Parsing urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme = urn domain = null port = default path = oasis:names:specification:docbook:dtd:xml:4.1.2 query = null fragment = null
Haskell
Example uses network-uri package:
module Main (main) where
import Data.Foldable (for_)
import Network.URI
( URI
, URIAuth
, parseURI
, uriAuthority
, uriFragment
, uriPath
, uriPort
, uriQuery
, uriRegName
, uriScheme
, uriUserInfo
)
-- | Sample URIs that 'main' parses and reports on, in output order.
uriStrings :: [String]
uriStrings =
  [ "https://bob:pass@example.com/place",
    "foo://example.com:8042/over/there?name=ferret#nose",
    "urn:example:animal:ferret:nose",
    "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
    "ftp://ftp.is.co.za/rfc/rfc1808.txt",
    "http://www.ietf.org/rfc/rfc2396.txt#header1",
    "ldap://[2001:db8::7]/c=GB?objectClass?one",
    "mailto:John.Doe@example.com",
    "news:comp.infosystems.www.servers.unix",
    "tel:+1-816-555-1212",
    "telnet://192.0.2.16:80/",
    "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"
  ]
-- | The scheme with its last character removed.
trimmedUriScheme :: URI -> String
trimmedUriScheme uri = init (uriScheme uri)

-- | The user-info with its trailing @\'\@\'@ removed. 'Nothing' when the
-- user-info is empty or does not end in @\'\@\'@.
trimmedUriUserInfo :: URIAuth -> Maybe String
trimmedUriUserInfo uriAuth
  | null info = Nothing
  | last info == '@' = Just (init info)
  | otherwise = Nothing
  where
    info = uriUserInfo uriAuth

-- | The path with one leading @\'/\'@ removed, if it has one.
trimmedUriPath :: URI -> String
trimmedUriPath uri = dropLeadingSlash (uriPath uri)
  where
    dropLeadingSlash ('/' : rest) = rest
    dropLeadingSlash other = other

-- | The query without its leading @\'?\'@; 'Nothing' when the query does not
-- start with @\'?\'@ (which includes the empty query).
trimmedUriQuery :: URI -> Maybe String
trimmedUriQuery uri = afterQuestionMark (uriQuery uri)
  where
    afterQuestionMark ('?' : rest) = Just rest
    afterQuestionMark _ = Nothing

-- | The fragment without its leading @\'#\'@; 'Nothing' when the fragment does
-- not start with @\'#\'@ (which includes the empty fragment).
trimmedUriFragment :: URI -> Maybe String
trimmedUriFragment uri = afterHash (uriFragment uri)
  where
    afterHash ('#' : rest) = Just rest
    afterHash _ = Nothing
-- | Parse every entry of 'uriStrings' and print its components.
--
-- For a URI that parses, the output is the URI text, the scheme, the
-- authority parts (user-info when present, then domain and port) if the URI
-- has an authority, the path, and the query and fragment when present.
-- For text that does not parse, a diagnostic naming the offending text is
-- printed instead. A blank line follows every entry.
main :: IO ()
main =
  for_ uriStrings $ \uriString -> do
    case parseURI uriString of
      -- The space after "parse" keeps the message and the URI from running together.
      Nothing -> putStrLn $ "Could not parse " ++ uriString
      Just uri -> do
        putStrLn uriString
        putStrLn $ " scheme = " ++ trimmedUriScheme uri
        -- 'for_' over a 'Maybe' runs the action only for 'Just', which
        -- replaces the explicit @Nothing -> return ()@ branches.
        for_ (uriAuthority uri) $ \uriAuth -> do
          for_ (trimmedUriUserInfo uriAuth) $ \userInfo ->
            putStrLn $ " user-info = " ++ userInfo
          putStrLn $ " domain = " ++ uriRegName uriAuth
          putStrLn $ " port = " ++ uriPort uriAuth
        putStrLn $ " path = " ++ trimmedUriPath uri
        for_ (trimmedUriQuery uri) $ \query ->
          putStrLn $ " query = " ++ query
        for_ (trimmedUriFragment uri) $ \fragment ->
          putStrLn $ " fragment = " ++ fragment
    putStrLn ""
- Output:
https://bob:pass@example.com/place scheme = https user-info = bob:pass domain = example.com port = path = place foo://example.com:8042/over/there?name=ferret#nose scheme = foo domain = example.com port = :8042 path = over/there query = name=ferret fragment = nose urn:example:animal:ferret:nose scheme = urn path = example:animal:ferret:nose jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme = jdbc path = mysql://test_user:ouupppssss@localhost:3306/sakila query = profileSQL=true ftp://ftp.is.co.za/rfc/rfc1808.txt scheme = ftp domain = ftp.is.co.za port = path = rfc/rfc1808.txt http://www.ietf.org/rfc/rfc2396.txt#header1 scheme = http domain = www.ietf.org port = path = rfc/rfc2396.txt fragment = header1 ldap://[2001:db8::7]/c=GB?objectClass?one scheme = ldap domain = [2001:db8::7] port = path = c=GB query = objectClass?one mailto:John.Doe@example.com scheme = mailto path = John.Doe@example.com news:comp.infosystems.www.servers.unix scheme = news path = comp.infosystems.www.servers.unix tel:+1-816-555-1212 scheme = tel path = +1-816-555-1212 telnet://192.0.2.16:80/ scheme = telnet domain = 192.0.2.16 port = :80 path = urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme = urn path = oasis:names:specification:docbook:dtd:xml:4.1.2
J
As most errors are contextual (e.g. invalid authority, invalid path, unrecognized scheme), we shall defer error testing to the relevant consumers. This might offend some on the grounds of temporary safety, but consumers already bear responsibility to parse and validate their relevant uri element(s).
Our parsing strategy is fixed format recursive descent. (Please do not criticize this on efficiency grounds without first investigating the implementations of other parsers.)
Implementation:
NB. split: adverb; m is the delimiter.  The derived verb cuts y at the first
NB. occurrence of m and returns two boxes: the part before the delimiter and the
NB. part after it (the delimiter itself is dropped).  When m does not occur in
NB. y, the first box holds all of y and the second box is empty.
split=:1 :0
({. ; ] }.~ 1+[)~ i.&m
)
NB. uriparts: split uri text y into four boxes: scheme;authpath;query;fragment.
NB. The fragment is cut off at the first '#', then the query at the first '?',
NB. then the scheme at the first ':'; what remains is the authority-plus-path.
uriparts=:3 :0
'server fragment'=. '#' split y
'sa query'=. '?' split server
'scheme authpath'=. ':' split sa
scheme;authpath;query;fragment
)
NB. queryparts: box the pieces of query text y, cutting at each '?'.
NB. An empty y gives an empty result (no boxes).
queryparts=:3 :0
(0<#y)#<;._1 '?',y
)
NB. authpathparts: split y (the text after the scheme) into authority;path.
NB. When y starts with '//', y is cut into pieces that each begin with '/':
NB. the second piece minus its leading '/' is the authority, and the pieces
NB. after it are joined back together as the path.
NB. Otherwise the authority is empty and all of y is the path.
authpathparts=:3 :0
if. '//' -: 2{.y do.
NB. the local name split shadows the split adverb inside this verb only
split=. <;.1 y
(}.1{::split);;2}.split
else.
'';y
end.
)
NB. authparts: split authority text y into four boxes: user;creds;host;port.
NB. The userinfo is whatever precedes the first '@' (empty when there is none)
NB. and is itself cut at its first ':' into user and creds.
NB. A host starting with '[' is taken up to and including the first ']'; the
NB. text after the ']' must be empty or start with ':' (checked by assert.),
NB. and the port is that text without its first character.
NB. Any other host/port text is cut at its first ':'.
authparts=:3 :0
if. '@' e. y do.
'userinfo hostport'=. '@' split y
else.
hostport=. y [ userinfo=.''
end.
if. '[' = {.hostport do.
'host_t port_t'=. ']' split hostport
assert. (0=#port_t)+.':'={.port_t
NB. split dropped the closing bracket, so it is appended to the host again
(':' split userinfo),(host_t,']');}.port_t
else.
(':' split userinfo),':' split hostport
end.
)
NB. taskparts: parse uri text y and return a two-column boxed table of
NB. component name and value, keeping only rows whose value is non-empty.
NB. Uses uriparts, authpathparts, authparts and queryparts for the splitting.
taskparts=:3 :0
'scheme authpath querystring fragment'=. uriparts y
'auth path'=. authpathparts authpath
'user creds host port'=. authparts auth
query=. queryparts querystring
NB. export lists the local names to report; each is evaluated to get its value
export=. ;:'scheme user creds host port path query fragment'
(#~ 0<#@>@{:"1) (,. do each) export
)
Task examples:
taskparts 'foo://example.com:8042/over/there?name=ferret#nose'
┌────────┬─────────────┐
│scheme │foo │
├────────┼─────────────┤
│host │example.com │
├────────┼─────────────┤
│port │8042 │
├────────┼─────────────┤
│path │/over/there │
├────────┼─────────────┤
│query │┌───────────┐│
│ ││name=ferret││
│ │└───────────┘│
├────────┼─────────────┤
│fragment│nose │
└────────┴─────────────┘
taskparts 'urn:example:animal:ferret:nose'
┌──────┬──────────────────────────┐
│scheme│urn │
├──────┼──────────────────────────┤
│path │example:animal:ferret:nose│
└──────┴──────────────────────────┘
taskparts 'jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true'
┌──────┬──────────────────────────────────────────────────┐
│scheme│jdbc │
├──────┼──────────────────────────────────────────────────┤
│path │mysql://test_user:ouupppssss@localhost:3306/sakila│
├──────┼──────────────────────────────────────────────────┤
│query │┌───────────────┐ │
│ ││profileSQL=true│ │
│ │└───────────────┘ │
└──────┴──────────────────────────────────────────────────┘
taskparts 'ftp://ftp.is.co.za/rfc/rfc1808.txt'
┌──────┬────────────────┐
│scheme│ftp │
├──────┼────────────────┤
│host │ftp.is.co.za │
├──────┼────────────────┤
│path │/rfc/rfc1808.txt│
└──────┴────────────────┘
taskparts 'http://www.ietf.org/rfc/rfc2396.txt#header1'
┌────────┬────────────────┐
│scheme │http │
├────────┼────────────────┤
│host │www.ietf.org │
├────────┼────────────────┤
│path │/rfc/rfc2396.txt│
├────────┼────────────────┤
│fragment│header1 │
└────────┴────────────────┘
taskparts 'ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two'
┌──────┬─────────────────────────────────┐
│scheme│ldap │
├──────┼─────────────────────────────────┤
│host │[2001:db8::7] │
├──────┼─────────────────────────────────┤
│path │/c=GB │
├──────┼─────────────────────────────────┤
│query │┌───────────────────────────────┐│
│ ││objectClass=one&objectClass=two││
│ │└───────────────────────────────┘│
└──────┴─────────────────────────────────┘
taskparts 'mailto:John.Doe@example.com'
┌──────┬────────────────────┐
│scheme│mailto │
├──────┼────────────────────┤
│path │John.Doe@example.com│
└──────┴────────────────────┘
taskparts 'news:comp.infosystems.www.servers.unix'
┌──────┬─────────────────────────────────┐
│scheme│news │
├──────┼─────────────────────────────────┤
│path │comp.infosystems.www.servers.unix│
└──────┴─────────────────────────────────┘
taskparts 'tel:+1-816-555-1212'
┌──────┬───────────────┐
│scheme│tel │
├──────┼───────────────┤
│path │+1-816-555-1212│
└──────┴───────────────┘
taskparts 'telnet://192.0.2.16:80/'
┌──────┬──────────┐
│scheme│telnet │
├──────┼──────────┤
│host │192.0.2.16│
├──────┼──────────┤
│port │80 │
├──────┼──────────┤
│path │/ │
└──────┴──────────┘
taskparts 'urn:oasis:names:specification:docbook:dtd:xml:4.1.2'
┌──────┬───────────────────────────────────────────────┐
│scheme│urn │
├──────┼───────────────────────────────────────────────┤
│path │oasis:names:specification:docbook:dtd:xml:4.1.2│
└──────┴───────────────────────────────────────────────┘
Note that the path
of the example jdbc
uri is itself a uri which may be parsed:
taskparts 'mysql://test_user:ouupppssss@localhost:3306/sakila'
┌──────┬──────────┐
│scheme│mysql │
├──────┼──────────┤
│user │test_user │
├──────┼──────────┤
│creds │ouupppssss│
├──────┼──────────┤
│host │localhost │
├──────┼──────────┤
│port │3306 │
├──────┼──────────┤
│path │/sakila │
└──────┴──────────┘
Also, examples borrowed from the go
implementation:
taskparts 'ssh://alice@example.com'
┌──────┬───────────┐
│scheme│ssh │
├──────┼───────────┤
│user │alice │
├──────┼───────────┤
│host │example.com│
└──────┴───────────┘
taskparts 'https://bob:pass@example.com/place'
┌──────┬───────────┐
│scheme│https │
├──────┼───────────┤
│user │bob │
├──────┼───────────┤
│creds │pass │
├──────┼───────────┤
│host │example.com│
├──────┼───────────┤
│path │/place │
└──────┴───────────┘
taskparts 'http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64'
┌──────┬───────────────────────────────────────────┐
│scheme│http │
├──────┼───────────────────────────────────────────┤
│host │example.com │
├──────┼───────────────────────────────────────────┤
│path │/ │
├──────┼───────────────────────────────────────────┤
│query │┌─────────────────────────────────────────┐│
│ ││a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64││
│ │└─────────────────────────────────────────┘│
└──────┴───────────────────────────────────────────┘
Note that escape decoding is left to the consumer (as well as decoding things like '+' as a replacement for the space character and determining the absolute significance of relative paths and the details of ip address parsing and so on...). This seems like a good match to the hierarchical nature of uri parsing. See URL decoding for an implementation of escape decoding.
Note that taskparts
was engineered specifically for the requirements of this task -- in idiomatic use you should instead expect to call the relevant ____parts routines directly as illustrated by the first four lines of taskparts
.
Note that w3c recommends a handling for query strings which differs from that of RFC-3986. For example, the use of ;
as replacement for the &
delimiter, or the use of the query element name as the query element value when the =
delimiter is omitted from the name/value pair. We do not implement that here, as it's not a part of this task. But that sort of implementation could be achieved by replacing the definition of queryparts
. And, of course, other treatments of query strings are also possible, should that become necessary...
Java
In Java, you can use the URI
class for this, so it's pretty straightforward. I just did a bit of tweaking to output.
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Demonstrates URI decomposition with {@link java.net.URI}.
 */
public class WebAddressParser {

    /** Parses the two sample addresses and prints their components. */
    public static void main(String[] args) {
        String[] samples = {
            "foo://example.com:8042/over/there?name=ferret#nose",
            "urn:example:animal:ferret:nose"
        };
        for (String sample : samples) {
            parseAddress(sample);
        }
    }

    /**
     * Prints the scheme, domain, port, path, query and fragment of {@code a}.
     * A syntactically invalid address is reported on standard error instead.
     *
     * @param a the address text to parse
     */
    static void parseAddress(String a) {
        System.out.println("Parsing " + a);

        final URI parsed;
        try {
            // the URI constructor does all of the parsing work
            parsed = new URI(a);
        } catch (URISyntaxException x) {
            System.err.println("Oops: " + x);
            return;
        }

        // getPort() reports -1 when the address carries no explicit port
        int portNumber = parsed.getPort();
        String port = (portNumber == -1) ? "default" : String.valueOf(portNumber);

        // when getPath() is null, fall back to the scheme-specific part
        String path = parsed.getPath();
        if (path == null) {
            path = parsed.getSchemeSpecificPart();
        }

        System.out.println("\tscheme = " + parsed.getScheme());
        System.out.println("\tdomain = " + parsed.getHost());
        System.out.println("\tport = " + port);
        System.out.println("\tpath = " + path);
        System.out.println("\tquery = " + parsed.getQuery());
        System.out.println("\tfragment = " + parsed.getFragment());
    }
}
I'm only showing two examples, but the others work too, honest.
- Output:
Parsing foo://example.com:8042/over/there?name=ferret#nose scheme = foo domain = example.com port = 8042 path = /over/there query = name=ferret fragment = nose Parsing urn:example:animal:ferret:nose scheme = urn domain = null port = default path = example:animal:ferret:nose query = null fragment = null
JavaScript
As JavaScript is (at the time of writing) still the native language of the DOM, the simplest first-pass approach will be to set the href property of a DOM element, and read off the various components of the DOM parse from that element.
Here is an example, tested against the JavaScript engines of current versions of Chrome and Safari, of taking this 'Gordian knot' approach to the task:
(function (lstURL) {
    // Anchor element: assigning to its href makes the DOM parse the URL,
    // after which the components can be read back as properties.
    var anchor = document.createElement('a');

    // Properties of the anchor element copied into each result record.
    var fields = [
        'hash',
        'host',
        'hostname',
        'origin',
        'pathname',
        'port',
        'protocol',
        'search'
    ];

    // Returns an object holding the listed anchor properties for one URL.
    function parseWithDom(strURL) {
        var parts = {},
            i;

        anchor.href = strURL;
        for (i = 0; i < fields.length; i += 1) {
            parts[fields[i]] = anchor[fields[i]];
        }
        return parts;
    }

    var results = [],
        j;

    for (j = 0; j < lstURL.length; j += 1) {
        results.push(parseWithDom(lstURL[j]));
    }

    // Pretty-printed JSON, two-space indent.
    return JSON.stringify(results, null, 2);
})([
    "foo://example.com:8042/over/there?name=ferret#nose",
    "urn:example:animal:ferret:nose",
    "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
    "ftp://ftp.is.co.za/rfc/rfc1808.txt",
    "http://www.ietf.org/rfc/rfc2396.txt#header1",
    "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
    "mailto:John.Doe@example.com",
    "news:comp.infosystems.www.servers.unix",
    "tel:+1-816-555-1212",
    "telnet://192.0.2.16:80/",
    "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
    "ssh://alice@example.com",
    "https://bob:pass@example.com/place",
    "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
]);
Results of applying this approach in the JavaScript of Safari 8
[
{
"hash": "#nose",
"host": "example.com:8042",
"hostname": "example.com",
"origin": "foo://example.com:8042",
"pathname": "/over/there",
"port": "8042",
"protocol": "foo:",
"search": "?name=ferret"
},
{
"hash": "",
"host": "",
"hostname": "",
"origin": "urn://",
"pathname": "example:animal:ferret:nose",
"port": "",
"protocol": "urn:",
"search": ""
},
{
"hash": "",
"host": "",
"hostname": "",
"origin": "jdbc://",
"pathname": "mysql://test_user:ouupppssss@localhost:3306/sakila",
"port": "",
"protocol": "jdbc:",
"search": "?profileSQL=true"
},
{
"hash": "",
"host": "ftp.is.co.za",
"hostname": "ftp.is.co.za",
"origin": "ftp://ftp.is.co.za",
"pathname": "/rfc/rfc1808.txt",
"port": "",
"protocol": "ftp:",
"search": ""
},
{
"hash": "#header1",
"host": "www.ietf.org",
"hostname": "www.ietf.org",
"origin": "http://www.ietf.org",
"pathname": "/rfc/rfc2396.txt",
"port": "",
"protocol": "http:",
"search": ""
},
{
"hash": "",
"host": "[2001:db8::7]",
"hostname": "[2001:db8::7]",
"origin": "ldap://[2001:db8::7]",
"pathname": "/c=GB",
"port": "",
"protocol": "ldap:",
"search": "?objectClass=one&objectClass=two"
},
{
"hash": "",
"host": "",
"hostname": "",
"origin": "mailto://",
"pathname": "John.Doe@example.com",
"port": "",
"protocol": "mailto:",
"search": ""
},
{
"hash": "",
"host": "",
"hostname": "",
"origin": "news://",
"pathname": "comp.infosystems.www.servers.unix",
"port": "",
"protocol": "news:",
"search": ""
},
{
"hash": "",
"host": "",
"hostname": "",
"origin": "tel://",
"pathname": "+1-816-555-1212",
"port": "",
"protocol": "tel:",
"search": ""
},
{
"hash": "",
"host": "192.0.2.16:80",
"hostname": "192.0.2.16",
"origin": "telnet://192.0.2.16:80",
"pathname": "/",
"port": "80",
"protocol": "telnet:",
"search": ""
},
{
"hash": "",
"host": "",
"hostname": "",
"origin": "urn://",
"pathname": "oasis:names:specification:docbook:dtd:xml:4.1.2",
"port": "",
"protocol": "urn:",
"search": ""
},
{
"hash": "",
"host": "example.com",
"hostname": "example.com",
"origin": "ssh://example.com",
"pathname": "",
"port": "",
"protocol": "ssh:",
"search": ""
},
{
"hash": "",
"host": "example.com",
"hostname": "example.com",
"origin": "https://example.com",
"pathname": "/place",
"port": "",
"protocol": "https:",
"search": ""
},
{
"hash": "",
"host": "example.com",
"hostname": "example.com",
"origin": "http://example.com",
"pathname": "/",
"port": "",
"protocol": "http:",
"search": "?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
}
]
Julia
This solution uses Julia's URIParser package. The detailview
function shows all of the non-empty components of the URI
object created by this parser. No attempt is made to further parse more complex components, e.g. query or userinfo. Error detection is limited to indicating whether a string is parsable as a URI and providing a hint as to whether the URI
is valid (according to this package's isvalid
function).
using Printf, URIParser
# Field names of the URI struct, in declaration order.
# `names` only accepts a Module on the Julia versions that require
# `using Printf`; `fieldnames` is the call that lists a type's fields.
const FIELDS = fieldnames(URI)
"""
    detailview(uri::URI, indentlen::Int=4)

Return a multi-line string with one `name: value` line per field of `uri`
(fields taken from `FIELDS`), each line indented by `indentlen` spaces.
Fields whose string form is empty are skipped, as is a `port` of "0".
"""
function detailview(uri::URI, indentlen::Int=4)
    pad = " "^indentlen
    rows = String[]
    for name in FIELDS
        text = string(getfield(uri, name))
        if isempty(text) || (name == :port && text == "0")
            continue
        end
        push!(rows, @sprintf("%s%s: %s", pad, string(name), text))
    end
    return join(rows, "\n")
end
# Sample inputs: the task's URIs, one deliberately invalid string, and
# three further examples.
test = ["foo://example.com:8042/over/there?name=ferret#nose",
        "urn:example:animal:ferret:nose",
        "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
        "ftp://ftp.is.co.za/rfc/rfc1808.txt",
        "http://www.ietf.org/rfc/rfc2396.txt#header1",
        "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
        "mailto:John.Doe@example.com",
        "news:comp.infosystems.www.servers.unix",
        "tel:+1-816-555-1212",
        "telnet://192.0.2.16:80/",
        "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
        "This is not a URI!",
        "ssh://alice@example.com",
        "https://bob:pass@example.com/place",
        "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"]

# The loop index replaces a global `isfirst` flag: assigning to a global
# from inside a top-level `for` body makes the name loop-local when the file
# is run as a script, so the flag could not be read on the first iteration.
for (i, st) in enumerate(test)
    # blank line between reports, but not before the first one
    i == 1 || println()
    println("Attempting to parse\n \"", st, "\" as a URI:")
    uri = try
        URI(st)
    catch
        println("URIParser failed to parse this URI, is it OK?")
        continue
    end
    print("This URI is parsable ")
    println(isvalid(uri) ? "and appears to be valid." : "but may be invalid.")
    println(detailview(uri))
end
- Output:
Attempting to parse "foo://example.com:8042/over/there?name=ferret#nose" as a URI: This URI is parsable but may be invalid. schema: foo host: example.com port: 8042 path: /over/there query: name=ferret fragment: nose specifies_authority: true Attempting to parse "urn:example:animal:ferret:nose" as a URI: This URI is parsable and appears to be valid. schema: urn path: example:animal:ferret:nose specifies_authority: false Attempting to parse "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true" as a URI: This URI is parsable but may be invalid. schema: jdbc path: mysql://test_user:ouupppssss@localhost:3306/sakila query: profileSQL=true specifies_authority: false Attempting to parse "ftp://ftp.is.co.za/rfc/rfc1808.txt" as a URI: This URI is parsable and appears to be valid. schema: ftp host: ftp.is.co.za path: /rfc/rfc1808.txt specifies_authority: true Attempting to parse "http://www.ietf.org/rfc/rfc2396.txt#header1" as a URI: This URI is parsable and appears to be valid. schema: http host: www.ietf.org path: /rfc/rfc2396.txt fragment: header1 specifies_authority: true Attempting to parse "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two" as a URI: This URI is parsable and appears to be valid. schema: ldap host: 2001:db8::7 path: /c=GB query: objectClass=one&objectClass=two specifies_authority: true Attempting to parse "mailto:John.Doe@example.com" as a URI: This URI is parsable and appears to be valid. schema: mailto path: John.Doe@example.com specifies_authority: false Attempting to parse "news:comp.infosystems.www.servers.unix" as a URI: This URI is parsable and appears to be valid. schema: news path: comp.infosystems.www.servers.unix specifies_authority: false Attempting to parse "tel:+1-816-555-1212" as a URI: This URI is parsable and appears to be valid. schema: tel path: +1-816-555-1212 specifies_authority: false Attempting to parse "telnet://192.0.2.16:80/" as a URI: This URI is parsable and appears to be valid. 
schema: telnet host: 192.0.2.16 port: 80 path: / specifies_authority: true Attempting to parse "urn:oasis:names:specification:docbook:dtd:xml:4.1.2" as a URI: This URI is parsable and appears to be valid. schema: urn path: oasis:names:specification:docbook:dtd:xml:4.1.2 specifies_authority: false Attempting to parse "This is not a URI!" as a URI: URIParser failed to parse this URI, is it OK? Attempting to parse "ssh://alice@example.com" as a URI: This URI is parsable but may be invalid. schema: ssh host: example.com userinfo: alice specifies_authority: true Attempting to parse "https://bob:pass@example.com/place" as a URI: This URI is parsable and appears to be valid. schema: https host: example.com path: /place userinfo: bob:pass specifies_authority: true Attempting to parse "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64" as a URI: This URI is parsable and appears to be valid. schema: http host: example.com path: / query: a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 specifies_authority: true
Kotlin
Although the java.net.URL class can parse urls just fine, unfortunately (as far as this task is concerned) the constructor throws an exception if it does not recognize the scheme (or 'protocol' as it calls it). To deal with unrecognized protocols such as 'foo', we therefore need to replace them with a valid protocol such as 'http' to trick the URL class into parsing them properly:
// version 1.1.2
import java.net.URL
import java.net.MalformedURLException
/**
 * Tries to turn [url] into a (scheme, URL) pair.
 *
 * java.net.URL rejects schemes it has no handler for, so on failure the text
 * before the first ':' is kept as the scheme and the remainder is re-parsed
 * with "http" substituted. Returns null when there is no ':' to split on or
 * when the substituted form is still rejected.
 */
private fun parseWithFallback(url: String): Pair<String, URL>? {
    try {
        val direct = URL(url)
        return direct.protocol to direct
    } catch (ex: MalformedURLException) {
        // fall through to the scheme-substitution attempt below
    }
    val index = url.indexOf(':')
    // without this guard, take(-1) would throw IllegalArgumentException
    if (index < 0) return null
    return try {
        url.take(index) to URL("http" + url.drop(index))
    } catch (ex: MalformedURLException) {
        null
    }
}

/**
 * Prints the components of [url] (scheme, userinfo, domain, port, path,
 * query, fragment), omitting those that are absent.
 *
 * Input that cannot be parsed even after scheme substitution is reported
 * with a one-line message instead of raising an exception.
 */
fun parseUrl(url: String) {
    println("Parsing $url")
    val parsed = parseWithFallback(url)
    if (parsed == null) {
        println(" not parsable as a URL")
        println()
        return
    }
    val (scheme, u) = parsed
    println(" scheme = $scheme")
    with(u) {
        if (userInfo != null) println(" userinfo = $userInfo")
        if (!host.isEmpty()) println(" domain = $host")
        if (port != -1) println(" port = $port")
        if (!path.isEmpty()) println(" path = $path")
        if (query != null) println(" query = $query")
        if (ref != null) println(" fragment = $ref")
    }
    println()
}
/** Runs [parseUrl] over the task's sample addresses, in order. */
fun main(args: Array<String>) {
    listOf(
        "foo://example.com:8042/over/there?name=ferret#nose",
        "urn:example:animal:ferret:nose",
        "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
        "ftp://ftp.is.co.za/rfc/rfc1808.txt",
        "http://www.ietf.org/rfc/rfc2396.txt#header1",
        "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
        "mailto:John.Doe@example.com",
        "news:comp.infosystems.www.servers.unix",
        "tel:+1-816-555-1212",
        "telnet://192.0.2.16:80/",
        "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
        "ssh://alice@example.com",
        "https://bob:pass@example.com/place",
        "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
    ).forEach { address -> parseUrl(address) }
}
- Output:
Parsing foo://example.com:8042/over/there?name=ferret#nose scheme = foo domain = example.com port = 8042 path = /over/there query = name=ferret fragment = nose Parsing urn:example:animal:ferret:nose scheme = urn path = example:animal:ferret:nose Parsing jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme = jdbc path = mysql://test_user:ouupppssss@localhost:3306/sakila query = profileSQL=true Parsing ftp://ftp.is.co.za/rfc/rfc1808.txt scheme = ftp domain = ftp.is.co.za path = /rfc/rfc1808.txt Parsing http://www.ietf.org/rfc/rfc2396.txt#header1 scheme = http domain = www.ietf.org path = /rfc/rfc2396.txt fragment = header1 Parsing ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two scheme = ldap domain = [2001:db8::7] path = /c=GB query = objectClass=one&objectClass=two Parsing mailto:John.Doe@example.com scheme = mailto path = John.Doe@example.com Parsing news:comp.infosystems.www.servers.unix scheme = news path = comp.infosystems.www.servers.unix Parsing tel:+1-816-555-1212 scheme = tel path = +1-816-555-1212 Parsing telnet://192.0.2.16:80/ scheme = telnet domain = 192.0.2.16 port = 80 path = / Parsing urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme = urn path = oasis:names:specification:docbook:dtd:xml:4.1.2 Parsing ssh://alice@example.com scheme = ssh userinfo = alice domain = example.com Parsing https://bob:pass@example.com/place scheme = https userinfo = bob:pass domain = example.com path = /place Parsing http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 scheme = http domain = example.com path = / query = a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64
Lua
local url = require('socket.url')
-- Sample URIs from the task description.
local tests = {
  'foo://example.com:8042/over/there?name=ferret#nose',
  'urn:example:animal:ferret:nose',
  'jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true',
  'ftp://ftp.is.co.za/rfc/rfc1808.txt',
  'http://www.ietf.org/rfc/rfc2396.txt#header1',
  'ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two',
  'mailto:John.Doe@example.com',
  'news:comp.infosystems.www.servers.unix',
  'tel:+1-816-555-1212',
  'telnet://192.0.2.16:80/',
  'urn:oasis:names:specification:docbook:dtd:xml:4.1.2'
}

-- Writes one URI followed by every key/value pair of the table returned by
-- url.parse; pairs() gives no ordering guarantee, so line order may vary.
local function report(uri)
  local components = url.parse(uri)
  io.write('URI: ' .. uri .. '\n')
  for name, value in pairs(components) do
    io.write((' %s: %s\n'):format(name, value))
  end
  io.write('\n')
end

for index = 1, #tests do
  report(tests[index])
end
- Output:
URI: foo://example.com:8042/over/there?name=ferret#nose fragment: nose authority: example.com:8042 host: example.com query: name=ferret scheme: foo path: /over/there port: 8042 URI: urn:example:animal:ferret:nose scheme: urn path: example:animal:ferret:nose URI: jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true query: profileSQL=true scheme: jdbc path: mysql://test_user:ouupppssss@localhost:3306/sakila URI: ftp://ftp.is.co.za/rfc/rfc1808.txt host: ftp.is.co.za path: /rfc/rfc1808.txt authority: ftp.is.co.za scheme: ftp URI: http://www.ietf.org/rfc/rfc2396.txt#header1 fragment: header1 authority: www.ietf.org host: www.ietf.org path: /rfc/rfc2396.txt scheme: http URI: ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two scheme: ldap query: objectClass=one&objectClass=two host: 2001:db8::7 path: /c=GB authority: [2001:db8::7] URI: mailto:John.Doe@example.com scheme: mailto path: John.Doe@example.com URI: news:comp.infosystems.www.servers.unix scheme: news path: comp.infosystems.www.servers.unix URI: tel:+1-816-555-1212 scheme: tel path: +1-816-555-1212 URI: telnet://192.0.2.16:80/ authority: 192.0.2.16:80 host: 192.0.2.16 scheme: telnet path: / port: 80 URI: urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme: urn path: oasis:names:specification:docbook:dtd:xml:4.1.2
M2000 Interpreter
Using M2000 script to parse URL
Module checkit {
\\ Parser built from small combinators. Every parser produced below is a
\\ lambda taking (&line$, &res$) by reference and returning a boolean.
\\ any(z$): character predicate, true when a$ is found inside z$
any=lambda (z$)->{=lambda z$ (a$)->instr(z$,a$)>0}
\\ one(z$): character predicate, true when a$ equals z$
one=lambda (z$)->{=lambda z$ (a$)->z$=a$}
number$="0123456789"
\\ series(p1, p2, ...): takes character predicates (collected by Array([]))
\\ and tests consecutive characters of line$ against them in turn.
\\ On success res$ gets the matched prefix and line$ the remainder.
series=Lambda -> {
func=Array([])
=lambda func (&line$, &res$)->{
if line$="" then exit
k=each(func)
def p=0,ok as boolean
while k {
ok=false : p++ : f=array(k)
if not f(mid$(line$,p,1)) then exit
ok=true
}
if ok then res$=left$(line$, p) : line$=mid$(line$, p+1)
=ok
}
}
is_any=lambda series, any (c$) ->series(any(c$))
is_one=lambda series, one (c$) ->series(one(c$))
Is_Alpha=series(lambda (a$)-> a$ ~ "[a-zA-Z]")
Is_digit=series(any(number$))
\\ Is_hex is a plain character predicate (not a series); it is used inside Is_Escape
Is_hex=any(number$+"abcdefABCDEF")
\\ optionals(f1, f2, ...): succeeds with the first parser that accepts line$
optionals=Lambda -> {
func=Array([])
=lambda func (&line$, &res$)->{
k=each(func)
def ok as boolean
while k {
f=array(k)
if f(&line$,&res$) then ok=true : exit
}
=ok
}
}
\\ repeated(func): applies func again and again, concatenating results in res$,
\\ until func fails, line$ is empty, or a pass consumes nothing
repeated=Lambda (func)-> {
=lambda func (&line$, &res$)->{
def ok as boolean, a$
res$=""
do {
sec=len(line$)
if not func(&line$,&a$) then exit
res$+=a$
ok=true
} until line$="" or sec=len(line$)
=ok
}
}
\\ oneAndoptional(func1, func2): func1 must match; the result of func2 is
\\ appended to res$ when it also matches
oneAndoptional=lambda (func1, func2) -> {
=lambda func1, func2 (&line$, &res$)->{
def ok as boolean, a$
res$=""
if not func1(&line$,&res$) then exit
if func2(&line$,&a$) then res$+=a$
=True
}
}
\\ many(f1, f2, ...): every parser must match in sequence; results are
\\ accumulated, and line$ is restored when the sequence fails
many=Lambda -> {
func=Array([])
=lambda func (&line$, &res$)->{
k=each(func)
def p=0,ok as boolean, acc$
oldline$=line$
while k {
ok=false
res$=""
if line$="" then exit
f=array(k)
if not f(&line$,&res$) then exit
acc$+=res$
ok=true
}
if not ok then {line$=oldline$} else res$=acc$
=ok
}
}
\\ Character classes and grammar rules composed from the combinators above
is_safe=series(any("$-_@.&"))
Is_extra=series(any("!*'(),"+chr$(34)))
Is_Escape=series(any("%"), is_hex, is_hex)
\\Is_reserved=series(any("=;/#?: "))
is_xalpha=optionals(Is_Alpha, is_digit, is_safe, is_extra, is_escape)
is_xalphas=oneAndoptional(is_xalpha,repeated(is_xalpha))
is_xpalpha=optionals(is_xalpha, is_one("+"))
is_xpalphas=oneAndoptional(is_xpalpha,repeated(is_xpalpha))
Is_ialpha=oneAndoptional(Is_Alpha,repeated(is_xpalphas))
is_fragmentid=lambda is_xalphas (&lines$, &res$) -> {
=is_xalphas(&lines$, &res$)
}
is_search=oneAndoptional(is_xalphas, repeated(many(series(one("+")), is_xalphas)))
\\ is_void(f): wraps f so the result is always true; when f fails the input
\\ is left untouched and res$ is set to the empty string
is_void=lambda (f)-> {
=lambda f (&oldline$, &res$)-> {
line$=oldline$
if f(&line$, &res$) then {oldline$=line$ } else res$=""
=true
}
}
is_scheme=is_ialpha
is_path=repeated(oneAndoptional(is_void(is_xpalphas), series(one("/"))))
is_uri=oneAndoptional(many(is_scheme, series(one(":")), is_path), many(series(one("?")),is_search))
is_fragmentaddress=oneAndoptional(is_uri, many(series(one("#")),is_fragmentid ))
\\ Test inputs, pushed to the stack and read back one by one below
data "foo://example.com:8042/over/there?name=ferret#nose"
data "urn:example:animal:ferret:nose"
data "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true "
data "ftp://ftp.is.co.za/rfc/rfc1808.txt"
data "http://www.ietf.org/rfc/rfc2396.txt#header1"
data "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two"
data "mailto:John.Doe@example.com"
data "tel:+1-816-555-1212"
data "telnet://192.0.2.16:80/"
data "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"
\\ Driver: consume each input from the left, printing every recognised part
while not empty {
read What$
pen 15 {
Print What$
}
a$=""
\\ scheme; mid$(What$,2) then skips one character (the ":" after the scheme)
If is_scheme(&What$, &a$) Then Print "Scheme=";a$ : What$=mid$(What$,2)
If is_path(&What$, &a$) Then {
count=0
\\ count and strip the leading slashes; more than one means a domain follows
while left$(a$, 1)="/" { a$=mid$(a$,2): count++}
if count>1 then {
domain$=leftpart$(a$+"/", "/")
a$=rightpart$(a$,"/")
if domain$<>"" Then Print "Domain:";Domain$
if a$<>"" Then Print "Path:";a$
} else.if left$(What$,1) =":" then {
Print "path:";a$+What$: What$=""
} Else Print "Data:"; a$
}
\\ a ":" at this point introduces a port number, optionally followed by a path
if left$(What$,1) =":" then {
is_number=repeated(is_digit)
What$=mid$(What$,2): If is_number(&What$, &a$) Then Print "Port:";a$
if not left$(What$,1)="/" then exit
If is_path(&What$, &a$) Then {
while left$(a$, 1)="/" { a$=mid$(a$,2)}
if a$<>"" Then Print "Path:";a$
}
}
\\ query: a name, optionally followed by "=" and a value
if left$(What$, 1)="?" then {
What$=mid$(What$,2)
If is_search(&What$, &a$) Then {
v$=""
if left$(What$, 1)="=" then {
What$=mid$(What$,2)
If is_search(&What$, &v$) Then Print "Query:";a$;"=";v$
} else Print "Query:";a$
}
}
\\ fragment(s) introduced by "#"
While left$(What$, 1)="#" {
What$=mid$(What$,2)
if not is_xalphas(&What$, &a$) Then exit
Print "fragment:";a$
}
\\ whatever could not be classified is reported as raw data
if What$<>"" Then Print "Data:"; What$
}
}
Checkit
- Output:
foo://example.com:8042/over/there?name=ferret#nose Scheme=foo Domain:example.com Port:8042 Path:over/there Query:name=ferret fragment:nose urn:example:animal:ferret:nose Scheme=urn path:example:animal:ferret:nose jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true Scheme=jdbc path:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true ftp://ftp.is.co.za/rfc/rfc1808.txt Scheme=ftp Domain:ftp.is.co.za Path:rfc/rfc1808.txt http://www.ietf.org/rfc/rfc2396.txt#header1 Scheme=http Domain:www.ietf.org Path:rfc/rfc2396.txt fragment:header1 ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two Scheme=ldap Data:[2001:db8::7]/c=GB?objectClass=one&objectClass=two mailto:John.Doe@example.com Scheme=mailto Data:John.Doe@example.com tel:+1-816-555-1212 Scheme=tel Data:+1-816-555-1212 telnet://192.0.2.16:80/ Scheme=telnet Domain:192.0.2.16 Port:80 urn:oasis:names:specification:docbook:dtd:xml:4.1.2 Scheme=urn path:oasis:names:specification:docbook:dtd:xml:4.1.2
Using an internal function (variation of String$())
module Checkit {
\\ Builds a JSON-like report in document s$, one record per sample URL,
\\ using the "string$(a$ as ...)" forms to extract each component.
\\ The samples are pushed on a fresh stack and collected into array a.
Stack New {
Data "foo://example.com:8042/over/there?name=ferret#nose", "urn:example:animal:ferret:nose"
Data "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true", "ftp://ftp.is.co.za/rfc/rfc1808.txt"
Data "http://www.ietf.org/rfc/rfc2396.txt#header1", "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two"
Data "mailto:John.Doe@example.com", "news:comp.infosystems.www.servers.unix", "tel:+1-816-555-1212"
Data "telnet://192.0.2.16:80/", "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", "ssh://alice@example.com"
Data "https://bob:pass@example.com/place", "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
a=Array([])
}
\\ prechar$(a$, b$): quoted b$+a$ when a$ is not empty, otherwise an empty string;
\\ used below to put "?" before the search part and "#" before the hash part
function prechar$(a$, b$) {
if a$<>"" then {=quote$(b$+a$)} else ={""}
}
z=each(a)
document s$="["+{
}
While z {
a$=array$(z)
s1$={ "uri": }+quote$(a$)+{,
"authority": }+ quote$(string$(a$ as URLAuthority))+{,
"userInfo": }+ quote$(string$(a$ as URLUserInfo))+{,
"scheme": }+quote$(string$(a$ as URLScheme))+{,
"hostname": }+quote$(string$(a$ as UrlHost))+{,
"Port": }+quote$(string$(a$ as UrlPort))+{,
"pathname": }+quote$(string$(a$ as UrlPath))+{,
"search": }+prechar$(string$(a$ as URLpart 6),"?")+{,
"hash": }+prechar$(string$(a$ as UrlFragment),"#")+{
}
s$=" {"+{
}+s1$+" }"
\\ z^ is the iterator's counter (z is an iterator of a, a tuple - array in M2000)
if z^<len(a)-1 then s$=" ," ' append to document
s$={
}
}
s$="]"
\\ show the report on screen and also place it on the clipboard
Print "Press any keyboard key or mouse key to continue scrolling"
Report s$
Clipboard s$
}
Checkit
- Output:
[ { "uri": "foo://example.com:8042/over/there?name=ferret#nose", "authority": "example.com:8042", "userInfo": "", "scheme": "foo", "hostname": "example.com", "Port": "8042", "pathname": "/over/there", "search": "?name=ferret", "hash": "#nose" } , { "uri": "urn:example:animal:ferret:nose", "authority": "", "userInfo": "", "scheme": "urn", "hostname": "", "Port": "", "pathname": "example:animal:ferret:nose", "search": "", "hash": "" } , { "uri": "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true", "authority": "", "userInfo": "", "scheme": "jdbc", "hostname": "", "Port": "", "pathname": "mysql://test_user:ouupppssss@localhost:3306/sakila", "search": "?profileSQL=true", "hash": "" } , { "uri": "ftp://ftp.is.co.za/rfc/rfc1808.txt", "authority": "ftp.is.co.za", "userInfo": "", "scheme": "ftp", "hostname": "ftp.is.co.za", "Port": "21", "pathname": "/rfc/rfc1808.txt", "search": "", "hash": "" } , { "uri": "http://www.ietf.org/rfc/rfc2396.txt#header1", "authority": "www.ietf.org", "userInfo": "", "scheme": "http", "hostname": "www.ietf.org", "Port": "80", "pathname": "/rfc/rfc2396.txt", "search": "", "hash": "#header1" } , { "uri": "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two", "authority": "2001:db8::7", "userInfo": "", "scheme": "ldap", "hostname": "2001:db8::7", "Port": "389", "pathname": "/c=GB", "search": "?objectClass=one&objectClass=two", "hash": "" } , { "uri": "mailto:John.Doe@example.com", "authority": "", "userInfo": "", "scheme": "mailto", "hostname": "", "Port": "", "pathname": "John.Doe@example.com", "search": "", "hash": "" } , { "uri": "news:comp.infosystems.www.servers.unix", "authority": "", "userInfo": "", "scheme": "news", "hostname": "", "Port": "", "pathname": "comp.infosystems.www.servers.unix", "search": "", "hash": "" } , { "uri": "tel:+1-816-555-1212", "authority": "", "userInfo": "", "scheme": "tel", "hostname": "", "Port": "", "pathname": "+1-816-555-1212", "search": "", "hash": "" } , { "uri": 
"telnet://192.0.2.16:80/", "authority": "192.0.2.16:80", "userInfo": "", "scheme": "telnet", "hostname": "192.0.2.16", "Port": "80", "pathname": "", "search": "", "hash": "" } , { "uri": "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", "authority": "", "userInfo": "", "scheme": "urn", "hostname": "", "Port": "", "pathname": "oasis:names:specification:docbook:dtd:xml:4.1.2", "search": "", "hash": "" } , { "uri": "ssh://alice@example.com", "authority": "alice@example.com", "userInfo": "alice", "scheme": "ssh", "hostname": "example.com", "Port": "", "pathname": "", "search": "", "hash": "" } , { "uri": "https://bob:pass@example.com/place", "authority": "bob:pass@example.com", "userInfo": "bob:pass", "scheme": "https", "hostname": "example.com", "Port": "443", "pathname": "/place", "search": "", "hash": "" } , { "uri": "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64", "authority": "example.com", "userInfo": "", "scheme": "http", "hostname": "example.com", "Port": "80", "pathname": "/", "search": "?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64", "hash": "" } ]
Mathematica/Wolfram Language
(* Built-in parser: yields an association of the URL's components. *)
URLParse @ "foo://example.com:8042/over/there?name=ferret#nose"
- Output:
<|"Scheme" -> "foo", "User" -> None, "Domain" -> "example.com", "Port" -> 8042, "Path" -> {"", "over", "there"}, "Query" -> {"name" -> "ferret"}, "Fragment" -> "nose"|>
Nim
The uri
module provides a parseUri
proc.
import uri, strformat
proc printUri(url: string) =
  ## Echo `url`, then one labelled line for each non-empty component
  ## returned by `parseUri`, then the opaque flag when it is set.
  echo url
  let parsed = parseUri(url)
  # label/value pairs in the order they are reported
  let components = [
    ("\t Scheme", parsed.scheme),
    ("\tHostname", parsed.hostname),
    ("\tUsername", parsed.username),
    ("\tPassword", parsed.password),
    ("\t Path", parsed.path),
    ("\t Query", parsed.query),
    ("\t Port", parsed.port),
    ("\t Anchor", parsed.anchor)
  ]
  for entry in components:
    if entry[1].len > 0:
      echo entry[0], ": ", entry[1]
  if parsed.opaque:
    echo "\t Opaque: ", parsed.opaque
# Sample addresses from the task, reported in order.
let urls = [
  "foo://example.com:8042/over/there?name=ferret#nose",
  "urn:example:animal:ferret:nose",
  "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
  "ftp://ftp.is.co.za/rfc/rfc1808.txt",
  "http://www.ietf.org/rfc/rfc2396.txt#header1",
  "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
  "mailto:John.Doe@example.com",
  "news:comp.infosystems.www.servers.unix",
  "tel:+1-816-555-1212",
  "telnet://192.0.2.16:80/",
  "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
  "ssh://alice@example.com",
  "https://bob:pass@example.com/place",
  "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
]

for address in urls:
  address.printUri()
- Output:
foo://example.com:8042/over/there?name=ferret#nose Scheme: foo Hostname: example.com Path: /over/there Query: name=ferret Port: 8042 Anchor: nose urn:example:animal:ferret:nose Scheme: urn Path: example:animal:ferret:nose Opaque: true jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true Scheme: jdbc Path: mysql://test_user:ouupppssss@localhost:3306/sakila Query: profileSQL=true Opaque: true ftp://ftp.is.co.za/rfc/rfc1808.txt Scheme: ftp Hostname: ftp.is.co.za Path: /rfc/rfc1808.txt http://www.ietf.org/rfc/rfc2396.txt#header1 Scheme: http Hostname: www.ietf.org Path: /rfc/rfc2396.txt Anchor: header1 ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two Scheme: ldap Hostname: 2001:db8::7 Path: /c=GB Query: objectClass=one&objectClass=two mailto:John.Doe@example.com Scheme: mailto Hostname: example.com Username: John.Doe Opaque: true news:comp.infosystems.www.servers.unix Scheme: news Path: comp.infosystems.www.servers.unix Opaque: true tel:+1-816-555-1212 Scheme: tel Path: +1-816-555-1212 Opaque: true telnet://192.0.2.16:80/ Scheme: telnet Hostname: 192.0.2.16 Path: / Port: 80 urn:oasis:names:specification:docbook:dtd:xml:4.1.2 Scheme: urn Path: oasis:names:specification:docbook:dtd:xml:4.1.2 Opaque: true ssh://alice@example.com Scheme: ssh Hostname: example.com Username: alice https://bob:pass@example.com/place Scheme: https Hostname: example.com Username: bob Password: pass Path: /place http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 Scheme: http Hostname: example.com Path: / Query: a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64
Objeck
use Web.HTTP;
# Driver for the URL parser task: builds a Url object from each sample string
# and prints its string form when the Url reports that it parsed.
class Test {
# Program entry point; args is not used.
function : Main(args : String[]) ~ Nil {
# Sample URLs to parse.
# NOTE(review): the last entry ends with a ':' that the same sample does not
# carry in the other language sections; the recorded output includes it too.
# Confirm whether it is intentional.
urls := [
"foo://example.com:8042/over/there?name=ferret#nose",
"urn:example:animal:ferret:nose",
"jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
"ftp://ftp.is.co.za/rfc/rfc1808.txt",
"http://www.ietf.org/rfc/rfc2396.txt#header1",
"ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
"mailto:John.Doe@example.com",
"news:comp.infosystems.www.servers.unix",
"tel:+1-816-555-1212",
"telnet://192.0.2.16:80/",
"urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
"http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64:"];
# i is used as an index into urls.
each(i : urls) {
url := Url->New(urls[i]);
# URLs for which Parsed() is false are skipped without any message.
if(url->Parsed()) {
url->ToString()->PrintLine();
};
};
}
}
- Output:
foo://example.com:8042/over/there?name=ferret#nose scheme='foo' host='example.com' path='/over/there' port='8042' query='name=ferret' fragment='nose' urn:example:animal:ferret:nose scheme='urn' path='example:animal:ferret:nose' jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme='jdbc' path='mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true' ftp://ftp.is.co.za/rfc/rfc1808.txt scheme='ftp' host='ftp.is.co.za' path='/rfc/rfc1808.txt' http://www.ietf.org/rfc/rfc2396.txt#header1 scheme='http' host='www.ietf.org' path='/rfc/rfc2396.txt' fragment='header1' ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two scheme='ldap' host='[2001:db8::7]' path='/c=GB' query='objectClass=one&objectClass=two' mailto:John.Doe@example.com scheme='mailto' path='John.Doe@example.com' news:comp.infosystems.www.servers.unix scheme='news' path='comp.infosystems.www.servers.unix' tel:+1-816-555-1212 scheme='tel' path='+1-816-555-1212' telnet://192.0.2.16:80/ scheme='telnet' host='192.0.2.16' path='/' port='80' urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme='urn' path='oasis:names:specification:docbook:dtd:xml:4.1.2' http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64: scheme='http' host='example.com' path='/' query='a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64:'
Perl
You can use the URI module from CPAN to parse URIs. Note that the output is a bit different: for example, you don't get the host from the foo://
scheme, as host is only valid for schemes that define it.
#!/usr/bin/perl
use warnings;
use strict;
use URI;

# Sample URIs. Some entries contain '#', which qw() would warn about, so the
# 'qw' warning category is switched off for the scope of the do block only.
my @samples = do {
    no warnings 'qw';
    qw( foo://example.com:8042/over/there?name=ferret#nose
        urn:example:animal:ferret:nose
        jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true
        ftp://ftp.is.co.za/rfc/rfc1808.txt
        http://www.ietf.org/rfc/rfc2396.txt#header1
        ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two
        mailto:John.Doe@example.com
        news:comp.infosystems.www.servers.unix
        tel:+1-816-555-1212
        telnet://192.0.2.16:80/
        urn:oasis:names:specification:docbook:dtd:xml:4.1.2
    );
};

# Accessors to try on every URI object, in output order.
my @accessors = qw( scheme path fragment authority host port query );

foreach my $text (@samples) {
    my $uri = URI->new($text);
    print "$text\n";

    foreach my $name (@accessors) {
        # eval traps any failure of the accessor call; a failed call and an
        # undefined result are both reported by printing nothing.
        my $value = eval { $uri->$name };
        next unless defined $value;
        print "\t", $name, "\t", $value, "\n";
    }
}
- Output:
foo://example.com:8042/over/there?name=ferret#nose scheme foo path /over/there fragment nose authority example.com:8042 query name=ferret urn:example:animal:ferret:nose scheme urn path example:animal:ferret:nose jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme jdbc path mysql://test_user:ouupppssss@localhost:3306/sakila query profileSQL=true ftp://ftp.is.co.za/rfc/rfc1808.txt scheme ftp path /rfc/rfc1808.txt authority ftp.is.co.za host ftp.is.co.za port 21 http://www.ietf.org/rfc/rfc2396.txt#header1 scheme http path /rfc/rfc2396.txt fragment header1 authority www.ietf.org host www.ietf.org port 80 ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two scheme ldap path /c=GB authority [2001:db8::7] host 2001:db8::7 port 389 query objectClass=one&objectClass=two mailto:John.Doe@example.com scheme mailto path John.Doe@example.com news:comp.infosystems.www.servers.unix scheme news path comp.infosystems.www.servers.unix port 119 tel:+1-816-555-1212 scheme tel path +1-816-555-1212 telnet://192.0.2.16:80/ scheme telnet path / authority 192.0.2.16:80 host 192.0.2.16 port 80 urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme urn path oasis:names:specification:docbook:dtd:xml:4.1.2
Phix
There are some fairly rudimentary and lightly documented routines in builtins/url.e.
with javascript_semantics
include builtins/url.e

-- Print uri, then one "description : value" line for every element of the
-- parse_url() result that is not 0, followed by a blank line.
procedure show_url_details(string uri)
    ?uri
    sequence parts = parse_url(uri)
    for idx=1 to length(parts) do
        object part = parts[idx]
        if part!=0 then
            -- url_element_desc() supplies the label for position idx
            string label = url_element_desc(idx)
            printf(1,"%s : %v\n",{label,part})
        end if
    end for
    puts(1,"\n")
end procedure

-- Sample URLs for the task.
constant tests = {
    "foo://example.com:8042/over/there?name=ferret#nose",
    "urn:example:animal:ferret:nose",
    "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
    "ftp://ftp.is.co.za/rfc/rfc1808.txt",
    "http://www.ietf.org/rfc/rfc2396.txt#header1",
    "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
    "mailto:John.Doe@example.com",
    "news:comp.infosystems.www.servers.unix",
    "tel:+1-816-555-1212",
    "telnet://192.0.2.16:80/",
    "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
    "ssh://alice@example.com",
    "https://bob:pass@example.com/place",
    "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
}

for t=1 to length(tests) do
    show_url_details(tests[t])
end for
- Output:
"foo://example.com:8042/over/there?name=ferret#nose" scheme : "foo" domain : "example.com" port : 8042 path : "/over/there" query : "name=ferret" fragment : "nose" "urn:example:animal:ferret:nose" scheme : "urn" path : "example:animal:ferret:nose" "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true" scheme : "jdbc" path : "mysql://test_user:ouupppssss@localhost:3306/sakila" query : "profileSQL=true" "ftp://ftp.is.co.za/rfc/rfc1808.txt" scheme : "ftp" domain : "ftp.is.co.za" path : "/rfc/rfc1808.txt" "http://www.ietf.org/rfc/rfc2396.txt#header1" scheme : "http" domain : "www.ietf.org" path : "/rfc/rfc2396.txt" fragment : "header1" "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two" scheme : "ldap" domain : "[2001:db8::7]" path : "/c=GB" query : "objectClass=one&objectClass=two" "mailto:John.Doe@example.com" scheme : "mailto" path : "John.Doe@example.com" "news:comp.infosystems.www.servers.unix" scheme : "news" path : "comp.infosystems.www.servers.unix" "tel:+1-816-555-1212" scheme : "tel" path : "+1-816-555-1212" "telnet://192.0.2.16:80/" scheme : "telnet" domain : "192.0.2.16" port : 80 path : "/" "urn:oasis:names:specification:docbook:dtd:xml:4.1.2" scheme : "urn" path : "oasis:names:specification:docbook:dtd:xml:4.1.2" "ssh://alice@example.com" scheme : "ssh" domain : "example.com" user : "alice" "https://bob:pass@example.com/place" scheme : "https" domain : "example.com" path : "/place" user : "bob" password : "pass" "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64" scheme : "http" domain : "example.com" path : "/" query : "a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
PHP
Using the parse_url function (Parse a URL and return its components)
<?php
// Sample URLs for the task.
$samples = [
    'foo://example.com:8042/over/there?name=ferret#nose',
    'urn:example:animal:ferret:nose',
    'jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true',
    'ftp://ftp.is.co.za/rfc/rfc1808.txt',
    'http://www.ietf.org/rfc/rfc2396.txt#header1',
    'ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two',
    'mailto:John.Doe@example.com',
    'news:comp.infosystems.www.servers.unix',
    'tel:+1-816-555-1212',
    'telnet://192.0.2.16:80/',
    'urn:oasis:names:specification:docbook:dtd:xml:4.1.2',
];

// For each sample: print the URL, then the parse_url() result as rendered by
// print_r(), then an empty line.
foreach ($samples as $sample) {
    $components = parse_url($sample);
    echo $sample, PHP_EOL;
    print_r($components);
    echo PHP_EOL;
}
- Output:
foo://example.com:8042/over/there?name=ferret#nose Array ( [scheme] => foo [host] => example.com [port] => 8042 [path] => /over/there [query] => name=ferret [fragment] => nose ) urn:example:animal:ferret:nose Array ( [scheme] => urn [path] => example:animal:ferret:nose ) jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true Array ( [scheme] => jdbc [path] => mysql://test_user:ouupppssss@localhost:3306/sakila [query] => profileSQL=true ) ftp://ftp.is.co.za/rfc/rfc1808.txt Array ( [scheme] => ftp [host] => ftp.is.co.za [path] => /rfc/rfc1808.txt ) http://www.ietf.org/rfc/rfc2396.txt#header1 Array ( [scheme] => http [host] => www.ietf.org [path] => /rfc/rfc2396.txt [fragment] => header1 ) ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two Array ( [scheme] => ldap [host] => [2001:db8::7] [path] => /c=GB [query] => objectClass=one&objectClass=two ) mailto:John.Doe@example.com Array ( [scheme] => mailto [path] => John.Doe@example.com ) news:comp.infosystems.www.servers.unix Array ( [scheme] => news [path] => comp.infosystems.www.servers.unix ) tel:+1-816-555-1212 Array ( [scheme] => tel [path] => +1-816-555-1212 ) telnet://192.0.2.16:80/ Array ( [scheme] => telnet [host] => 192.0.2.16 [port] => 80 [path] => / ) urn:oasis:names:specification:docbook:dtd:xml:4.1.2 Array ( [scheme] => urn [path] => oasis:names:specification:docbook:dtd:xml:4.1.2 )
PowerShell
I was confused about the Path parameter. PowerShell returns LocalPath, AbsolutePath and AbsoluteUri; I defaulted to LocalPath, but all properties are returned in the $parsedUrls
variable.
function Get-ParsedUrl
{
    <#
    .SYNOPSIS
        Projects a System.Uri onto a fixed set of properties.
    .DESCRIPTION
        Each input is bound as [System.Uri] and passed through Select-Object.
        Two calculated properties rename existing ones: Domain is the Uri's
        Host and Path is its LocalPath. The remaining properties are copied
        from the Uri under their own names.
    .PARAMETER InputObject
        The URL to inspect; accepted positionally, by pipeline value, or by
        pipeline property name.
    #>
    [CmdletBinding()]
    [OutputType([PSCustomObject])]
    Param
    (
        [Parameter(Mandatory=$true,
                   ValueFromPipeline=$true,
                   ValueFromPipelineByPropertyName=$true,
                   Position=0)]
        [System.Uri]
        $InputObject
    )

    Begin
    {
        # Property list built once per pipeline; the order here is the
        # property order of the emitted objects.
        $selectedProperties = @(
            'Scheme',
            @{Name="Domain"; Expression={$_.Host}},
            'Port',
            @{Name="Path"  ; Expression={$_.LocalPath}},
            'Query',
            'Fragment',
            'AbsolutePath',
            'AbsoluteUri',
            'Authority',
            'HostNameType',
            'IsDefaultPort',
            'IsFile',
            'IsLoopback',
            'PathAndQuery',
            'Segments',
            'IsUnc',
            'OriginalString',
            'DnsSafeHost',
            'IdnHost',
            'IsAbsoluteUri',
            'UserEscaped',
            'UserInfo'
        )
    }

    Process
    {
        $InputObject | Select-Object -Property $selectedProperties
    }
}
# Sample URLs, one per line of the here-string.
# The line breaks inside a here-string are whatever the script file was saved
# with, which need not match [Environment]::NewLine on the machine running it.
# Splitting on the regex '\r?\n' handles both LF and CRLF files.
[string[]]$urls = @'
foo://example.com:8042/over/there?name=ferret#nose
urn:example:animal:ferret:nose
jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true
ftp://ftp.is.co.za/rfc/rfc1808.txt
http://www.ietf.org/rfc/rfc2396.txt#header1
ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two
mailto:John.Doe@example.com
news:comp.infosystems.www.servers.unix
tel:+1-816-555-1212
telnet://192.0.2.16:80/
urn:oasis:names:specification:docbook:dtd:xml:4.1.2
'@ -split '\r?\n'

# Keep the full objects in $parsedUrls; only six columns go into the table.
$parsedUrls = $urls | Get-ParsedUrl
$parsedUrls | Select-Object -Property Scheme, Port, Domain, Path, Query, Fragment | Format-Table
- Output:
Scheme Port Domain Path Query Fragment ------ ---- ------ ---- ----- -------- foo 8042 example.com /over/there ?name=ferret #nose urn -1 example:animal:ferret:nose jdbc -1 mysql://test_user:ouupppssss@localhost:3306/sakila ?profileSQL=true ftp 21 ftp.is.co.za /rfc/rfc1808.txt http 80 www.ietf.org /rfc/rfc2396.txt #header1 ldap 389 [2001:db8::7] /c=GB ?objectClass=one&objectClass=two mailto 25 example.com news -1 comp.infosystems.www.servers.unix tel -1 +1-816-555-1212 telnet 80 192.0.2.16 / urn -1 oasis:names:specification:docbook:dtd:xml:4.1.2
Python
Links to Python Documentation: v2: [1], v3: [2]
import urllib.parse as up  # urlparse for Python v2

# Parse one sample URL that exercises every component urlparse() exposes.
url = up.urlparse('http://user:pass@example.com:8081/path/file.html;params?query1=1#fragment')

# Print the components in a fixed order, one "url.<name> =  <value>" line each.
for component in ('scheme', 'netloc', 'hostname', 'port', 'path',
                  'params', 'query', 'fragment', 'username', 'password'):
    print('url.%s = ' % component, getattr(url, component))
- Output:
url.scheme = http url.netloc = user:pass@example.com:8081 url.hostname = example.com url.port = 8081 url.path = /path/file.html url.params = params url.query = query1=1 url.fragment = fragment url.username = user url.password = pass
R
urltools::url_parse() does all the actual work. The rest is just for nice output.
library(urltools)
# Sample URLs for the task.
urls <- c(
  "foo://example.com:8042/over/there?name=ferret#nose",
  "urn:example:animal:ferret:nose",
  "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
  "ftp://ftp.is.co.za/rfc/rfc1808.txt",
  "http://www.ietf.org/rfc/rfc2396.txt#header1",
  "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
  "mailto:John.Doe@example.com",
  "news:comp.infosystems.www.servers.unix",
  "tel:+1-816-555-1212",
  "telnet://192.0.2.16:80/",
  "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
  "ssh://alice@example.com",
  "https://bob:pass@example.com/place",
  "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
)

# For each URL: print it, then every column of the url_parse() result that is
# not NA as "<column> : <value>", then an empty line.
for (an_url in urls) {
  parsed <- url_parse(an_url)
  cat(an_url,"\n")
  for (field in colnames(parsed)) {
    value <- parsed[[field]]
    if (is.na(value)) next
    cat(field,"\t:",value,"\n")
  }
  cat("\n")
}
- Output:
foo://example.com:8042/over/there?name=ferret#nose scheme : foo domain : example.com port : 8042 path : over/there parameter : name=ferret fragment : nose urn:example:animal:ferret:nose domain : urn port : example:animal:ferret:nose jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme : jdbc:mysql domain : localhost port : 3306 path : sakila parameter : profileSQL=true ftp://ftp.is.co.za/rfc/rfc1808.txt scheme : ftp domain : ftp.is.co.za path : rfc/rfc1808.txt http://www.ietf.org/rfc/rfc2396.txt#header1 scheme : http domain : www.ietf.org path : rfc/rfc2396.txt fragment : header1 ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two scheme : ldap path : c=GB parameter : objectClass=one&objectClass=two mailto:John.Doe@example.com domain : example.com news:comp.infosystems.www.servers.unix domain : news port : comp.infosystems.www.servers.unix tel:+1-816-555-1212 domain : tel port : +1-816-555-1212 telnet://192.0.2.16:80/ scheme : telnet domain : 192.0.2.16 port : 80 urn:oasis:names:specification:docbook:dtd:xml:4.1.2 domain : urn port : oasis:names:specification:docbook:dtd:xml:4.1.2 ssh://alice@example.com scheme : ssh domain : example.com https://bob:pass@example.com/place scheme : https domain : example.com path : place http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 scheme : http domain : example.com parameter : a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64
Racket
Links: url
structure in Racket documentation.
#lang racket/base
(require racket/match net/url)
;; Parse the string U with string->url and print each component of the
;; resulting url struct. The scheme, path-absolute? flag and path bits are
;; always printed; the other fields only when present.
(define (debug-url-string U)
  (define parsed (string->url U))
  (define segments (url-path parsed))
  (define path-bits (map path/param-path segments))
  (define param-bits (map path/param-param segments))
  (define user (url-user parsed))
  (define host (url-host parsed))
  (define port (url-port parsed))
  (define query (url-query parsed))
  (define fragment (url-fragment parsed))
  (printf "URL: ~s~%" U)
  ;; underline as wide as the quoted URL; the 5 fixed dashes sit under "URL: "
  (printf "-----~a~%" (make-string (string-length (format "~s" U)) #\-))
  (printf "scheme: ~s~%" (url-scheme parsed))
  (when user (printf "user: ~s~%" user))
  (when host (printf "host: ~s~%" host))
  (when port (printf "port: ~s~%" port))
  ;; From documentation link in text:
  ;; > For Unix paths, the root directory is not included in `path';
  ;; > its presence or absence is implicit in the path-absolute? flag.
  (printf "path-absolute?: ~s~%" (url-path-absolute? parsed))
  (printf "path bits: ~s~%" path-bits)
  ;; param-bits holds one list per path segment; print it only if at
  ;; least one of those lists is non-empty
  (when (memf pair? param-bits)
    (printf "param bits: ~s [interleaved with path bits]~%" param-bits))
  (unless (null? query) (printf "query: ~s~%" query))
  (when fragment (printf "fragment: ~s~%" fragment))
  (newline))
;; Run debug-url-string over every sample URL, in order.
(for ([sample (in-list
               '("foo://example.com:8042/over/there?name=ferret#nose"
                 "urn:example:animal:ferret:nose"
                 "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true"
                 "ftp://ftp.is.co.za/rfc/rfc1808.txt"
                 "http://www.ietf.org/rfc/rfc2396.txt#header1"
                 "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two"
                 "mailto:John.Doe@example.com"
                 "news:comp.infosystems.www.servers.unix"
                 "tel:+1-816-555-1212"
                 "telnet://192.0.2.16:80/"
                 "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"))])
  (debug-url-string sample))
- Output:
URL: "foo://example.com:8042/over/there?name=ferret#nose" --------------------------------------------------------- scheme: "foo" host: "example.com" port: 8042 path-absolute?: #t path bits: ("over" "there") query: ((name . "ferret")) fragment: "nose" URL: "urn:example:animal:ferret:nose" ------------------------------------- scheme: "urn" path-absolute?: #f path bits: ("example:animal:ferret:nose") URL: "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true" ------------------------------------------------------------------------------ scheme: "jdbc" path-absolute?: #f path bits: ("mysql:" "" "test_user:ouupppssss@localhost:3306" "sakila") query: ((profileSQL . "true")) URL: "ftp://ftp.is.co.za/rfc/rfc1808.txt" ----------------------------------------- scheme: "ftp" host: "ftp.is.co.za" path-absolute?: #t path bits: ("rfc" "rfc1808.txt") URL: "http://www.ietf.org/rfc/rfc2396.txt#header1" -------------------------------------------------- scheme: "http" host: "www.ietf.org" path-absolute?: #t path bits: ("rfc" "rfc2396.txt") fragment: "header1" URL: "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two" ---------------------------------------------------------------- scheme: "ldap" host: "[2001" path-absolute?: #f path bits: ("db8::7]" "c=GB") query: ((objectClass . "one") (objectClass . "two"))
IPv6 URL address parses incorrectly. See issue https://github.com/plt/racket/issues/980
URL: "mailto:John.Doe@example.com" ---------------------------------- scheme: "mailto" path-absolute?: #f path bits: ("John.Doe@example.com") URL: "news:comp.infosystems.www.servers.unix" --------------------------------------------- scheme: "news" path-absolute?: #f path bits: ("comp.infosystems.www.servers.unix") URL: "tel:+1-816-555-1212" -------------------------- scheme: "tel" path-absolute?: #f path bits: ("+1-816-555-1212") URL: "telnet://192.0.2.16:80/" ------------------------------ scheme: "telnet" host: "192.0.2.16" port: 80 path-absolute?: #t path bits: ("") URL: "urn:oasis:names:specification:docbook:dtd:xml:4.1.2" ---------------------------------------------------------- scheme: "urn" path-absolute?: #f path bits: ("oasis:names:specification:docbook:dtd:xml:4.1.2")
Raku
(formerly Perl 6)
Uses the URI library which implements a Raku grammar based on the RFC 3986 BNF grammar.
use URI;
# Sample URIs as a quote-word list (whitespace separated; no comments may be
# placed inside the angle brackets, they would become list elements).
# Note that the ldap sample here uses "?objectClass?one" as its query.
my @test-uris = <
foo://example.com:8042/over/there?name=ferret#nose
urn:example:animal:ferret:nose
jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true
ftp://ftp.is.co.za/rfc/rfc1808.txt
http://www.ietf.org/rfc/rfc2396.txt#header1
ldap://[2001:db8::7]/c=GB?objectClass?one
mailto:John.Doe@example.com
news:comp.infosystems.www.servers.unix
tel:+1-816-555-1212
telnet://192.0.2.16:80/
urn:oasis:names:specification:docbook:dtd:xml:4.1.2
>;
# A single URI object is created once and re-parsed for every sample.
my $u = URI.new;
for @test-uris -> $uri {
say "URI:\t", $uri;
$u.parse($uri);
# Call each accessor by name and print it only when the result is true.
for <scheme host port path query frag> -> $t {
# Falls back to '' when the result is false.
# NOTE(review): presumably `try` also turns a failing accessor call into a
# false value here — confirm.
my $token = try {$u."$t"()} || '';
say "$t:\t", $token if $token;
}
# Blank line between URIs.
say '';
}
- Output:
URI: foo://example.com:8042/over/there?name=ferret#nose scheme: foo host: example.com port: 8042 path: /over/there query: name=ferret frag: nose URI: urn:example:animal:ferret:nose scheme: urn path: example:animal:ferret:nose URI: jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme: jdbc path: mysql://test_user:ouupppssss@localhost:3306/sakila query: profileSQL=true URI: ftp://ftp.is.co.za/rfc/rfc1808.txt scheme: ftp host: ftp.is.co.za port: 21 path: /rfc/rfc1808.txt URI: http://www.ietf.org/rfc/rfc2396.txt#header1 scheme: http host: www.ietf.org port: 80 path: /rfc/rfc2396.txt frag: header1 URI: ldap://[2001:db8::7]/c=GB?objectClass?one scheme: ldap host: [2001:db8::7] port: 389 path: /c=GB query: objectClass?one URI: mailto:John.Doe@example.com scheme: mailto path: John.Doe@example.com URI: news:comp.infosystems.www.servers.unix scheme: news port: 119 path: comp.infosystems.www.servers.unix URI: tel:+1-816-555-1212 scheme: tel path: +1-816-555-1212 URI: telnet://192.0.2.16:80/ scheme: telnet host: 192.0.2.16 port: 80 path: / URI: urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme: urn path: oasis:names:specification:docbook:dtd:xml:4.1.2
Ruby
Link to Ruby Documentation.
As you can see in the output below, the URI library doesn't parse all of these as recommended.
require 'uri'
# Sample URIs for the task.
test_cases = [
  "foo://example.com:8042/over/there?name=ferret#nose",
  "urn:example:animal:ferret:nose",
  "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
  "ftp://ftp.is.co.za/rfc/rfc1808.txt",
  "http://www.ietf.org/rfc/rfc2396.txt#header1",
  "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
  "mailto:John.Doe@example.com",
  "news:comp.infosystems.www.servers.unix",
  "tel:+1-816-555-1212",
  "telnet://192.0.2.16:80/",
  "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
  "ssh://alice@example.com",
  "https://bob:pass@example.com/place",
  "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64"
]

# Make URI::Generic#host available as #domain, the name printed below.
URI::Generic.class_eval { alias_method :domain, :host }

# Attributes to report, in output order.
attributes = %w[ scheme domain port path query fragment user password ]

test_cases.each do |text|
  puts text
  parsed = URI.parse(text)
  attributes.each do |name|
    value = parsed.send(name)
    # nil/false results are skipped; an empty string is still printed
    next unless value
    puts " #{name.rjust(8)} = #{value}"
  end
end
- Output:
foo://example.com:8042/over/there?name=ferret#nose scheme = foo domain = example.com port = 8042 path = /over/there query = name=ferret fragment = nose urn:example:animal:ferret:nose scheme = urn jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme = jdbc ftp://ftp.is.co.za/rfc/rfc1808.txt scheme = ftp domain = ftp.is.co.za port = 21 path = rfc/rfc1808.txt http://www.ietf.org/rfc/rfc2396.txt#header1 scheme = http domain = www.ietf.org port = 80 path = /rfc/rfc2396.txt fragment = header1 ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two scheme = ldap domain = [2001:db8::7] port = 389 path = /c=GB query = objectClass=one&objectClass=two mailto:John.Doe@example.com scheme = mailto news:comp.infosystems.www.servers.unix scheme = news tel:+1-816-555-1212 scheme = tel telnet://192.0.2.16:80/ scheme = telnet domain = 192.0.2.16 port = 80 path = / urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme = urn ssh://alice@example.com scheme = ssh domain = example.com path = user = alice https://bob:pass@example.com/place scheme = https domain = example.com port = 443 path = /place user = bob password = pass http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 scheme = http domain = example.com port = 80 path = / query = a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64
Rust
use url::Url;
/// Prints the components of `url`, one `name: value` line each.
///
/// The scheme, username and path are always printed (the username may be an
/// empty string). Password, host, port, query and fragment are printed only
/// when the URL has them.
fn print_fields(url: Url) {
    println!("scheme: {}", url.scheme());
    println!("username: {}", url.username());
    if let Some(password) = url.password() {
        println!("password: {}", password);
    }
    // `Url::domain` yields `None` when the host is an IP address, which
    // dropped the host of URLs such as `ldap://[2001:db8::7]/...`.
    // `Url::host_str` returns the host for domain names and IP addresses
    // alike. The "domain" label is kept so existing output lines are unchanged.
    if let Some(host) = url.host_str() {
        println!("domain: {}", host);
    }
    if let Some(port) = url.port() {
        println!("port: {}", port);
    }
    println!("path: {}", url.path());
    if let Some(query) = url.query() {
        println!("query: {}", query);
    }
    if let Some(fragment) = url.fragment() {
        println!("fragment: {}", fragment);
    }
}
/// Parses every sample URL and prints its fields, or the parse error.
fn main() {
    const SAMPLES: [&str; 14] = [
        "foo://example.com:8042/over/there?name=ferret#nose",
        "urn:example:animal:ferret:nose",
        "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
        "ftp://ftp.is.co.za/rfc/rfc1808.txt",
        "http://www.ietf.org/rfc/rfc2396.txt#header1",
        "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
        "mailto:John.Doe@example.com",
        "news:comp.infosystems.www.servers.unix",
        "tel:+1-816-555-1212",
        "telnet://192.0.2.16:80/",
        "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
        "ssh://alice@example.com",
        "https://bob:pass@example.com/place",
        "http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64",
    ];

    for raw in SAMPLES.iter().copied() {
        println!("Parsing {}", raw);
        // On failure report the error and move on; no blank line follows it.
        let parsed = match Url::parse(raw) {
            Ok(parsed) => parsed,
            Err(e) => {
                println!("Error Parsing url - {:?}", e);
                continue;
            }
        };
        print_fields(parsed);
        println!();
    }
}
Output:
Parsing foo://example.com:8042/over/there?name=ferret#nose scheme: foo username: domain: example.com port: 8042 path: /over/there query: name=ferret fragment: nose Parsing urn:example:animal:ferret:nose scheme: urn username: path: example:animal:ferret:nose Parsing jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true scheme: jdbc username: path: mysql://test_user:ouupppssss@localhost:3306/sakila query: profileSQL=true Parsing ftp://ftp.is.co.za/rfc/rfc1808.txt scheme: ftp username: domain: ftp.is.co.za path: /rfc/rfc1808.txt Parsing http://www.ietf.org/rfc/rfc2396.txt#header1 scheme: http username: domain: www.ietf.org path: /rfc/rfc2396.txt fragment: header1 Parsing ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two scheme: ldap username: path: /c=GB query: objectClass=one&objectClass=two Parsing mailto:John.Doe@example.com scheme: mailto username: path: John.Doe@example.com Parsing news:comp.infosystems.www.servers.unix scheme: news username: path: comp.infosystems.www.servers.unix Parsing tel:+1-816-555-1212 scheme: tel username: path: +1-816-555-1212 Parsing telnet://192.0.2.16:80/ scheme: telnet username: domain: 192.0.2.16 port: 80 path: / Parsing urn:oasis:names:specification:docbook:dtd:xml:4.1.2 scheme: urn username: path: oasis:names:specification:docbook:dtd:xml:4.1.2 Parsing ssh://alice@example.com scheme: ssh username: alice domain: example.com path: Parsing https://bob:pass@example.com/place scheme: https username: bob password: pass domain: example.com path: /place Parsing http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 scheme: http username: domain: example.com path: / query: a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64
Scala
import java.net.URI
/** Parses a fixed list of sample addresses with `java.net.URI` and prints
  * one line per address showing its components, or a cross mark when the
  * address is not a syntactically valid URI.
  */
object WebAddressParser extends App {

  parseAddress("foo://example.com:8042/over/there?name=ferret#nose")
  parseAddress("ftp://ftp.is.co.za/rfc/rfc1808.txt")
  parseAddress("http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64")
  parseAddress("http://www.ietf.org/rfc/rfc2396.txt#header1")
  parseAddress("https://bob:pass@example.com/place")
  parseAddress("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")
  parseAddress("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")
  parseAddress("ldap://[2001:db8::7]/c=GB?objectClass?one")
  parseAddress("mailto:John.Doe@example.com")
  parseAddress("news:comp.infosystems.www.servers.unix")
  parseAddress("ssh://alice@example.com")
  parseAddress("tel:+1-816-555-1212")
  parseAddress("telnet://192.0.2.16:80/")
  parseAddress("urn:example:animal:ferret:nose")
  parseAddress("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")
  parseAddress("This is not a URI!")

  /** Prints `a` padded to 72 columns, then either a check mark and the
    * components of the parsed URI, or a cross mark if `a` cannot be parsed.
    *
    * @param a the address text to parse
    */
  private def parseAddress(a: String): Unit = {
    print(f"Parsing $a%-72s")
    try {
      val u = new URI(a)

      print("\u2714\tscheme = " + u.getScheme)
      print("\tdomain = " + u.getHost)
      // getPort returns -1 when the URI carries no explicit port
      print("\tport = " + (if (-1 == u.getPort) "default" else u.getPort))
      // fall back to the scheme-specific part when getPath is null
      print("\tpath = " + (if (u.getPath == null) u.getSchemeSpecificPart else u.getPath))
      print("\tquery = " + u.getQuery)
      println("\tfragment = " + u.getFragment)
    } catch {
      // Only a malformed URI is an expected failure here. Catching Throwable
      // would also hide fatal errors such as OutOfMemoryError.
      case _: java.net.URISyntaxException => println('\u2718')
    }
  }
}
- Output:
See it running in your browser via Scastie (JVM).
Tcl
Tcllib's uri package already knows how to decompose many kinds of URIs. The implementation is a quite readable example of this kind of parsing. For this task, we'll use it directly.
Schemes can be added with uri::register, but the rules for this task assume HTTP-style decomposition for unknown schemes, which is done below by reaching into the documented interfaces $::uri::schemes and uri::SplitHttp.
For some URI types (such as urn, news, mailto), this provides more information than the task description demands, which is simply to parse them all as HTTP URIs.
The uri package doesn't presently handle IPv6 syntax as used in the example: a bug and patch will be submitted presently.
package require uri
package require uri::urn
# a little bit of trickery to format results:
# Print the key/value pairs of dict d using parray.
# The pairs are loaded into a local array whose name is a single tab
# character (\t). parray prefixes every line with the array name, so that
# name shows up in the output only as leading whitespace.
proc pdict {d} {
array set \t $d
parray \t
}
# Split uri into a dict of its parts, leaving out parts whose value is empty.
# Schemes listed in ::uri::schemes are handled by uri::split. Any other
# scheme has the text after the first ":" parsed by uri::SplitHttp, and the
# real scheme name is then stored under the "scheme" key.
proc parse_uri {uri} {
    regexp {^(.*?):(.*)$} $uri -> scheme remainder
    if {$scheme ni $::uri::schemes} {
        # unknown scheme: parse as though it's http
        set fields [uri::SplitHttp $remainder]
        dict set fields scheme $scheme
    } else {
        # uri already knows how to split it
        set fields [uri::split $uri]
    }
    # keep only entries whose value has at least one character
    return [dict filter $fields value ?*]
}
# Sample URIs, whitespace separated.
set tests {
    foo://example.com:8042/over/there?name=ferret#nose
    urn:example:animal:ferret:nose
    jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true
    ftp://ftp.is.co.za/rfc/rfc1808.txt
    http://www.ietf.org/rfc/rfc2396.txt#header1
    ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two
    mailto:John.Doe@example.com
    news:comp.infosystems.www.servers.unix
    tel:+1-816-555-1212
    telnet://192.0.2.16:80/
    urn:oasis:names:specification:docbook:dtd:xml:4.1.2
}

# Print each URI after a blank line, followed by its parsed parts.
foreach uri $tests {
    puts "\n$uri"
    set fields [parse_uri $uri]
    pdict $fields
}
- Output:
foo://example.com:8042/over/there?name=ferret#nose (fragment) = nose (host) = example.com (path) = over/there (port) = 8042 (query) = name=ferret (scheme) = foo urn:example:animal:ferret:nose (nid) = example (nss) = animal:ferret:nose (scheme) = urn jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true (path) = mysql://test_user:ouupppssss@localhost:3306/sakila (query) = profileSQL=true (scheme) = jdbc ftp://ftp.is.co.za/rfc/rfc1808.txt (host) = ftp.is.co.za (path) = rfc/rfc1808.txt (scheme) = ftp http://www.ietf.org/rfc/rfc2396.txt#header1 (fragment) = header1 (host) = www.ietf.org (path) = rfc/rfc2396.txt (scheme) = http ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two (host) = [2001 (scheme) = ldap mailto:John.Doe@example.com (host) = example.com (scheme) = mailto (user) = John.Doe news:comp.infosystems.www.servers.unix (newsgroup-name) = comp.infosystems.www.servers.unix (scheme) = news tel:+1-816-555-1212 (path) = +1-816-555-1212 (scheme) = tel telnet://192.0.2.16:80/ (host) = 192.0.2.16 (port) = 80 (scheme) = telnet urn:oasis:names:specification:docbook:dtd:xml:4.1.2 (nid) = oasis (nss) = names:specification:docbook:dtd:xml:4.1.2 (scheme) = urn
VBScript
' Break a URL into its components and return them as a multi-line report.
'
' url:     the URL text to analyse.
' Returns: a string that starts with "URL: <url>" followed by CRLF-separated
'          "Name: value" lines (Scheme, Username, Password, Domain, Port,
'          Path, Query, Fragment) for the parts that were found, or the line
'          "Invalid!!!" when the text contains neither "//" nor ":".
'
' All working variables are declared with Dim so that they can never pick up
' or overwrite a script-level variable of the same name.
Function parse_url(url)
	Dim scheme, domain, cred, sep, username, password, host, port
	Dim path, query, fragment, i
	parse_url = "URL: " & url
	If InStr(url,"//") Then
		'parse the scheme; the split is limited to 2 parts so that a later "//"
		'in the path or query stays attached to the rest of the URL
		scheme = Split(url,"//",2)
		'scheme(0) still ends with ":", which is dropped here
		parse_url = parse_url & vbcrlf & "Scheme: " & Mid(scheme(0),1,Len(scheme(0))-1)
		'parse the domain
		domain = Split(scheme(1),"/")
		'check if the domain includes a username, password, and port
		If InStr(domain(0),"@") Then
			cred = Split(domain(0),"@")
			'user info is "user" or "user:password"; only ":" separates the two,
			'so a "." inside the user name is kept as part of the name
			sep = InStr(cred(0),":")
			If sep > 0 Then
				username = Left(cred(0),sep-1)
				password = Mid(cred(0),sep+1)
				parse_url = parse_url & vbcrlf & "Username: " & username & vbCrLf &_
					"Password: " & password
			Else
				username = cred(0)
				parse_url = parse_url & vbcrlf & "Username: " & username
			End If
			'check if the domain have a port
			If InStr(cred(1),":") Then
				host = Mid(cred(1),1,InStr(1,cred(1),":")-1)
				port = Mid(cred(1),InStr(1,cred(1),":")+1,Len(cred(1))-InStr(1,cred(1),":"))
				parse_url = parse_url & vbCrLf & "Domain: " & host & vbCrLf & "Port: " & port
			Else
				parse_url = parse_url & vbCrLf & "Domain: " & cred(1)
			End If
		ElseIf InStr(domain(0),":") And Instr(domain(0),"[") = False And Instr(domain(0),"]") = False Then
			'host:port without brackets
			host = Mid(domain(0),1,InStr(1,domain(0),":")-1)
			port = Mid(domain(0),InStr(1,domain(0),":")+1,Len(domain(0))-InStr(1,domain(0),":"))
			parse_url = parse_url & vbCrLf & "Domain: " & host & vbCrLf & "Port: " & port
		ElseIf Instr(domain(0),"[") And Instr(domain(0),"]:") Then
			'bracketed host followed by a port, e.g. [::1]:80
			host = Mid(domain(0),1,InStr(1,domain(0),"]"))
			port = Mid(domain(0),InStr(1,domain(0),"]")+2,Len(domain(0))-(InStr(1,domain(0),"]")+1))
			parse_url = parse_url & vbCrLf & "Domain: " & host & vbCrLf & "Port: " & port
		Else
			parse_url = parse_url & vbCrLf & "Domain: " & domain(0)
		End If
		'parse the path if exist
		If UBound(domain) > 0 Then
			For i = 1 To UBound(domain)
				If i < UBound(domain) Then
					path = path & domain(i) & "/"
				ElseIf InStr(domain(i),"?") Then
					'last segment carries the query (and maybe a fragment); their
					'report lines are appended to path so they print after it
					path = path & Mid(domain(i),1,InStr(1,domain(i),"?")-1)
					If InStr(domain(i),"#") Then
						query = Mid(domain(i),InStr(1,domain(i),"?")+1,InStr(1,domain(i),"#")-InStr(1,domain(i),"?")-1)
						fragment = Mid(domain(i),InStr(1,domain(i),"#")+1,Len(domain(i))-InStr(1,domain(i),"#"))
						path = path & vbcrlf & "Query: " & query & vbCrLf & "Fragment: " & fragment
					Else
						query = Mid(domain(i),InStr(1,domain(i),"?")+1,Len(domain(i))-InStr(1,domain(i),"?"))
						path = path & vbcrlf & "Query: " & query
					End If
				ElseIf InStr(domain(i),"#") Then
					fragment = Mid(domain(i),InStr(1,domain(i),"#")+1,Len(domain(i))-InStr(1,domain(i),"#"))
					path = path & Mid(domain(i),1,InStr(1,domain(i),"#")-1) & vbCrLf &_
						"Fragment: " & fragment
				Else
					path = path & domain(i)
				End If
			Next
			parse_url = parse_url & vbCrLf & "Path: " & path
		End If
	ElseIf InStr(url,":") Then
		'no "//": everything after the first ":" is reported as the path
		scheme = Mid(url,1,InStr(1,url,":")-1)
		path = Mid(url,InStr(1,url,":")+1,Len(url)-InStr(1,url,":"))
		parse_url = parse_url & vbcrlf & "Scheme: " & scheme & vbCrLf & "Path: " & path
	Else
		parse_url = parse_url & vbcrlf & "Invalid!!!"
	End If
End Function
'test the convoluted function :-(
'The names below are chosen so they do not clash with any variable name that
'parse_url assigns to.
Dim testUrls, testIdx
testUrls = Array( _
	"foo://example.com:8042/over/there?name=ferret#nose", _
	"jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true", _
	"ftp://ftp.is.co.za/rfc/rfc1808.txt", _
	"http://www.ietf.org/rfc/rfc2396.txt#header1", _
	"ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two", _
	"mailto:John.Doe@example.com", _
	"news:comp.infosystems.www.servers.unix", _
	"tel:+1-816-555-1212", _
	"telnet://192.0.2.16:80/", _
	"urn:oasis:names:specification:docbook:dtd:xml:4.1.2", _
	"this code is messy, long, and needs a makeover!!!")

For testIdx = 0 To UBound(testUrls)
	'a separator goes between reports, not before the first or after the last
	If testIdx > 0 Then WScript.StdOut.WriteLine "-------------------------------"
	WScript.StdOut.WriteLine parse_url(testUrls(testIdx))
Next
- Output:
URL: foo://example.com:8042/over/there?name=ferret#nose Scheme: foo Domain: example.com Port: 8042 Path: over/there Query: name=ferret Fragment: nose ------------------------------- URL: jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true Scheme: jdbc:mysql Username: test_user Password: ouupppssss Domain: localhost Port: 3306 Path: sakila Query: profileSQL=true ------------------------------- URL: ftp://ftp.is.co.za/rfc/rfc1808.txt Scheme: ftp Domain: ftp.is.co.za Path: rfc/rfc1808.txt ------------------------------- URL: http://www.ietf.org/rfc/rfc2396.txt#header1 Scheme: http Domain: www.ietf.org Path: rfc/rfc2396.txt Fragment: header1 ------------------------------- URL: ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two Scheme: ldap Domain: [2001:db8::7] Path: c=GB Query: objectClass=one&objectClass=two ------------------------------- URL: mailto:John.Doe@example.com Scheme: mailto Path: John.Doe@example.com ------------------------------- URL: news:comp.infosystems.www.servers.unix Scheme: news Path: comp.infosystems.www.servers.unix ------------------------------- URL: tel:+1-816-555-1212 Scheme: tel Path: +1-816-555-1212 ------------------------------- URL: telnet://192.0.2.16:80/ Scheme: telnet Domain: 192.0.2.16 Port: 80 Path: ------------------------------- URL: urn:oasis:names:specification:docbook:dtd:xml:4.1.2 Scheme: urn Path: oasis:names:specification:docbook:dtd:xml:4.1.2 ------------------------------- URL: this code is messy, long, and needs a makeover!!! Invalid!!!
Vlang
import net.urllib
// Sample URLs/URIs fed to the parser by main(): a mix of hierarchical URLs
// (with host, port, path, query and fragment) and opaque URIs such as
// mailto:, news:, tel: and urn:.
const urls = ['jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true',
'ftp://ftp.is.co.za/rfc/rfc1808.txt',
'http://www.ietf.org/rfc/rfc2396.txt#header1',
'ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two',
'mailto:John.Doe@example.com',
'news:comp.infosystems.www.servers.unix',
'tel:+1-816-555-1212',
'telnet://192.0.2.16:80/',
'urn:oasis:names:specification:docbook:dtd:xml:4.1.2',
'foo://example.com:8042/over/there?name=ferret#nose'
]
// main parses every sample in `urls`, echoes the parsed URL and then prints
// its individual components. A parse failure is propagated out of main.
fn main() {
	for raw in urls {
		parsed := urllib.parse(raw)?
		println(parsed)
		print_url(parsed)
	}
}
// print_url writes the components of an already-parsed URL to stdout, one
// per line, skipping every component that is empty.
//
// Printed (when present): scheme, opaque part, username/password, host
// (split into host name and port when a port is given), path, raw query
// plus each decoded key/value pair, and fragment.
fn print_url(u urllib.URL) {
	println(" Scheme: $u.scheme")
	if u.opaque != "" {
		println(" Opaque: $u.opaque")
	}
	// Credentials are reported only when userinfo is actually present.
	// The previous guard (`u.str() == ''`) could never be true for a parsed,
	// non-empty URL, so this section was unreachable.
	// NOTE(review): `user` is a reference and appears to be left nil for URLs
	// without an authority part, hence the isnil() guard - confirm against
	// the net.urllib version in use.
	if !isnil(u.user) && (u.user.username != '' || u.user.password != '') {
		println(" Username: $u.user.username")
		if u.user.password != '' {
			println(" Password: $u.user.password")
		}
	}
	if u.host != "" {
		if u.port() != '' {
			// host carries "name:port"; report the two parts separately.
			println(" Host: ${u.hostname()}")
			println(" Port: ${u.port()}")
		} else {
			println(" Host: $u.host")
		}
	}
	if u.path != "" {
		println(" Path: $u.path")
	}
	if u.raw_query != "" {
		println(" RawQuery: $u.raw_query")
		// Decoded query parameters; repeated keys show up once per value.
		m := u.query().data
		for q in m {
			println(" Key: $q.key Values: $q.value")
		}
	}
	if u.fragment != "" {
		println(" Fragment: $u.fragment")
	}
}
- Output:
jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true Scheme: jdbc Opaque: mysql://test_user:ouupppssss@localhost:3306/sakila RawQuery: profileSQL=true Key: profileSQL Values: true ftp://ftp.is.co.za/rfc/rfc1808.txt Scheme: ftp Host: ftp.is.co.za Path: /rfc/rfc1808.txt http://www.ietf.org/rfc/rfc2396.txt#header1 Scheme: http Host: www.ietf.org Path: /rfc/rfc2396.txt Fragment: header1 ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two Scheme: ldap Host: [2001:db8::7] Path: /c=GB RawQuery: objectClass=one&objectClass=two Key: objectClass Values: one Key: objectClass Values: two mailto:John.Doe@example.com Scheme: mailto Opaque: John.Doe@example.com news:comp.infosystems.www.servers.unix Scheme: news Opaque: comp.infosystems.www.servers.unix tel:+1-816-555-1212 Scheme: tel Opaque: +1-816-555-1212 telnet://192.0.2.16:80/ Scheme: telnet Host: 192.0.2.16 Port: 80 Path: / urn:oasis:names:specification:docbook:dtd:xml:4.1.2 Scheme: urn Opaque: oasis:names:specification:docbook:dtd:xml:4.1.2 foo://example.com:8042/over/there?name=ferret#nose Scheme: foo Host: example.com Port: 8042 Path: /over/there RawQuery: name=ferret Key: name Values: ferret Fragment: nose
Wren
Translated from the VBScript entry, though modified quite a bit.
// Splits a URL/URI string into its components and prints a report, one
// "Name = value" line per component, followed by a blank line.
//
// Three cases are distinguished:
//   1. "scheme://authority/path?query#fragment" - taken only when exactly one
//      ":" precedes the first "//", so "jdbc:mysql://..." is NOT treated as
//      hierarchical and falls through to case 2.
//   2. "scheme:rest" - everything after the first ":" is reported as the
//      path, optionally followed by "?query" and "#fragment".
//   3. anything without a ":" is reported as invalid.
//
// The port is reported with its leading ":" (e.g. "Port = :8042").
// Returns nothing; the result is written with System.print.
var urlParse = Fn.new { |url|
    var parseUrl = "URL = " + url
    var index
    if ((index = url.indexOf("//")) && index >= 0 && url[0...index].count { |c| c == ":" } == 1) {
        // parse the scheme (drop the trailing ":" left in front of "//")
        var scheme = url.split("//")
        parseUrl = parseUrl + "\n" + "Scheme = " + scheme[0][0..-2]
        // parse the domain: first "/"-separated piece is the authority,
        // the remaining pieces are path segments
        var domain = scheme[1].split("/")
        // check if the domain includes a username, password and port
        if (domain[0].contains("@")) {
            var cred = domain[0].split("@")
            // userinfo is "user" or "user:password" (RFC 3986, section 3.2.1).
            // Split on the first ":" only, so user names containing "." and
            // passwords containing ":" are kept intact.
            var username = cred[0]
            var password = ""
            var colon = cred[0].indexOf(":")
            if (colon >= 0) {
                username = cred[0][0...colon]
                password = cred[0][colon + 1..-1]
            }
            parseUrl = parseUrl + "\n" + "Username = " + username
            if (password != "") parseUrl = parseUrl + "\n" + "Password = " + password
            // check if the domain has a port
            if (cred[1].contains(":")) {
                var split = cred[1].split(":")
                var host = split[0]
                var port = ":" + split[1]
                parseUrl = parseUrl + "\n" + "Domain = " + host + "\n" + "Port = " + port
            } else {
                parseUrl = parseUrl + "\n" + "Domain = " + cred[1]
            }
        } else if (domain[0].contains(":") && !domain[0].contains("[") && !domain[0].contains("]")) {
            // plain "host:port"
            var split = domain[0].split(":")
            var host = split[0]
            var port = ":" + split[1]
            parseUrl = parseUrl + "\n" + "Domain = " + host + "\n" + "Port = " + port
        } else if (domain[0].contains("[") && domain[0].contains("]:")) {
            // bracketed IPv6 literal followed by a port: "[addr]:port"
            var split = domain[0].split("]")
            var host = split[0] + "]"
            var port = ":" + split[1][1..-1]
            parseUrl = parseUrl + "\n" + "Domain = " + host + "\n" + "Port = " + port
        } else {
            parseUrl = parseUrl + "\n" + "Domain = " + domain[0]
        }
        // parse the path if it exists
        if (domain.count > 1) {
            var path = "/"
            for (i in 1...domain.count) {
                if (i < domain.count - 1) {
                    // intermediate segment: re-join with "/"
                    path = path + domain[i] + "/"
                } else if (domain[i].contains("?")) {
                    // last segment carries the query (and maybe a fragment);
                    // they are appended to `path` as extra report lines
                    var split = domain[i].split("?")
                    path = path + split[0]
                    if (domain[i].contains("#")) {
                        var split2 = split[1].split("#")
                        var query = split2[0]
                        var fragment = split2[1]
                        path = path + "\n" + "Query = " + query + "\n" + "Fragment = " + fragment
                    } else {
                        var query = split[1]
                        path = path + "\n" + "Query = " + query
                    }
                } else if (domain[i].contains("#")) {
                    var split = domain[i].split("#")
                    var fragment = split[1]
                    path = path + split[0] + "\n" + "Fragment = " + fragment
                } else {
                    path = path + domain[i]
                }
            }
            parseUrl = parseUrl + "\n" + "Path = " + path
        }
    } else if (url.contains(":")) {
        // opaque form: "scheme:rest"
        var index = url.indexOf(":")
        var scheme = url[0...index]
        parseUrl = parseUrl + "\n" + "Scheme = " + scheme + "\n"
        var path = url[index+1..-1]
        if (!path.contains("?")) {
            parseUrl = parseUrl + "Path = " + path
        } else {
            var split = path.split("?")
            var query = split[1]
            parseUrl = parseUrl + "Path = " + split[0] + "\n"
            if (!query.contains("#")) {
                parseUrl = parseUrl + "Query = " + query
            } else {
                split = query.split("#")
                var fragment = split[1]
                // Query and Fragment each go on their own line, matching the
                // hierarchical branch above.
                parseUrl = parseUrl + "Query = " + split[0] + "\n" + "Fragment = " + fragment
            }
        }
    } else {
        parseUrl = parseUrl + "\n" + "Invalid!!!"
    }
    System.print(parseUrl)
    System.print()
}
// Sample URLs/URIs exercised by the parser: hierarchical URLs with and
// without credentials, ports, queries and fragments, plus opaque URIs.
// "\%" escapes the percent sign so that it is not read as the start of a
// string interpolation.
var urls = [
    "foo://example.com:8042/over/there?name=ferret#nose",
    "urn:example:animal:ferret:nose",
    "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true",
    "ftp://ftp.is.co.za/rfc/rfc1808.txt",
    "http://www.ietf.org/rfc/rfc2396.txt#header1",
    "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two",
    "mailto:John.Doe@example.com",
    "news:comp.infosystems.www.servers.unix",
    "tel:+1-816-555-1212",
    "telnet://192.0.2.16:80/",
    "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
    "ssh://alice@example.com",
    "https://bob:pass@example.com/place",
    "http://example.com/?a=1&b=2+2&c=3&c=4&d=\%65\%6e\%63\%6F\%64\%65\%64"
]
// Parse and report each sample in list order.
urls.each { |sample| urlParse.call(sample) }
- Output:
URL = foo://example.com:8042/over/there?name=ferret#nose Scheme = foo Domain = example.com Port = :8042 Path = /over/there Query = name=ferret Fragment = nose URL = urn:example:animal:ferret:nose Scheme = urn Path = example:animal:ferret:nose URL = jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true Scheme = jdbc Path = mysql://test_user:ouupppssss@localhost:3306/sakila Query = profileSQL=true URL = ftp://ftp.is.co.za/rfc/rfc1808.txt Scheme = ftp Domain = ftp.is.co.za Path = /rfc/rfc1808.txt URL = http://www.ietf.org/rfc/rfc2396.txt#header1 Scheme = http Domain = www.ietf.org Path = /rfc/rfc2396.txt Fragment = header1 URL = ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two Scheme = ldap Domain = [2001:db8::7] Path = /c=GB Query = objectClass=one&objectClass=two URL = mailto:John.Doe@example.com Scheme = mailto Path = John.Doe@example.com URL = news:comp.infosystems.www.servers.unix Scheme = news Path = comp.infosystems.www.servers.unix URL = tel:+1-816-555-1212 Scheme = tel Path = +1-816-555-1212 URL = telnet://192.0.2.16:80/ Scheme = telnet Domain = 192.0.2.16 Port = :80 Path = / URL = urn:oasis:names:specification:docbook:dtd:xml:4.1.2 Scheme = urn Path = oasis:names:specification:docbook:dtd:xml:4.1.2 URL = ssh://alice@example.com Scheme = ssh Username = alice Domain = example.com URL = https://bob:pass@example.com/place Scheme = https Username = bob Password = pass Domain = example.com Path = /place URL = http://example.com/?a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64 Scheme = http Domain = example.com Path = / Query = a=1&b=2+2&c=3&c=4&d=%65%6e%63%6F%64%65%64
- Programming Tasks
- Solutions by Programming Task
- String manipulation
- Parser
- Ada
- AWS
- ALGOL 68
- AppleScript
- C sharp
- Crystal
- Elixir
- F Sharp
- Go
- Groovy
- Haskell
- J
- Java
- JavaScript
- Julia
- Kotlin
- Lua
- LuaSocket
- M2000 Interpreter
- Mathematica
- Wolfram Language
- Nim
- Objeck
- Perl
- Phix
- PHP
- PowerShell
- Python
- R
- Racket
- Raku
- Ruby
- Rust
- Scala
- Tcl
- Tcllib
- VBScript
- Vlang
- Wren