XML/XPath: Difference between revisions

Content added Content deleted

Inline

Revision as of 18:07, 6 November 2009

Perform the following three XPath queries on the XML Document below:

Retrieve the first "item" element
Perform an action on each "price" element (print it out)
Get an array of all the "name" elements

XML Document:

<inventory title="OmniCorp Store #45x10^3">
  <section name="health">
    <item upc="123456789" stock="12">
      <name>Invisibility Cream</name>
      <price>14.50</price>
      <description>Makes you invisible</description>
    </item>
    <item upc="445322344" stock="18">
      <name>Levitation Salve</name>
      <price>23.99</price>
      <description>Levitate yourself for up to 3 hours per application</description>
    </item>
  </section>
  <section name="food">
    <item upc="485672034" stock="653">
      <name>Blork and Freen Instameal</name>
      <price>4.95</price>
      <description>A tasty meal in a tablet; just add water</description>
    </item>
    <item upc="132957764" stock="44">
      <name>Grob winglets</name>
      <price>3.56</price>
      <description>Tender winglets of Grob. Just add water</description>
    </item>
  </section>
</inventory>

AutoHotkey

With regular expressions <lang AutoHotkey> FileRead, inventory, xmlfile.xml

RegExMatch(inventory, "<item.*?</item>", item1) MsgBox % item1

pos = 1 While, pos := RegExMatch(inventory, "<price>(.*?)</price>", price, pos + 1)

 MsgBox % price1

While, pos := RegExMatch(inventory, "<name>.*?</name>", name, pos + 1)

 names .= name . "`n"

MsgBox % names </lang>

Library: AHK XPath

Include xpath.ahk

xpath_load(doc, "xmlfile.xml")

Retrieve the first "item" element

MsgBox % xpath(doc, "/inventory/section[1]/item[1]/text()")

Perform an action on each "price" element (print it out)

prices := xpath(doc, "/inventory/section/item/price/text()") Loop, Parse, prices,`,

 reordered .= A_LoopField "`n"

MsgBox % reordered

Get an array of all the "name" elements

MsgBox % xpath(doc, "/inventory/section/item/name") </lang>

C#

<lang csharp> XmlReader XReader;

 // Either read the xml from a string ...
 XReader = XmlReader.Create(new StringReader("<inventory title=... </inventory>"));

 // ... or read it from the file system.
 XReader = XmlReader.Create("xmlfile.xml");

 // Create a XPathDocument object (which implements the IXPathNavigable interface)
 // which is optimized for XPath operation. (very fast).
 IXPathNavigable XDocument = new XPathDocument(XReader);

 // Task 1: select the first "item" element in document order.
 // SelectSingleNode returns null when nothing matches, so test for null.
 // (MoveToFirst() must not be used as the availability check: it repositions
 // the navigator on the first sibling of the current node, which need not be
 // the selected "item" element.)
 XPathNavigator Nav = XDocument.CreateNavigator().SelectSingleNode("//item");

 if (Nav != null)
 {
   Console.WriteLine(Nav.OuterXml); // The outer xml of the first item element.
 }

 // Task 2: get an iterator to loop over multiple selected nodes and print each price.
 XPathNodeIterator Iterator = XDocument.CreateNavigator().Select("//price");

 while (Iterator.MoveNext())
 {
   Console.WriteLine(Iterator.Current.Value);
 }

 // Task 3: collect the text of every "name" element.
 Iterator = XDocument.CreateNavigator().Select("//name");

 // Use a generic list.
 List<string> NodesValues = new List<string>();

 while (Iterator.MoveNext())
 {
   NodesValues.Add(Iterator.Current.Value);
 }

 // Convert the generic list to an array and output the count of items.
 Console.WriteLine(NodesValues.ToArray().Length);</lang>

" "<item upc=\"123456789\" stock=\"12\">" "<name>Invisibility Cream</name>" "<price>14.50</price>" "<description>Makes you invisible</description>" "</item>" "<item upc=\"445322344\" stock=\"18\">" "<name>Levitation Salve</name>" "<price>23.99</price>" "<description>Levitate yourself for up to 3 hours per application</description>" "</item>" "

" "

" "<item upc=\"485672034\" stock=\"653\">" "<name>Blork and Freen Instameal</name>" "<price>4.95</price>" "<description>A tasty meal in a tablet; just add water</description>" "</item>" "<item upc=\"132957764\" stock=\"44\">" "<name>Grob winglets</name>" "<price>3.56</price>" "<description>Tender winglets of Grob. Just add water</description>" "</item>" "

"

     "</inventory>" ) ;
  std::string::size_type found = xmltext.find( "<item" , 0 ) ; //beginning of first item
  std::string::size_type foundnext = xmltext.find(  "</item>" , found + 5 ) ; //and its end
  std::cout << "The first item is\n" << xmltext.substr( found + 5 , foundnext - ( found + 5 ) ) << '\n' ;
  std::string::const_iterator start , end ;
  start = xmltext.begin( ) ;
  end = xmltext.end( ) ;
  boost::match_results<std::string::const_iterator> what ;
  boost::regex pricefind( "<price>(\\d+\\.?\\d+)</price>" ) ;//this regex finds the prices
  start = xmltext.begin( ) ;
  std::cout << "The prices are:\n" ;
  while ( boost::regex_search( start , end , what , pricefind ) ) { 
     std::string price( what[ 1 ].first , what[ 1 ].second ) ;//find the first price
     std::cout << price << std::endl ;                        
     start = what[ 1 ].second ;                               //continue search after first price found
  }
  start = xmltext.begin( ) ;
  std::vector<std::string> names ;
  boost::regex namefind( "<name>(.+?)</name>" ) ;            //find characters, be greedy!
  while ( boost::regex_search ( start , end , what , namefind ) ) {
     std::string name ( what[ 1 ].first , what[ 1 ].second ) ;
     names.push_back( name ) ;
     start = what[ 1 ].second ;
  }
  std::cout << "The following name elements were found in the xml string:\n" ;
  std::copy( names.begin( ) , names.end( ) , std::ostream_iterator<std::string>( std::cout , "\n" )) ;
  return 0 ;

} </lang>

ColdFusion

 <cfsavecontent variable="xmlString">
 <inventory
 ...
 </inventory>
 </cfsavecontent>
 <cfset xml = xmlParse(xmlString)>
 <!--- First Task --->
 <cfset itemSearch = xmlSearch(xml, "//item")>
 <!--- item = the first Item (xml element object) --->
 <cfset item = itemSearch[1]>
 <!--- Second Task --->
 <cfset priceSearch = xmlSearch(xml, "//price")>
 <!--- loop and print each price --->
 <cfloop from="1" to="#arrayLen(priceSearch)#" index="i">
   #priceSearch[i].xmlText#<br/>
 </cfloop>
 <!--- Third Task --->
 <!--- array of all the name elements --->
 <cfset names = xmlSearch(xml, "//name")>
 <!--- visualize the results --->
 <cfdump var="#variables#">

D

It is important to note that the KXML library currently only supports XPath minimally.

Library: KXML

<lang d> import kxml.xml; char[]xmlinput = "<inventory title=\"OmniCorp Store #45x10^3\">

 
   <item upc=\"123456789\" stock=\"12\">
     <name>Invisibility Cream</name>
     <price>14.50</price>
     <description>Makes you invisible</description>
   </item>
   <item upc=\"445322344\" stock=\"18\">
     <name>Levitation Salve</name>
     <price>23.99</price>
     <description>Levitate yourself for up to 3 hours per application</description>
   </item>
 
 
   <item upc=\"485672034\" stock=\"653\">
     <name>Blork and Freen Instameal</name>
     <price>4.95</price>
     <description>A tasty meal in a tablet; just add water</description>
   </item>
   <item upc=\"132957764\" stock=\"44\">
     <name>Grob winglets</name>
     <price>3.56</price>
     <description>Tender winglets of Grob. Just add water</description>
   </item>

</inventory> "; void main() {

       auto root = readDocument(xmlinput);
       auto firstitem = root.parseXPath("inventory/section/item")[0];
       foreach(price;root.parseXPath("inventory/section/item/price")) {
               std.stdio.writefln("%s",price.getCData);
       }
       auto namearray = root.parseXPath("inventory/section/item/name");

}

</lang>

Groovy

<lang groovy>def inventory = new XmlSlurper().parseText("<inventory...")    //optionally parseText(new File("inv.xml").text)
def firstItem = inventory.section.item[0]                       //1. first item
inventory.section.item.price.each { println it }                //2. print each price
def allNamesArray = inventory.section.item.name.collect {it}    //3. collect item names into an array</lang>

JavaScript

Works with: Firefox version 2.0

<lang javascript> //create XMLDocument object from file

var xhr = new XMLHttpRequest();
xhr.open('GET', 'file.xml', false);
xhr.send(null);
var doc = xhr.responseXML;

//get first <item> element
var firstItem = doc.evaluate( '//item[1]', doc, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue;
alert( firstItem.textContent );

//output contents of <price> elements
var prices = doc.evaluate( '//price', doc, null, XPathResult.ANY_TYPE, null );
for( var price = prices.iterateNext(); price != null; price = prices.iterateNext() ) {
  alert( price.textContent );
}

//add <name> elements to array
var names = doc.evaluate( '//name', doc, null, XPathResult.ANY_TYPE, null);
var namesArray = [];
for( var name = names.iterateNext(); name != null; name = names.iterateNext() ) {
  namesArray.push( name );
}
alert( namesArray );</lang>

Although some browsers support XPath, working with XML is much easier with E4X.

<lang javascript> //create XML object from file

var xhr = new XMLHttpRequest();
xhr.open('GET', 'file.xml', false);
xhr.send(null);
var doc = new XML(xhr.responseText);

//get first <item> element
var firstItem = doc..item[0];
alert( firstItem );

//output contents of <price> elements
for each( var price in doc..price ) {
  alert( price );
}

//add <name> elements to array
var names = [];
for each( var name in doc..name ) {
  names.push( name );
}
alert( names );</lang>

Perl

Library: XML::XPath XPath

<lang perl> use XML::XPath qw();

# Build the XPath object from the XML text (the document is abbreviated here).
my $x = XML::XPath->new('<inventory ... </inventory>');

# 1. First "item" element: element 0 of the node list returned by the query.
[$x->findnodes('//item[1]')->get_nodelist]->[0];
# 2. Print the nodes matched by '//price', rendered as a string.
print $x->findnodes_as_string('//price');
# 3. The list of all "name" elements.
$x->findnodes('//name')->get_nodelist;</lang>

PHP

 <?php
 //PHP5 only example due to changes in XML extensions between version 4 and 5 (Tested on PHP5.2.0)
 //loadXML() is an instance method: create the document first, then load into it
 //(calling it statically is deprecated and fails on current PHP versions).
 $doc = new DOMDocument();
 $doc->loadXML('<inventory title="OmniCorp Store #45x10^3">...</inventory>');
 //Load from file instead with $doc->load('filename');
 $xpath = new DOMXPath($doc);
 /* 
     1st Task: Retrieve the first "item" element
 */
 $nodelist = $xpath->query('//item');
 $result = $nodelist->item(0);
 /* 
     2nd task: Perform an action on each "price" element (print it out)
 */
 $nodelist = $xpath->query('//price');
 for($i = 0; $i < $nodelist->length; $i++)
 {
   //print each price element in the DOMNodeList instance, $nodelist, as text/xml followed by a newline
   print $doc->saveXML($nodelist->item($i))."\n";
 }
 /* 
     3rd Task: Get an array of all the "name" elements
 */
 $nodelist = $xpath->query('//name');
 //our array to hold all the name elements, though in practice you'd probably not need to do this and simply use the DOMNodeList
 $result = array(); 
 //a different way of iterating through the DOMNodeList
 foreach($nodelist as $node)
 {
   $result[] = $node; 
 }

</lang>

Python

Python has basic xml parsing built in

from xml.dom import minidom

# Load the XML document from a file and parse it from the file stream ...
with open("test3.xml") as xmlfile:
    xmldoc = minidom.parse(xmlfile).documentElement
# ... or, alternatively, parse a string. The document is abbreviated here;
# substitute the full inventory XML for the "..." placeholder.
xmldoc = minidom.parseString('<inventory title="OmniCorp Store #45x10^3">...</inventory>').documentElement

# 1st Task: Retrieve the first "item" element
i = xmldoc.getElementsByTagName("item")  # get a list of all "item" tags
firstItemElement = i[0]  # get the first element

# 2nd task: Perform an action on each "price" element (print it out)
for j in xmldoc.getElementsByTagName("price"):  # get a list of all "price" tags
    print(j.childNodes[0].data)  # XML Element . TextNode . data of textnode

# 3rd Task: Get an array of all the "name" elements
namesArray = xmldoc.getElementsByTagName("name")

R

Library: XML (R)

# Require the XML package you can download from http://www.omegahat.org/RSXML/
library("XML")
doc <- xmlInternalTreeParse("test3.xml")

# 1st Task: Retrieve the first "item" element
# getNodeSet returns a list of nodes; [[1]] extracts its first element.
(firstItemElement <- getNodeSet(doc, "//item")[[1]])

# 2nd task: Perform an action on each "price" element (print it out)
prices <- sapply(getNodeSet(doc, "//price"), xmlValue)
# seq_along yields an empty sequence when there are no prices (1:length would give 1, 0).
for(i in seq_along(prices)) print(prices[i])

# 3rd Task: Get an array of all the "name" elements
(namesArray <- sapply(getNodeSet(doc, "//name"), xmlValue))

</lang>

Ruby

Library: REXML

<lang ruby> #Example taken from the REXML tutorial (http://www.germane-software.com/software/rexml/docs/tutorial.html)

 require "rexml/document"
 include REXML
 #create the REXML Document from the string (%q is Ruby's multiline string, everything between the two @-characters is the string)
 doc = Document.new(
         %q@<inventory title="OmniCorp Store #45x10^3">
              ...
            </inventory>
           @
                           )
 # The invisibility cream is the first <item>
 invisibility = XPath.first( doc, "//item" ) 
 # Prints out all of the prices
 XPath.each( doc, "//price") { |element| puts element.text }
 # Gets an array of all of the "name" elements in the document.
 names = XPath.match( doc, "//name" )</lang>

Scala

The problem description isn't clear on whether what is wanted is the element or the text. Because of that, I'm doing both except for the first problem.

The commands are being inputted into Scala's REPL, to better show the results.

<lang scala> scala> val xml = <inventory title="OmniCorp Store #45x10^3">

    |   
    |     <item upc="123456789" stock="12">
    |       <name>Invisibility Cream</name>
    |       <price>14.50</price>
    |       <description>Makes you invisible</description>
    |     </item>
    |     <item upc="445322344" stock="18">
    |       <name>Levitation Salve</name>
    |       <price>23.99</price>
    |       <description>Levitate yourself for up to 3 hours per application</description>
    |     </item>
    |   
    |   
    |     <item upc="485672034" stock="653">
    |       <name>Blork and Freen Instameal</name>
    |       <price>4.95</price>
    |       <description>A tasty meal in a tablet; just add water</description>
    |     </item>
    |     <item upc="132957764" stock="44">
    |       <name>Grob winglets</name>
    |       <price>3.56</price>
    |       <description>Tender winglets of Grob. Just add water</description>
    |     </item>
    |   
    | </inventory>

xml: scala.xml.Elem = <inventory title="OmniCorp Store #45x10^3">

        
          <item upc="123456789" stock="12">
            <name>Invisibility Cream</name>
            <price>14.50</price>
            <description>Makes you invisible</description>
          </item>
          <item upc="445322344" stock="18">
            <name>Levitation Salve</name>
            <price>23.99</price>
            <description>Levitate yourself for up to 3 hours per application</description>
          </item>
        
        
          <item upc="485672034" stock="653">
            <name>Blork and Freen Instameal</name>
            <price>4.95</price>
            <description>A tasty meal in a tablet; just add water</description>
          </item>
          <item upc="132957764" stock="44">
            <name>Grob winglets</name>
            <price>3.56</price>
            <description>Tender winglets of Grob. Just add water</description>
          </item>
        
      </inventory>

scala> val firstItem = xml \\ "item" head firstItem: scala.xml.Node = <item upc="123456789" stock="12">

            <name>Invisibility Cream</name>
            <price>14.50</price>
            <description>Makes you invisible</description>
          </item>

scala> xml \\ "price" foreach println <price>14.50</price> <price>23.99</price> <price>4.95</price> <price>3.56</price>

scala> xml \\ "price" map (_ text) foreach println 14.50 23.99 4.95 3.56

scala> val elements = xml \\ "name" toArray elements: Array[scala.xml.Node] = Array(<name>Invisibility Cream</name>, <name>Levitation Salve</name>, <name>Blork and Freen Instameal</name>, <name>Grob winglets</name>)

scala> val values = xml \\ "name" map (_ text) toArray values: Array[String] = Array(Invisibility Cream, Levitation Salve, Blork and Freen Instameal, Grob winglets) </lang>

Tcl

Library: tDOM

<lang tcl># assume $xml holds the XML data
package require tdom
set doc [dom parse $xml]
set root [$doc documentElement]

# collect all "name" elements and report how many were found
set allNames [$root selectNodes //name]
puts [llength $allNames] ;# ==> 4

# first "item" element; print its upc attribute
set firstItem [lindex [$root selectNodes //item] 0]
puts [$firstItem @upc] ;# ==> 123456789

# print the text of every "price" element
foreach node [$root selectNodes //price] {
    puts [$node text]
}</lang>

Visual Basic .NET

       Dim first_item = xml.XPathSelectElement("//item")
       Console.WriteLine(first_item)

       For Each price In xml.XPathSelectElements("//price")
           Console.WriteLine(price.Value)
       Next

       Dim names = (From item In xml.XPathSelectElements("//name") Select item.Value).ToArray

XQuery

(:
  1. Retrieve the first "item" element
  Notice the parentheses around //item. This first evaluates all item elements and then retrieves the first one.
  Without the parentheses you get the first item for every section.
:)
let $firstItem := (//item)[1]

(: 2. Perform an action on each "price" element (print it out) :)
let $price := //price/data(.)

(: 3. Get an array of all the "name" elements :)
let $names := //name

return
  <result>
    <firstItem>{$firstItem}</firstItem>
    <prices>{$price}</prices>
    <names>{$names}</names>
  </result>

</lang>

Performing this XQuery on the given input document results in <lang> <?xml version="1.0" encoding="UTF-8"?> <result>

  <firstItem>
     <item upc="123456789" stock="12">
        <name>Invisibility Cream</name>
        <price>14.50</price>
        <description>Makes you invisible</description>
     </item>
  </firstItem>
  <prices>14.50 23.99 4.95 3.56</prices>
  <names>
     <name>Invisibility Cream</name>
     <name>Levitation Salve</name>
     <name>Blork and Freen Instameal</name>
     <name>Grob winglets</name>
  </names>

</result>

</lang>

XSLT

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text" />
  <xsl:template match="/">
  
    <!-- 1. first item element -->
    <xsl:text>
The first item element is</xsl:text>
    <xsl:value-of select="//item[1]" />
    
    <!-- 2. Print each price element -->
    <xsl:text>
The prices are: </xsl:text>
    <xsl:for-each select="//price">
      <xsl:text>
      </xsl:text>
      <xsl:copy-of select="." />
    </xsl:for-each>
    
    <!-- 3. Collect all the name elements -->
    <xsl:text>
The names are: </xsl:text>
    <xsl:copy-of select="//name" />
  </xsl:template>  
</xsl:stylesheet>