XML validation
Given an XML document and an XSD schema definition, validate that the document follows the schema described.
C#
<lang csharp> using System; using System.Xml; using System.Xml.Schema; using System.IO;
/// <summary>
/// Validates a remote XML document against a remote XSD schema and reports
/// every validation problem, followed by an overall verdict.
/// </summary>
public class Test
{
    // Set by ValidationCallBack as soon as the validating reader reports an error,
    // so Main can tell a clean run from one that merely finished reading.
    private static bool hasErrors;

    public static void Main()
    {
        hasErrors = false;

        XmlSchemaSet sc = new XmlSchemaSet();
        sc.Add(null, "http://venus.eas.asu.edu/WSRepository/xml/Courses.xsd");

        XmlReaderSettings settings = new XmlReaderSettings();
        settings.ValidationType = ValidationType.Schema;
        settings.Schemas = sc;
        settings.ValidationEventHandler += new ValidationEventHandler(ValidationCallBack);

        // Create the XmlReader object; 'using' releases the underlying stream.
        using (XmlReader reader = XmlReader.Create("http://venus.eas.asu.edu/WSRepository/xml/Courses.xml", settings))
        {
            // Parse the file. The event handler is called for each violation found.
            while (reader.Read()) { }
        }

        if (hasErrors)
        {
            Console.WriteLine("The XML file is NOT valid for the given xsd file");
        }
        else
        {
            Console.WriteLine("The XML file is valid for the given xsd file");
        }
    }

    // Display any validation errors and remember that at least one occurred.
    private static void ValidationCallBack(object sender, ValidationEventArgs e)
    {
        if (e.Severity == XmlSeverityType.Error)
        {
            hasErrors = true;
        }
        Console.WriteLine("Validation Error: {0}", e.Message);
    }
}
</lang>
F#
Using an inline schema:
<lang fsharp>open System.Xml open System.Xml.Schema open System.IO
// Sample input for the validator: a <root> element that carries its own XSD
// (an inline xs:schema for the 'example' namespace) followed by an instance
// document. The schema types 'an-attribute' as xs:boolean; the second
// <another-element> sets it to 'wrong'.
let xml = @"<root> <xs:schema id='an-element' targetNamespace='example' xmlns:mstns='example' xmlns='example' xmlns:xs='http://www.w3.org/2001/XMLSchema' attributeFormDefault='unqualified' elementFormDefault='qualified'>
<xs:element name='an-element'> <xs:complexType> <xs:sequence minOccurs='0' maxOccurs='unbounded'> <xs:element name='another-element' nillable='true'> <xs:complexType> <xs:simpleContent> <xs:extension base='xs:string'> <xs:attribute name='an-attribute' form='unqualified' type='xs:boolean' /> </xs:extension> </xs:simpleContent> </xs:complexType> </xs:element> </xs:sequence> </xs:complexType> </xs:element>
</xs:schema> <an-element xmlns='example'>
<another-element an-attribute='false'>...</another-element> <another-element an-attribute='wrong'>123</another-element>
</an-element> </root>"
/// Creates a stateful sink for validation events.
///
/// The returned function accepts:
///   Some (message, severity) - records and prints one event, returns None.
///                              Warnings are counted and printed only when
///                              withWarnings is true.
///   None                     - returns Some dictionary with the totals seen so
///                              far (errors only, or errors and warnings when
///                              withWarnings is true).
let validationData withWarnings =
    // Ref cells, because closures cannot capture 'let mutable' locals.
    let errorCount = ref 0
    let warningCount = ref 0
    fun input ->
        match input with
        | None ->
            let totals =
                if withWarnings then
                    dict [XmlSeverityType.Error, errorCount.Value; XmlSeverityType.Warning, warningCount.Value]
                else
                    dict [XmlSeverityType.Error, errorCount.Value]
            Some totals
        | Some (msg, severity) ->
            if severity = XmlSeverityType.Error then
                errorCount.Value <- errorCount.Value + 1
                printfn "Validation error: %s" msg
            elif withWarnings then
                warningCount.Value <- warningCount.Value + 1
                printfn "Validation warning: %s" msg
            None
/// Entry point: validates the embedded sample document against its inline
/// schema and prints the error (and, with "-w", warning) totals.
[<EntryPoint>]
let main argv =
    // "-w" as the first argument also counts and prints validation warnings.
    let withWarnings = argv.Length > 0 && argv.[0] = "-w"
    let vData = validationData withWarnings
    let validationEvent = new ValidationEventHandler(fun _ e -> vData (Some(e.Message, e.Severity)) |> ignore)

    let settings = new XmlReaderSettings()
    settings.ValidationType <- ValidationType.Schema
    settings.ValidationEventHandler.AddHandler(validationEvent)
    // ProcessInlineSchema makes the reader honour the xs:schema embedded in the
    // document; ReportValidationWarnings surfaces warnings to the handler.
    settings.ValidationFlags <-
        settings.ValidationFlags
        ||| XmlSchemaValidationFlags.ProcessInlineSchema
        ||| XmlSchemaValidationFlags.ReportValidationWarnings

    // 'use' disposes both readers when main returns.
    use input = new StringReader(xml)
    use reader = XmlReader.Create(input, settings)
    // Reading to the end drives validation; the handler does the reporting.
    while reader.Read() do ()

    printfn "%A" (Seq.toList (vData None).Value)
    0
</lang>
- Output:
>RosettaCode Validation error: The 'an-attribute' element is invalid - The value 'wrong' is invalid according to its datatype 'http://www.w3.org/2001/XMLSchema:boolean' - The string 'wrong' is not a valid boolean value. [[Error, 1]] >RosettaCode -w Validation warning: Could not find schema information for the element 'root'. Validation error: The 'an-attribute' element is invalid - The value 'wrong' is invalid according to its datatype 'http://www.w3.org/2001/XMLSchema:boolean' - The string 'wrong' is not a valid boolean value. [[Error, 1]; [Warning, 1]]
Changing 'wrong' to a valid boolean, e.g. 'true', the result (without -w) is
[[Error, 0]]
Java
<lang java>import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
import java.net.MalformedURLException; import java.net.URL;
import javax.xml.transform.stream.StreamSource; import javax.xml.validation.SchemaFactory; import javax.xml.validation.Validator; import javax.xml.ws.Holder;
import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException;
public class XmlValidation { public static void main(String... args) throws MalformedURLException { URL schemaLocation = new URL("http://venus.eas.asu.edu/WSRepository/xml/Courses.xsd"); URL documentLocation = new URL("http://venus.eas.asu.edu/WSRepository/xml/Courses.xml"); if (validate(schemaLocation, documentLocation)) { System.out.println("document is valid"); } else { System.out.println("document is invalid"); } }
// The least code you need for validation public static boolean minimalValidate(URL schemaLocation, URL documentLocation) { SchemaFactory factory = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI); try { Validator validator = factory.newSchema(schemaLocation).newValidator(); validator.validate(new StreamSource(documentLocation.toString())); return true; } catch (Exception e) { return false; } }
// A more complete validator public static boolean validate(URL schemaLocation, URL documentLocation) { SchemaFactory factory = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI); final Holder<Boolean> valid = new Holder<>(true); try { Validator validator = factory.newSchema(schemaLocation).newValidator(); // Get some better diagnostics out validator.setErrorHandler(new ErrorHandler(){ @Override public void warning(SAXParseException exception) { System.out.println("warning: " + exception.getMessage()); }
@Override public void error(SAXParseException exception) { System.out.println("error: " + exception.getMessage()); valid.value = false; }
@Override public void fatalError(SAXParseException exception) throws SAXException { System.out.println("fatal error: " + exception.getMessage()); throw exception; }}); validator.validate(new StreamSource(documentLocation.toString())); return valid.value; } catch (SAXException e) { // Already reported above return false; } catch (Exception e) { // If this is the only thing that throws, it's a gross error System.err.println(e); return false; } } }</lang>
Perl
<lang perl>#!/usr/bin/env perl -T use 5.018_002; use warnings; use Try::Tiny; use XML::LibXML;
our $VERSION = 1.000_000;

my $parser = XML::LibXML->new();

# Two sample documents: <a> holding a plain integer (valid), and <a> holding
# a child element, which the simple xsd:integer type does not allow.
my $good_xml         = '<a>5</a>';
my $bad_xml          = '<a>5<b>foobar</b></a>';
my $xmlschema_markup = <<'END';
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="xsd:integer"/>
</xsd:schema>
END

my $xmlschema = XML::LibXML::Schema->new( string => $xmlschema_markup );

for ( $good_xml, $bad_xml ) {
    my $doc = $parser->parse_string($_);
    try {
        # validate() dies with the libxml2 error message on failure.
        $xmlschema->validate($doc);
    }
    finally {
        # Try::Tiny passes the error to the finally block in @_;
        # @_ is empty when the try block succeeded.
        if (@_) {
            say "Not valid: @_";
        }
        else {
            say 'Valid';
        }
    };
}</lang>
- Output:
Valid Not valid: unknown-7fe99976a9a0:0: Schemas validity error : Element 'a': Element content is not allowed, because the type definition is simple.
Python
<lang python>#!/bin/python from __future__ import print_function import lxml from lxml import etree
if __name__=="__main__":
parser = etree.XMLParser(dtd_validation=True) schema_root = etree.XML(\ <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> <xsd:element name="a" type="xsd:integer"/> </xsd:schema> ) schema = etree.XMLSchema(schema_root)
#Good xml parser = etree.XMLParser(schema = schema) try: root = etree.fromstring("<a>5</a>", parser) print ("Finished validating good xml") except lxml.etree.XMLSyntaxError as err: print (err)
#Bad xml parser = etree.XMLParser(schema = schema) try: root = etree.fromstring("<a>5foobar</a>", parser) except lxml.etree.XMLSyntaxError as err: print (err)</lang>
- Output:
Finished validating good xml Element 'a': Element content is not allowed, because the type definition is simple.
Sidef
<lang ruby>require('XML::LibXML')
# Returns true when the XML text `str` validates against the XSD text
# `schema`, and false when parsing or validation throws.
func is_valid_xml(str, schema) {
    var reader    = %O<XML::LibXML>.new
    var validator = %O<XML::LibXML::Schema>.new(string => schema)

    try {
        var document = reader.parse_string(str)
        validator.validate(document)
        true
    }
    catch {
        false
    }
}
# Sample documents: <a> holding a plain integer (valid), and <a> holding a
# child element, which the simple xsd:integer type does not allow.
var good_xml = '<a>5</a>'
var bad_xml  = '<a>5<b>foobar</b></a>'

var xmlschema_markup = <<'END'
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="xsd:integer"/>
</xsd:schema>
END

# Report the verdict for each sample document.
[good_xml, bad_xml].each { |xml|
    say "is_valid_xml(#{xml.dump}) : #{is_valid_xml(xml, xmlschema_markup)}"
}</lang>
- Output:
is_valid_xml("<a>5</a>") : true is_valid_xml("<a>5<b>foobar</b></a>") : false