The Validation Task

In this section we describe what the validator actually does.

Let's start with an example of a very simple schema, like this:

<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
    
 <xs:element name="books">
  <xs:complexType>
   <xs:sequence>
    <xs:element ref="book" maxOccurs="unbounded"/>
   </xs:sequence>
  </xs:complexType>
  <xs:key name="isbn-key">
   <xs:selector xpath="book"/>
   <xs:field xpath="@isbn"/>
  </xs:key>
 </xs:element>

 <xs:element name="book">
  <xs:complexType>
   <xs:sequence>
    <xs:element name="title" type="xs:string"/>
    <xs:element name="publisher" type="xs:string"/>
    <xs:element name="author" type="xs:string" minOccurs="1" maxOccurs="5"/>
    <xs:element name="date" type="xs:gYear"/>
    <xs:element name="price" type="moneyType"/>
   </xs:sequence>
   <xs:attribute name="isbn" type="ISBNType" use="required"/>
   <xs:assert test="if (publisher eq 'McGraw-Hill') then starts-with(@isbn, '007') else 
                    if (publisher eq 'Academic Press') then starts-with(@isbn, '012') 
                    else true()"/>
  </xs:complexType>
 </xs:element>
 
 <xs:complexType name="moneyType">
  <xs:simpleContent>
   <xs:extension base="xs:decimal">
    <xs:attribute name="currency" type="currencyType"/>
   </xs:extension> 
  </xs:simpleContent>
 </xs:complexType>
 
 <xs:simpleType name="currencyType">
  <xs:restriction base="xs:string">
   <xs:enumeration value="USD"/>
   <xs:enumeration value="GBP"/>
   <xs:enumeration value="EUR"/>
   <xs:enumeration value="CAD"/>
  </xs:restriction>
 </xs:simpleType> 
 
 <xs:simpleType name="ISBNType">
  <xs:restriction base="xs:string">
   <xs:pattern value="[0-9]{9}[0-9X]"/>
  </xs:restriction>
 </xs:simpleType>     
 
</xs:schema>

A valid XML instance conforming to this schema might look like this:

<?xml version="1.0" encoding="UTF-8"?>
<books>
    <book isbn="0070491712">
        <title>Apple PASCAL: a hands-on approach</title>
        <publisher>McGraw-Hill</publisher>
        <author>Arthur Luehrmann</author>
        <author>Herbert Peckham</author>
        <date>1981</date>
        <price currency="USD">13.95</price>
    </book>
    <book isbn="0124119700">
        <title>An Introduction to Direct Access Storage Devices</title>
        <publisher>Academic Press</publisher>
        <author>Hugh Sierra</author>
        <date>1990</date>
        <price currency="USD">72.95</price>
    </book>
</books>

The Saxon-EE schema compiler can be invoked to convert the source schema into an SCM file, for example with a command such as:

java com.saxonica.Validate -xsd:books.xsd -scmout:books.scm

Here is the resulting SCM file:

<?xml version="1.0" encoding="UTF-8"?>
<scm:schema xmlns:scm="http://ns.saxonica.com/schema-component-model"
            generatedAt="2018-06-04T10:39:09.702+01:00"
            xsdVersion="1.1">
   <scm:simpleType id="C0"
                   name="currencyType"
                   base="#string"
                   variety="atomic"
                   primitiveType="#string">
      <scm:enumeration value="EUR"/>
      <scm:enumeration value="CAD"/>
      <scm:enumeration value="USD"/>
      <scm:enumeration value="GBP"/>
   </scm:simpleType>
   <scm:simpleType id="C1"
                   name="ISBNType"
                   base="#string"
                   variety="atomic"
                   primitiveType="#string">
      <scm:pattern value="[0-9]{9}[0-9X]"/>
   </scm:simpleType>
   <scm:complexType id="C2"
                    name="moneyType"
                    base="#decimal"
                    derivationMethod="extension"
                    abstract="false"
                    variety="simple"
                    simpleType="#decimal">
      <scm:attributeUse required="false" inheritable="false" ref="C3"/>
   </scm:complexType>
   <scm:attribute id="C3"
      name="currency"
      type="C0"
      global="false"
      inheritable="false"
      containingComplexType="C2"/>
   <scm:element id="C4"
                name="book"
                type="C5"
                global="true"
                nillable="false"
                abstract="false"/>
   <scm:complexType id="C5"
      base="#anyType"
      derivationMethod="restriction"
      abstract="false"
      variety="element-only">
      <scm:attributeUse required="true" inheritable="false" ref="C9"/>
      <scm:modelGroupParticle minOccurs="1" maxOccurs="1">
         <scm:sequence>
            <scm:elementParticle minOccurs="1" maxOccurs="1" ref="C10"/>
            <scm:elementParticle minOccurs="1" maxOccurs="1" ref="C11"/>
            <scm:elementParticle minOccurs="1" maxOccurs="5" ref="C12"/>
            <scm:elementParticle minOccurs="1" maxOccurs="1" ref="C13"/>
            <scm:elementParticle minOccurs="1" maxOccurs="1" ref="C14"/>
         </scm:sequence>
      </scm:modelGroupParticle>
      <scm:finiteStateMachine initialState="0">
         <scm:state nr="0">
            <scm:edge term="C10" to="1"/>
         </scm:state>
         <scm:state nr="1">
            <scm:edge term="C11" to="2"/>
         </scm:state>
         <scm:state nr="2">
            <scm:edge term="C12" to="3"/>
         </scm:state>
         <scm:state nr="3" minOccurs="1" maxOccurs="5">
            <scm:edge term="C12" to="3"/>
            <scm:edge term="C13" to="4"/>
         </scm:state>
         <scm:state nr="4">
            <scm:edge term="C14" to="5"/>
         </scm:state>
         <scm:state nr="5" final="true"/>
      </scm:finiteStateMachine>
      <scm:assertion xmlns:xs="http://www.w3.org/2001/XMLSchema"
         test="if (publisher eq 'McGraw-Hill') then starts-with(@isbn, '007')
                     else if (publisher eq 'Academic Press') then starts-with(@isbn, '012')
                     else true()"
         defaultNamespace=""
         xml:base="file:/Users/mike/Documents/papers/markupuk2018/books.xsd"/>
   </scm:complexType>
   <scm:element id="C6"
                name="books"
                type="C7"
                global="true"
                nillable="false"
                abstract="false">
      <scm:identityConstraint ref="C8"/>
   </scm:element>
   <scm:complexType id="C7"
      base="#anyType"
      derivationMethod="restriction"
      abstract="false"
      variety="element-only">
      <scm:elementParticle minOccurs="1" maxOccurs="unbounded" ref="C4"/>
      <scm:finiteStateMachine initialState="0">
         <scm:state nr="0">
            <scm:edge term="C4" to="1"/>
         </scm:state>
         <scm:state nr="1" final="true">
            <scm:edge term="C4" to="2"/>
         </scm:state>
         <scm:state nr="2" final="true">
            <scm:edge term="C4" to="2"/>
         </scm:state>
      </scm:finiteStateMachine>
   </scm:complexType>
   <scm:key id="C8" name="isbn-key" targetNamespace="">
      <scm:selector xmlns:xs="http://www.w3.org/2001/XMLSchema"
                    xpath="book"
                    defaultNamespace=""/>
      <scm:field xmlns:xs="http://www.w3.org/2001/XMLSchema"
                 xpath="@isbn"
                 defaultNamespace=""
                 type="#string"/>
   </scm:key>
   <scm:attribute id="C9"
      name="isbn"
      type="C1"
      global="false"
      inheritable="false"
      containingComplexType="C5"/>
   <scm:element id="C10"
                name="title"
                type="#string"
                global="false"
                containingComplexType="C5"
                nillable="false"
                abstract="false"/>
   <scm:element id="C11"
                name="publisher"
                type="#string"
                global="false"
                containingComplexType="C5"
                nillable="false"
                abstract="false"/>
   <scm:element id="C12"
                name="author"
                type="#string"
                global="false"
                containingComplexType="C5"
                nillable="false"
                abstract="false"/>
   <scm:element id="C13"
      name="date"
      type="#gYear"
      global="false"
      containingComplexType="C5"
      nillable="false"
      abstract="false"/>
   <scm:element id="C14"
                name="price"
                type="C2"
                global="false"
                containingComplexType="C5"
                nillable="false"
                abstract="false"/>
</scm:schema>

Let's look briefly at what this contains. The children of the scm:schema element represent different schema components such as element declarations, attribute declarations, simple and complex types, each with a unique identifier. For convenience I've rearranged these in order of the component identifier (the actual order doesn't matter).

Given this schema and this instance document, the task of the validator is to produce an empty validation report showing that there are no errors. The validation report becomes more interesting if the instance is invalid. For example, we can use this command:

java com.saxonica.Validate -xsd:books.scm -s:books-invalid.xml -report:report.xml

to validate this invalid instance:

<?xml version="1.0" encoding="UTF-8"?>
<books>
    <book isbn="0070491712">
        <title>Apple PASCAL: a hands-on approach</title>
        <publisher>McGraw-Hill</publisher>
        <date>1981</date>
        <price currency="NZD">13.95</price>
    </book>
    <book isbn="0134119700">
        <title>An Introduction to Direct Access Storage Devices</title>
        <author>Hugh Sierra</author>
        <publisher>Academic Press</publisher>
        <date>1990-04</date>
        <price currency="USD">72.95</price>
    </book>
</books>

and the result is the following report:

<?xml version="1.0" encoding="UTF-8"?>
<validation-report xmlns="http://saxon.sf.net/ns/validation"
                   system-id="file:/Users/mike/Documents/papers/markupuk2018/books-invalid.xml">
   <error line="6"
          column="15"
          path="/Q{}books[1]/Q{}book[1]/Q{}date[1]"
          xsd-part="1"
          constraint="cvc-complex-type.2.4">In content of element &lt;book&gt;: The 
            content model does not allow element &lt;Q{}date&gt; to appear immediately 
            after element &lt;publisher&gt;. No further elements are allowed at 
            this point. </error>
   <error line="7"
          column="31"
          path="/Q{}books[1]/Q{}book[1]/Q{}price[1]/@currency"
          xsd-part="2"
          constraint="cvc-complex-type.3">Value "NZD" contravenes the enumeration 
          facet "EUR, USD, CAD, GBP" of the type Q{}currencyType</error>
   <error line="11"
          column="17"
          path="/Q{}books[1]/Q{}book[2]/Q{}author[1]"
          xsd-part="1"
          constraint="cvc-complex-type.2.4">In content of element &lt;book&gt;: 
            The content model does not allow element &lt;Q{}author&gt; to appear 
            immediately after element &lt;title&gt;. No further elements are allowed 
            at this point. </error>
   <error line="13"
          column="15"
          path="/Q{}books[1]/Q{}book[2]/Q{}date[1]"
          xsd-part="2"
          constraint="cvc-datatype-valid.1">The content "1990-04" of element &lt;date&gt;
            does not match the required simple type. Cannot convert '1990-04' to a 
            gYear</error>
   <error line="9"
          column="29"
          path="/Q{}books[1]/Q{}book[2]"
          xsd-part="1"
          constraint="sec-cvc-assertion.0">Element book does not satisfy assertion 
            if (publisher eq 'McGraw-Hill') then starts-with(@isbn, '007') else 
            if (publisher eq 'Academic Press') then starts-with(@isbn, '012') 
            else true()</error>
   <meta-data>
      <validator name="SAXON-EE" version="9.9.0.1"/>
      <results errors="5" warnings="0"/>
      <schema file="books.scm" xsd-version="1.1"/>
      <run at="2018-06-04T11:12:24.651+01:00"/>
   </meta-data>
</validation-report>

The report shown here comes from the existing Saxon-EE validator written in Java. Our task is to reproduce this report with a validator written entirely in portable XSLT.