by Eric van der Vlist is published by O'Reilly & Associates (ISBN: 0596004214)


Assembling the Parts

You have seen the different bits and pieces needed to define and reference patterns. It's time to put them all together and create a complete schema. The first exercise is to define a DTD-like RELAX NG schema that defines each element in its own named pattern.

The full schema might look like this:

<?xml version="1.0" encoding="UTF-8"?>
<!-- DTD-like style: each element is declared in its own named pattern
     (element-NAME), and content models refer to those patterns by name. -->
<grammar xmlns="http://relaxng.org/ns/structure/1.0">

  <!-- The document element is library. -->
  <start>
    <ref name="element-library"/>
  </start>

  <!-- library: one or more book elements. -->
  <define name="element-library">
    <element name="library">
      <oneOrMore>
        <ref name="element-book"/>
      </oneOrMore>
    </element>
  </define>

  <!-- book: id and available attributes, then isbn, title,
       one or more author, and zero or more character. -->
  <define name="element-book">
    <element name="book">
      <attribute name="id"/>
      <attribute name="available"/>
      <ref name="element-isbn"/>
      <ref name="element-title"/>
      <oneOrMore>
        <ref name="element-author"/>
      </oneOrMore>
      <zeroOrMore>
        <ref name="element-character"/>
      </zeroOrMore>
    </element>
  </define>

  <!-- isbn: text only. -->
  <define name="element-isbn">
    <element name="isbn">
      <text/>
    </element>
  </define>

  <!-- title: xml:lang attribute plus text. -->
  <define name="element-title">
    <element name="title">
      <attribute name="xml:lang"/>
      <text/>
    </element>
  </define>

  <!-- author: id attribute, name, then optional born and optional died. -->
  <define name="element-author">
    <element name="author">
      <attribute name="id"/>
      <ref name="element-name"/>
      <optional>
        <ref name="element-born"/>
      </optional>
      <optional>
        <ref name="element-died"/>
      </optional>
    </element>
  </define>

  <!-- name: text only; referenced by both author and character. -->
  <define name="element-name">
    <element name="name">
      <text/>
    </element>
  </define>

  <!-- born: text only; referenced by both author and character. -->
  <define name="element-born">
    <element name="born">
      <text/>
    </element>
  </define>

  <!-- died: text only. -->
  <define name="element-died">
    <element name="died">
      <text/>
    </element>
  </define>

  <!-- character: id attribute, name, optional born, then qualification. -->
  <define name="element-character">
    <element name="character">
      <attribute name="id"/>
      <ref name="element-name"/>
      <optional>
        <ref name="element-born"/>
      </optional>
      <ref name="element-qualification"/>
    </element>
  </define>

  <!-- qualification: text only. -->
  <define name="element-qualification">
    <element name="qualification">
      <text/>
    </element>
  </define>

</grammar>

Or:

# DTD-like style: each element is declared in its own named pattern
# (element-NAME), and content models refer to those patterns by name.
grammar {

  # The document element is library.
  start = element-library

  # library: one or more book elements.
  element-library = element library { element-book+ }

  # book: id and available attributes, then isbn, title,
  # one or more author, and zero or more character.
  element-book =
    element book {
      attribute id { text },
      attribute available { text },
      element-isbn,
      element-title,
      element-author+,
      element-character*
    }

  # isbn: text only.
  element-isbn = element isbn { text }

  # title: xml:lang attribute plus text.
  element-title =
    element title {
      attribute xml:lang { text },
      text
    }

  # author: id attribute, name, then optional born and optional died.
  element-author =
    element author {
      attribute id { text },
      element-name,
      element-born?,
      element-died?
    }

  # name and born are referenced by both author and character.
  element-name = element name { text }

  element-born = element born { text }

  element-died = element died { text }

  # character: id attribute, name, optional born, then qualification.
  element-character =
    element character {
      attribute id { text },
      element-name,
      element-born?,
      element-qualification
    }

  # qualification: text only.
  element-qualification = element qualification { text }

}

The DTD style just shown is pretty common, and finding the definition of each element in the schema is easy, which is a great advantage. Another popular style, the content-oriented style, defines the content of each element as a separate pattern:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Content-oriented style: elements are declared inline where they occur,
     and the content of each element is a separate named pattern
     (NAME-content). -->
<grammar xmlns="http://relaxng.org/ns/structure/1.0">

  <!-- The document element is library. -->
  <start>
    <element name="library">
      <ref name="library-content"/>
    </element>
  </start>

  <!-- Content of library: one or more book elements. -->
  <define name="library-content">
    <oneOrMore>
      <element name="book">
        <ref name="book-content"/>
      </element>
    </oneOrMore>
  </define>

  <!-- Content of book: id and available attributes, then isbn, title,
       one or more author, and zero or more character. -->
  <define name="book-content">
    <attribute name="id"/>
    <attribute name="available"/>
    <element name="isbn">
      <ref name="isbn-content"/>
    </element>
    <element name="title">
      <ref name="title-content"/>
    </element>
    <oneOrMore>
      <element name="author">
        <ref name="author-content"/>
      </element>
    </oneOrMore>
    <zeroOrMore>
      <element name="character">
        <ref name="character-content"/>
      </element>
    </zeroOrMore>
  </define>

  <!-- Text-only contents. -->
  <define name="isbn-content">
    <text/>
  </define>

  <define name="name-content">
    <text/>
  </define>

  <define name="born-content">
    <text/>
  </define>

  <define name="died-content">
    <text/>
  </define>

  <define name="qualification-content">
    <text/>
  </define>

  <!-- Content of title: xml:lang attribute plus text. -->
  <define name="title-content">
    <attribute name="xml:lang"/>
    <text/>
  </define>

  <!-- Content of author: id attribute, name, optional born, optional died. -->
  <define name="author-content">
    <attribute name="id"/>
    <element name="name">
      <ref name="name-content"/>
    </element>
    <optional>
      <element name="born">
        <ref name="born-content"/>
      </element>
    </optional>
    <optional>
      <element name="died">
        <ref name="died-content"/>
      </element>
    </optional>
  </define>

  <!-- Content of character: id attribute, name, optional born, qualification. -->
  <define name="character-content">
    <attribute name="id"/>
    <element name="name">
      <ref name="name-content"/>
    </element>
    <optional>
      <element name="born">
        <ref name="born-content"/>
      </element>
    </optional>
    <element name="qualification">
      <ref name="qualification-content"/>
    </element>
  </define>

</grammar>

Or:

# Content-oriented style: elements are declared inline where they occur,
# and the content of each element is a separate named pattern (NAME-content).
grammar {

  # The document element is library.
  start = element library { library-content }

  # Content of library: one or more book elements.
  library-content = element book { book-content }+

  # Content of book: id and available attributes, then isbn, title,
  # one or more author, and zero or more character.
  book-content =
    attribute id { text },
    attribute available { text },
    element isbn { isbn-content },
    element title { title-content },
    element author { author-content }+,
    element character { character-content }*

  # Text-only contents.
  isbn-content = text

  name-content = text

  born-content = text

  died-content = text

  qualification-content = text

  # Content of title: xml:lang attribute plus text.
  title-content =
    attribute xml:lang { text },
    text

  # Content of author: id attribute, name, optional born, optional died.
  author-content =
    attribute id { text },
    element name { name-content },
    element born { born-content }?,
    element died { died-content }?

  # Content of character: id attribute, name, optional born, qualification.
  character-content =
    attribute id { text },
    element name { name-content },
    element born { born-content }?,
    element qualification { qualification-content }

}

As shown in Chapter 12, the style of your schema (Russian doll, DTD-like, or content-oriented, like this last schema) has an impact on its extensibility. The last option (content-oriented) is the most extensible.

Now let's revisit the "bizarre patterns" mentioned in Chapter 2 and shown in Figure 5-3.

Figure 5-3. Bizarre combinations of child content for a group

Bizarre combinations of child content for a group

When you think about it, this case is not so uncommon. When you find it in its original form, it's a muddled mess: there's a first pattern named book-basic with the id attribute and the isbn and title elements, one or more author elements, and an optional character element. There's a second pattern that extends the first one. It's named book-extended and holds the available attribute and zero or more character elements. It's confusing to write, certainly, and difficult to follow. Still, this pattern gives the opportunity to disentangle the web of confusion.

Updating the "DTD-like" flavor of our schema to reflect this instance document is just a matter of splitting up the definition of the book element:

<!-- book: its content is the book-basic pattern followed by the
     book-extended pattern. -->
<define name="element-book">
  <element name="book">
    <ref name="book-basic"/>
    <ref name="book-extended"/>
  </element>
</define>

<!-- book-basic: id attribute, isbn, title, one or more author,
     and an optional character. -->
<define name="book-basic">
  <attribute name="id"/>
  <ref name="element-isbn"/>
  <ref name="element-title"/>
  <oneOrMore>
    <ref name="element-author"/>
  </oneOrMore>
  <optional>
    <ref name="element-character"/>
  </optional>
</define>

<!-- book-extended: available attribute and zero or more character. -->
<define name="book-extended">
  <attribute name="available"/>
  <zeroOrMore>
    <ref name="element-character"/>
  </zeroOrMore>
</define>

Or, in the compact syntax:

# book: its content is the book-basic pattern followed by the
# book-extended pattern.
element-book =
  element book {
    book-basic,
    book-extended
  }

# book-basic: id attribute, isbn, title, one or more author,
# and an optional character.
book-basic =
  attribute id { text },
  element-isbn,
  element-title,
  element-author+,
  element-character?

# book-extended: available attribute and zero or more character.
book-extended =
  attribute available { text },
  element-character*

This text is released under the Free Software Foundation GFDL.