DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

Snippets has posted 5883 posts at DZone. View Full User Profile

RELAX NG Long Grammar

06.26.2008
| 2116 views |
  • submit to reddit
        I believe this RELAX NG grammar and the shorter one were both derived from the specification. IIRC, the spec contains two grammars, and this one was designed to make conversion to XML easier.

#!/usr/bin/env tt
# Author::    Daniel Brumbaugh Keeney (http://rubyforge.org/users/db-keen)
# Copyright:: 2008 Daniel Brumbaugh Keeney
# License::   GPLv3+
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# http://relaxng.org/compact-20021121.html#formal-syntax
# http://relaxng.org/tutorial-20011203.html
grammar RelaxNGfromSpec

  include XmlNameSpaces

  # Entry rule: zero or more declarations followed by the top-level body.
  rule top_level
    decl* top_level_body
  end

  # One prolog declaration. Three forms, tried in order:
  #   namespace <prefix> = <literal | inherit>
  #   default namespace [<prefix>] = <literal | inherit>
  #   datatypes <prefix> = <literal>     (the prefix is labelled datatype_prefix)
  rule decl
    "namespace" namespace_prefix  "="  namespace_uri_literal
  / "default"  "namespace"  namespace_prefix?  "="  namespace_uri_literal
  / "datatypes"  datatype_prefix:identifier_or_keyword  "="  literal
  end

  # A namespace prefix: any identifier or keyword, except that the negative
  # lookahead rejects "xmlns" when it is followed by `space`.
  # NOTE(review): `space` is not defined in this grammar; presumably it is
  # supplied by the included XmlNameSpaces — confirm.
  rule namespace_prefix
    !( "xmlns" space ) identifier_or_keyword
  end

  # Right-hand side of a namespace declaration: a literal, or the bare word
  # "inherit".
  rule namespace_uri_literal
    literal /  "inherit"
  end

  # Body of a schema file: either a bare pattern or the members of an implicit
  # grammar.
  #
  # PEG choice is ordered and never revisited once an alternative succeeds.
  # A grammar member such as `foo = ...` starts with an identifier, which on
  # its own is also a complete pattern (a ref); trying `pattern` first would
  # therefore accept just `foo` and strand the rest of the input. The `&member`
  # lookahead commits to the grammar branch whenever at least one member is
  # present. `grammar` can match nothing, so it is kept as the last
  # alternative to cover input that holds neither a member nor a pattern.
  # NOTE(review): the brace blocks are carried over unchanged; they look like
  # spec-style construction notation rather than Ruby — confirm before compiling.
  rule top_level_body
    &member grammar { "<grammar>" x "</grammar>" }
    / pattern
    / grammar { "<grammar>" x "</grammar>" }
  end

  # Content of a grammar: any number of members. Because the repetition may be
  # empty, this rule also succeeds without consuming input.
  rule grammar
    member*
  end

  # One grammar member: an annotated component, or an annotation element whose
  # name is not a keyword.
  rule member
    annotated_component / annotation_element_not_keyword
  end

  # A component preceded by its (possibly empty) annotations.
  # NOTE(review): the brace block reads like the spec's construction notation,
  # not a Ruby method body — confirm.
  rule annotated_component
    annotations component { applyAnnotations(x, y) }
  end

  # The four kinds of grammar component, tried in this order.
  rule component
    start / define / include / div
  end

  # Start definition: the keyword "start", an assignment operator, a pattern.
  # NOTE(review): the brace block is XML-like construction notation, not Ruby.
  rule start
    "start" assign_op  pattern
        { <start x> y </start> }
  end

  # Named definition: a non-keyword identifier, an assignment operator, a
  # pattern.
  # NOTE(review): the brace block is XML-like construction notation, not Ruby.
  rule define
    identifier assign_op pattern
        { <define name=x y> z </define> }
  end

  # Assignment operator: plain "=", or one of the combining forms "|=" and
  # "&=". The combining forms carry a `combine` attribute with the value
  # "choice" and "interleave" respectively; plain "=" carries none.
  rule assign_op
      "="
    / "|=" { attribute(name("", "combine"), "choice") }
    / "&=" { attribute(name("", "combine"), "interleave") }
  end

  # Include component: "include", a URI literal, an optional inherit clause
  # and an optional brace-delimited body.
  # NOTE(review): the brace block is XML-like construction notation, not Ruby.
  rule include
    "include"  any_uri_literal  opt_inherit optIncludeBody
        { <include href=mapSchemaRef(environment, x) y> z </include> }
  end

  # A URI written as a literal. Syntactically identical to `literal`; the
  # separate name only marks the role.
  rule any_uri_literal
    literal
  end

  # Optional `inherit = <prefix>` clause of an include / external reference.
  #
  # PEG choice is ordered, so the explicit clause has to be tried first: an
  # empty alternative in front always succeeds and the clause could never be
  # consumed. The empty case is written '' (Treetop's empty terminal) because
  # the spec's epsilon symbol is not Treetop syntax.
  # NOTE(review): the brace blocks are carried over unchanged; they look like
  # spec-style construction notation rather than Ruby — confirm.
  rule opt_inherit
    "inherit"  "="  identifier_or_keyword
        { makeNsAttribute(lookupPrefix(environment, x)) }
    /  ''
        { makeNsAttribute(lookupDefault(environment)) }
  end

  # Optional brace-delimited body of an include.
  rule optIncludeBody
    ( "{"  includeBody  "}" )?
  end

  # Content of an include body: any number of annotated include components or
  # non-keyword annotation elements (possibly none).
  rule includeBody
    ( annotated_include_component / annotation_element_not_keyword )*
  end

  # A component allowed inside an include body (start, define or include_div;
  # labelled include_component), preceded by its annotations. Unlike
  # `component`, a nested include is not among the alternatives.
  rule annotated_include_component
    annotations include_component:( start / define / include_div ) { applyAnnotations(x, y) }
  end

  # A div in a grammar: "div" followed by brace-delimited grammar content.
  rule div
    "div" "{" grammar "}" { <div> x </div> }
  end

  # A div inside an include body: "div" followed by brace-delimited include
  # body content.
  rule include_div
    "div" "{" includeBody "}" { <div> x </div> }
  end

  # A pattern. Alternatives are tried in order:
  #   1. two or more particles joined by "|"  (choice)
  #   2. two or more particles joined by ","  (group)
  #   3. two or more particles joined by "&"  (interleave)
  #   4. a data pattern with an except clause
  #   5. a single particle
  # Each list form uses a single connector; mixing connectors at one level is
  # not matched by any alternative.
  # NOTE(review): `anno` in the brace blocks is not bound anywhere in this
  # grammar; the blocks look like spec notation rather than Ruby — confirm.
  rule pattern
       particle  ( "|"  particle )+
        { applyAnnotations(anno, <choice> x </choice>) }
    /  particle  ( ","  particle )+
        { applyAnnotations(anno, <group> x </group>) }
    /   particle  ( "&"  particle )+
        { applyAnnotations(anno, <interleave> x </interleave>) }
    /  annotated_data_except
        { applyAnnotationsGroup(anno, x) }
    /  particle
  end

  # One item of a pattern list: a primary, optionally carrying a repetition
  # suffix (* + ?).
  #
  # The repeated form is tried first. A repeatedPrimary begins with an
  # annotated_primary, so with the opposite order the plain alternative would
  # always succeed on that prefix and, since PEG choice does not revisit a
  # successful alternative, the suffix could never be consumed.
  # NOTE(review): the brace blocks are carried over unchanged; they look like
  # spec-style construction notation rather than Ruby — confirm.
  rule particle
      repeatedPrimary follow_annotations
        { (applyAnnotations(anno, x), y) }
    / annotated_primary { applyAnnotationsGroup(anno, x) }
  end

  # An annotated primary followed by exactly one repetition suffix: "*", "+"
  # or "?". The three commented-out alternatives show the per-suffix form this
  # single character class replaces.
  # !TODO: run benchmarks to see if it's better to handle these in
  # the grammar, or via case node.text_value
  rule repeatedPrimary
    #  annotated_primary "*" { <zeroOrMore> x </zeroOrMore> }
    #/  annotated_primary "+" { <oneOrMore> x </oneOrMore> }
    #/  annotated_primary "?" { <optional> x </optional> }
    annotated_primary [*+?]
  end

  # A primary with leading annotations, followed by any ">>" follow
  # annotations.
  rule annotated_primary
    lead_annotated_primary follow_annotations
  end

  # A data-except pattern with leading annotations, followed by any ">>"
  # follow annotations.
  rule annotated_data_except
    lead_annotated_data_except follow_annotations
  end

  # Leading annotations (possibly empty) followed by a data-except pattern.
  rule lead_annotated_data_except
    annotations data_except { applyAnnotations(x, y) }
  end

  # Leading annotations followed by either a primary or a parenthesised
  # pattern. Only the first alternative has a brace block attached.
  rule lead_annotated_primary
      annotations primary { applyAnnotations(x, y) }
    / annotations "(" pattern ")"
  end

  # A primary pattern: element, attribute, mixed, list, data/value forms, the
  # keyword patterns empty / notAllowed / text, references, a nested grammar
  # and an external reference.
  #
  # PEG choice is ordered, which drives two details here:
  # * `datatype_name datatype_value` comes before `datatype_name optParams`.
  #   optParams may match nothing, so in the opposite order it would always
  #   succeed right after the datatype name and leave the value literal
  #   unconsumed.
  # * the alternative that produces <text/> matches the keyword "text". Using
  #   "empty" there as well would make it unreachable, because the identical
  #   terminal is already tried two alternatives earlier.
  # NOTE(review): the brace blocks are carried over unchanged; they look like
  # spec-style construction notation rather than Ruby — confirm.
  rule primary
    "element"  name_class "{"  pattern  "}"
        { <element> x y </element> }
    /  "attribute" name_class "{"  pattern  "}"
        { <attribute> x y </attribute> }
    /  "mixed"  "{"  pattern  "}"  { <mixed> x </mixed> }
    /  "list"  "{"  pattern  "}"   { <list>  x </list> }
    /  datatype_name  datatype_value { <value x> y </value> }
    /  datatype_name  optParams     { <data x> y </data> }
    /  datatype_value               { <value> x </value> }
    /  "empty" { <empty/> }
    /  "notAllowed" { <notAllowed/> }
    /  "text" { <text/> }
    /  ref { <ref name=x/> }
    /  "parent" ref { <parentRef name=x/> }
    /  "grammar"  "{"  grammar  "}" { <grammar> x </grammar> }
    /  "external" any_uri_literal opt_inherit
        { <externalRef href=mapSchemaRef(x) y/> }
  end

  # A data pattern with an exception: datatype name, optional parameters, "-",
  # then a lead-annotated primary naming what is excluded.
  rule data_except
    datatype_name  optParams  "-"  lead_annotated_primary
        { <data x> y <except> z </except> </data> }
  end

  # A reference to a definition, written as a (non-keyword) identifier.
  rule ref
    identifier
  end

  # A datatype name: a prefixed name (CName), or one of the two built-in
  # keywords "string" and "token". For the built-ins the brace block passes an
  # empty first argument to datatypeAttributes.
  rule datatype_name
      CName
        { datatypeAttributes(lookupDatatypePrefix(environment, prefix(x)), localPart(x)) }
    / "string"
        { datatypeAttributes("", "string") }
    / "token"
        { datatypeAttributes("", "token") }
  end

  # A datatype value, written as a literal.
  rule datatype_value
    literal
  end

  # Optional brace-delimited parameter list; when present it must contain at
  # least one param. Matches the empty string when absent.
  rule optParams
    ( "{"  param+  "}" )?
  end

  # One datatype parameter: annotations, a name (identifier or keyword), "=",
  # and a literal value.
  rule param
    annotations  identifier_or_keyword  "="  literal
        { applyAnnotations(x, <param name=y> z </param>) }
  end

  # A name class; delegates directly to inner_name_class.
  rule name_class
    inner_name_class
  end

  # A name class: a choice of simple name classes, an except name class, or a
  # single simple name class.
  #
  # Alternatives run from longest to shortest. Both a choice (`a | b`) and an
  # except class (`ns:* - x`, `* - x`) begin with text that is a complete
  # simple name class on its own; since PEG choice stops at the first success,
  # trying the simple alternative first would make the other two unreachable.
  # NOTE(review): the brace blocks are carried over unchanged; they look like
  # spec-style construction notation rather than Ruby — confirm.
  rule inner_name_class
    name_class_choice
        { applyAnnotations(anno, <choice> x </choice>) }
    /  annotated_except_name_class
        { applyAnnotationsChoice(anno, x) }
    /  annotated_simple_name_class
        { applyAnnotationsChoice(anno, x) }
  end

  # Two or more simple name classes separated by "|".
  #
  # The recursive alternative is tried first. The two-item form is a prefix of
  # every longer list, so if it came first it would always succeed and
  # `a | b | c` would stop after `a | b`.
  rule name_class_choice
    annotated_simple_name_class  "|"  name_class_choice
    /  annotated_simple_name_class  "|"  annotated_simple_name_class
  end

  # An except name class with leading annotations, followed by any ">>"
  # follow annotations.
  rule annotated_except_name_class
    lead_annotated_except_name_class  follow_annotations

  end

  # Leading annotations (possibly empty) followed by an except name class.
  rule lead_annotated_except_name_class
    annotations  except_name_class
        { applyAnnotations(x, y) }
  end

  # A simple name class with leading annotations, followed by any ">>"
  # follow annotations.
  rule annotated_simple_name_class
    lead_annotated_simple_name_class  follow_annotations
  end

  # Leading annotations followed by either a simple name class or a
  # parenthesised name class.
  #
  # The parenthesised alternative references inner_name_class as a plain
  # nonterminal, mirroring how lead_annotated_primary writes
  # `annotations "(" pattern ")"`. The spec's attribute-passing notation
  # (`inner_name_class(anno := x)y`) is not Treetop syntax.
  # NOTE(review): handing the outer annotations to the inner name class, which
  # that notation expressed, now has to happen in node code — not done here.
  rule lead_annotated_simple_name_class
    annotations  simple_name_class
        { applyAnnotations(x, y) }
    /  annotations  "("  inner_name_class  ")"
  end

  # A name class with an exception: either `prefix:*` or `*`, then "-", then a
  # lead-annotated simple name class naming what is excluded.
  rule except_name_class
    nsName "-"  lead_annotated_simple_name_class
        { <nsName makeNsAttribute(lookupPrefix(environment, x))> <except> y </except> </nsName> }
    /  "*"  "-"  lead_annotated_simple_name_class
        { <anyName> <except> x </except> </anyName> }
  end

  # A simple name class: a prefixed name (`pfx:local`), a namespace wildcard
  # (`pfx:*`), an unprefixed name, or the any-name wildcard "*".
  #
  # The prefixed forms are tried first. Both CName and nsName start with an
  # NCName, which identifier_or_keyword also accepts; with the unprefixed
  # alternative in front it would consume just `pfx`, and because PEG choice
  # does not revisit a successful alternative the prefixed forms would never
  # be attempted. CName precedes nsName; it fails on `pfx:*` because "*" is
  # not an NCName, which lets nsName take over.
  # NOTE(review): the brace blocks are carried over unchanged; they look like
  # spec-style construction notation rather than Ruby — confirm.
  rule simple_name_class
    CName
        { <name makeNsAttribute(lookupPrefix(environment, prefix(x)))> localPart(x) </name> }
    /  nsName
        { <nsName makeNsAttribute(lookupPrefix(environment, x))/> }
    /  identifier_or_keyword
        { <name makeNsAttribute(isElem ? lookupDefault(environment) : "")> x </name> }
    /  "*"
        { <anyName/> }
  end

  # Zero or more follow annotations, each written as ">>" and an annotation
  # element.
  rule follow_annotations
    ( ">>"  annotation_element )*
  end

  # Leading annotations: documentation comments, then an optional bracketed
  # block holding annotation attributes followed by annotation elements. Every
  # part is optional, so this rule can match the empty string.
  rule annotations
    documentations
    ( "["  annotation_attributes annotation_element* "]" )?
  end

  # Zero or more `prefixed_name = literal` annotation attributes, expressed by
  # right recursion; the name is labelled foreign_attribute_name.
  #
  # The recursive alternative is tried first and the empty case last. With the
  # empty alternative in front, PEG ordered choice would always stop there and
  # no attribute could be consumed. The empty case is written '' (Treetop's
  # empty terminal) because the spec's epsilon symbol is not Treetop syntax.
  # NOTE(review): the brace block is carried over unchanged; it looks like
  # spec-style construction notation rather than Ruby — confirm.
  rule annotation_attributes
    foreign_attribute_name:prefixed_name "="  literal annotation_attributes
        { (attribute(x, y), z) }
    /  ''
  end

  # An annotation element: a foreign element name followed by its bracketed
  # attributes and content.
  rule annotation_element
    foreign_element_name annotation_attributes_content
        { element(environment, x, y) }
  end

  # Name of an annotation element: a prefixed name, or a bare identifier or
  # keyword (built via name("", x), i.e. with an empty first argument).
  #
  # prefixed_name is tried first. A prefixed name begins with an NCName, which
  # identifier_or_keyword also accepts; in the opposite order the bare
  # alternative would consume only `pfx` from `pfx:local`, and PEG choice
  # would never try the prefixed alternative.
  rule foreign_element_name
    prefixed_name
    /  identifier_or_keyword
        { name("", x) }
  end

  # An annotation element whose name is not a keyword: a non-keyword foreign
  # element name followed by its bracketed attributes and content.
  rule annotation_element_not_keyword
    foreign_element_nameNotKeyword annotation_attributes_content
        { element(environment, x, y) }
  end

  # Name of an annotation element where keywords are not allowed: a prefixed
  # name, or a bare identifier (built via name("", x)).
  #
  # prefixed_name is tried first. A prefixed name begins with an NCName that
  # `identifier` can also accept; in the opposite order the bare alternative
  # would consume only `pfx` from `pfx:local`, and PEG choice would never try
  # the prefixed alternative.
  rule foreign_element_nameNotKeyword
    prefixed_name
    /  identifier
        { name("", x) }
  end

  # Bracketed body of an annotation element: nested attributes followed by
  # annotation content.
  rule annotation_attributes_content
    "["  nested_annotation_attributes annotationContent "]"
  end

  # Zero or more `name = literal` attributes inside an annotation element,
  # expressed by right recursion.
  #
  # The recursive alternative is tried first and the empty case last. With the
  # empty alternative in front, PEG ordered choice would always stop there and
  # no attribute could be consumed. The empty case is written '' (Treetop's
  # empty terminal) because the spec's epsilon symbol is not Treetop syntax.
  # NOTE(review): the brace block is carried over unchanged; it looks like
  # spec-style construction notation rather than Ruby — confirm.
  rule nested_annotation_attributes
    anyAttributeName "="  literal nested_annotation_attributes
        { (attribute(x, y), z) }
    /  ''
  end

  # Name of an attribute inside an annotation element: a prefixed name, or a
  # bare identifier or keyword (built via name("", x)).
  #
  # prefixed_name is tried first. A prefixed name begins with an NCName, which
  # identifier_or_keyword also accepts; in the opposite order the bare
  # alternative would consume only `pfx` from `pfx:local`, and PEG choice
  # would never try the prefixed alternative.
  rule anyAttributeName
    prefixed_name
    /  identifier_or_keyword
        { name("", x) }
  end

  # Content of an annotation element: any mix of nested annotation elements
  # and literals (possibly empty).
  rule annotationContent
    ( nested_annotation_element /  literal )*
  end

  # An annotation element nested inside another: any element name followed by
  # its bracketed attributes and content.
  rule nested_annotation_element
    any_element_name annotation_attributes_content
        { element(environment, x, y) }
  end

  # Name of a nested annotation element: a prefixed name, or a bare identifier
  # or keyword.
  #
  # prefixed_name is tried first. A prefixed name begins with an NCName, which
  # identifier_or_keyword also accepts; in the opposite order the bare
  # alternative would consume only `pfx` from `pfx:local`, and PEG choice
  # would never try the prefixed alternative.
  rule any_element_name
    prefixed_name / identifier_or_keyword
  end

  # A prefixed name, written as a CName (`prefix:local`). The brace block
  # looks the prefix up in the environment and pairs it with the local part.
  rule prefixed_name
    CName
        { name(lookupPrefix(environment, prefix(x)), localPart(x)) }
  end

  # Zero or more documentation comments; matches the empty string when there
  # are none.
  # NOTE(review): the brace block refers to `y`, but the expression has a
  # single component — looks carried over from a recursive formulation; confirm.
  rule documentations
    documentation*
        { (element(environment, documentationElementName(), text(x)), y) }
  end

  # A name where keywords are permitted: an NCName, a keyword, or a
  # backslash-quoted NCName.
  #
  # The backslash terminal is written "\\". Inside a double-quoted Treetop
  # terminal a lone backslash escapes the closing quote, so "\" never
  # terminates and swallows the text that follows it.
  # NOTE(review): NCName is not defined in this grammar; presumably it comes
  # from the included XmlNameSpaces — confirm.
  rule identifier_or_keyword
    NCName / keyword / "\\" NCName
  end

  # The reserved words of the compact syntax, as an ordered choice of literal
  # terminals. No entry is a prefix of another, so the order among them does
  # not affect which one matches.
  rule keyword
    "attribute"
    /  "default"
    /  "datatypes"
    /  "div"
    /  "element"
    /  "empty"
    /  "external"
    /  "grammar"
    /  "include"
    /  "inherit"
    /  "list"
    /  "mixed"
    /  "namespace"
    /  "notAllowed"
    /  "parent"
    /  "start"
    /  "string"
    /  "text"
    /  "token"
  end

  # A literal: one or more literal segments joined by "~" (right-recursive).
  rule literal
    literal_segment ( "~"  literal )?
  end

  # A character escape: backslash, one or more "x", then one or more hex
  # digits (labelled code) in braces, e.g. \x{2A}.
  #
  # The backslash terminal is written "\\". Inside a double-quoted Treetop
  # terminal a lone backslash escapes the closing quote, so "\" never
  # terminates and swallows the text that follows it.
  # NOTE(review): no other rule in this grammar references escape_sequence.
  rule escape_sequence
    "\\"  'x'+  '{' code:[0123456789ABCDEFabcdef]+ '}'
  end

  # An identifier: an NCName that is not a keyword, or any NCName quoted with
  # a leading backslash.
  #
  # * The spec's set difference (`NCName - keyword`) has no Treetop operator,
  #   so it is expressed as a negative lookahead in front of NCName. The
  #   lookahead only rejects a keyword that ends there, so names that merely
  #   start with one (e.g. `elementary`) are still identifiers.
  # * The backslash terminal is written "\\"; a lone backslash inside a
  #   double-quoted Treetop terminal escapes the closing quote.
  # NOTE(review): the "keyword ends here" test uses an ASCII approximation of
  # the XML name characters; a keyword followed directly by a non-ASCII name
  # character would be rejected. Swap the class for the name-character rule
  # of XmlNameSpaces if it exposes one.
  rule identifier
    !( keyword ![A-Za-z0-9._-] ) NCName /  "\\"  NCName
  end

  # A colonised name: NCName, ":", NCName, with nothing between the parts.
  rule CName
    NCName ":"  NCName { qName(x, y) }
  end

  # A namespace wildcard: an NCName immediately followed by ":*".
  rule nsName
    NCName ":*"
  end

  # One quoted string segment; the text between the delimiters is labelled
  # contents. Four forms:
  #   """..."""  and  '''...'''   may span lines; end at the first closing triple
  #   "..."      and  '...'       may contain neither their own quote nor a newline
  #
  # The triple-quoted forms are tried first. Otherwise the single-quote form
  # would match the first two quotes of `"""` as an empty string, and PEG
  # choice would never reach the triple-quoted alternative.
  # Every content loop guards `.` with a negative lookahead; without the `!`
  # the character class would consume input instead of excluding it.
  rule literal_segment
      '"""'  contents:( !'"""' . )*  '"""'
    / "'''"  contents:( !"'''" . )*  "'''"
    / '"' contents:( !["\n] . )*  '"'
    / "'"  contents:( !['\n] . )* "'"
  end

  # One documentation comment: a documentation line followed by any number of
  # continuation lines.
  # Re the earlier "!CHECK": the commented-out form below is left-recursive
  # (documentation refers to itself in first position). It describes one line
  # followed by zero or more continuations, which is what the repetition
  # states directly without the left recursion.
  rule documentation
    #documentation_line / documentation documentation_continuation
    documentation_line documentation_continuation*
  end

  # A single documentation line: "##", any further "#" characters, at most one
  # space, then the remainder of the line (labelled contents).
  rule documentation_line
    "##" '#'* ' '? contents:rest_of_line
  end

  # Continuation of a documentation comment on the next line: one line break,
  # optional tabs/spaces (labelled indent), then another documentation line.
  # NOTE(review): \n and \x0A in the class denote the same character; if a
  # carriage return was intended it would be \x0D — confirm.
  rule documentation_continuation
    [\n\x0A] indent:[\x09\x20]* documentation_line
  end

  # Everything up to, but not including, the next line break (possibly
  # nothing).
  #
  # Written as a repetition of "one character that is not a line break". The
  # negation has to sit inside the loop and guard each `.`; a single `!( ... )`
  # wrapped around the whole expression is a pure lookahead that consumes no
  # input and succeeds only at end of input.
  rule rest_of_line
    ( ![\n\x0A] . )*
  end

  # A token separator: one tab, line feed or space; or a comment, i.e. "#"
  # optionally followed by one character that is neither a line break nor "#"
  # and then the rest of the line.
  # NOTE(review): no other rule in this grammar references separator, so
  # whitespace between tokens is not actually skipped anywhere — confirm how
  # it is meant to be wired in.
  rule separator
    [\x09\x0A\x20\n] / "#" ( ![\n\x0A#] . rest_of_line )?
  end

end