DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

Snippets has posted 5883 posts at DZone. View Full User Profile

RELAX NG Short Grammar

06.26.2008
| 2872 views |
  • submit to reddit
        I believe this RELAX NG grammar and the other one were both derived from the specification. However, IIRC, the spec contains two grammars, and the longer one was designed to make conversion to XML easier.

#!/usr/bin/env tt
# Author::    Daniel Brumbaugh Keeney (http://rubyforge.org/users/db-keen)
# Copyright:: 2008 Daniel Brumbaugh Keeney
# License::   GPLv3+
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# http://relaxng.org/compact-tutorial-20030326.html
# http://relaxng.org/tutorial-20011203.html
grammar RelaxNGcustom

  #include XmlNameSpaces

  # Entry point: optional declarations followed by either a run of grammar
  # content (start / define / div / include) or a single pattern.
  #
  # Grammar content is tried before `pattern` because PEG choice is ordered:
  # a definition such as `foo = ...` begins with an identifier, which is itself
  # a valid pattern, so trying `pattern` first would commit to the bare
  # reference and never reach the grammar-content branch.
  # Whitespace and comments are allowed before, between and after the parts.
  rule top_level
    separator*
    ( decl separator* )*
    ( ( grammar_content separator* )+ / pattern separator* )?
  end

  # Namespace and datatype-library declarations.
  #
  # At least one separator is required after a leading keyword (so that
  # `namespacefoo` is not read as a declaration); any amount of whitespace,
  # including none, is accepted around "=".
  rule decl
      "namespace" separator+ identifier_or_keyword separator*
        "=" separator* namespace_uri_literal
    / "default" separator+ "namespace"
        ( separator+ identifier_or_keyword )? separator*
        "=" separator* namespace_uri_literal
    / "datatypes" separator+ identifier_or_keyword separator*
        "=" separator* literal
  end

  # A pattern is one or more items joined by a single kind of connector
  # ("," sequence, "&" interleave or "|" choice). Mixing connectors requires
  # parentheses, which are handled by pattern_primary.
  #
  # The rule is written iteratively: a packrat PEG parser cannot handle the
  # left-recursive form `pattern "," pattern`, which would recurse forever.
  rule pattern
    pattern_item
    (   ( separator* "," separator* pattern_item )+
      / ( separator* "&" separator* pattern_item )+
      / ( separator* "|" separator* pattern_item )+
    )?
  end

  # A primary pattern with an optional repetition suffix.
  rule pattern_item
    pattern_primary ( separator* ( "?" / "*" / "+" ) )?
  end

  # The non-recursive-on-the-left pattern forms.
  #
  # Keyword alternatives are followed by `!NCNameChar` so that a keyword is not
  # matched as the prefix of a longer name (e.g. "empty" inside `emptyList`).
  # The datatype alternatives come before `identifier`, because an identifier
  # would otherwise match the prefix part of a qualified name like `xsd:string`
  # and leave the remainder unparsed.
  rule pattern_primary
      "element" !NCNameChar separator* name_class separator*
        "{" separator* pattern separator* "}"
    / "attribute" !NCNameChar separator* name_class separator*
        "{" separator* pattern separator* "}"
    / "list" !NCNameChar separator* "{" separator* pattern separator* "}"
    / "mixed" !NCNameChar separator* "{" separator* pattern separator* "}"
    / "parent" !NCNameChar separator* identifier
    / "empty" !NCNameChar
    / "text" !NCNameChar
    / "notAllowed" !NCNameChar
    / "external" !NCNameChar separator* any_uri_literal ( separator* inherit )?
    / "grammar" !NCNameChar separator*
        "{" separator* ( grammar_content separator* )* "}"
    / "(" separator* pattern separator* ")"
    / ( datatype_name !NCNameChar separator* )? datatype_value
    / datatype_name !NCNameChar
        ( separator* "{" separator* ( param separator* )* "}" )?
        ( separator* except_pattern )?
    / identifier
  end

  # A datatype parameter: name "=" literal.
  # Whitespace around "=" is optional and may be repeated.
  rule param
    identifier_or_keyword separator* "=" separator* literal
  end

  # The "- pattern" exclusion that may follow a datatype name.
  # Whitespace is allowed between "-" and the excluded pattern.
  rule except_pattern
    "-" separator* pattern
  end

  # One component of a grammar body: a start definition, a named definition,
  # a div group, or an include with an optional inherit clause and an optional
  # override block.
  #
  # Optional parts are written with `( ... )?`. Square brackets are character
  # classes in Treetop, so the EBNF-style `[inherit]` would match a single one
  # of the letters i, n, h, e, r, t rather than an optional inherit clause.
  rule grammar_content
      start
    / define
    / "div" separator* "{" separator* ( grammar_content separator* )* "}"
    / "include" separator* any_uri_literal ( separator* inherit )?
        ( separator* "{" separator* ( include_content separator* )* "}" )?
  end

  # One component of an include override block: a start definition, a named
  # definition, or a nested div. Alternatives are ordered like those of
  # grammar_content, and whitespace is allowed inside the div braces.
  rule include_content
      start
    / define
    / "div" separator* "{" separator* ( include_content separator* )* "}"
  end

  # The start definition: "start" assign_method pattern.
  # Whitespace around the assignment operator is optional.
  rule start
    "start" separator* assign_method separator* pattern
  end

  # A named definition: identifier assign_method pattern.
  # Whitespace around the assignment operator is optional and may be repeated.
  rule define
    identifier separator* assign_method separator* pattern
  end

  # Assignment operators: plain "=", choice-combine "|=" and
  # interleave-combine "&=". None is a prefix of another, so the order of the
  # alternatives does not affect what is matched.
  rule assign_method
      "|="
    / "&="
    / "="
  end

  # A name class: one or more primaries joined by "|".
  #
  # Written iteratively because the left-recursive form
  # `name_class "|" name_class` cannot be parsed by a packrat PEG parser.
  rule name_class
    name_class_primary ( separator* "|" separator* name_class_primary )*
  end

  # A single name-class term.
  #
  # `nsName` (prefix:*) and "*" are tried before `name`: a plain name would
  # otherwise match just the prefix of `prefix:*` and commit to it.
  rule name_class_primary
      nsName ( separator* except_name_class )?
    / "*" ( separator* except_name_class )?
    / name
    / "(" separator* name_class separator* ")"
  end

  # An element or attribute name: a qualified name (prefix:local) or a bare
  # identifier/keyword.
  #
  # CName is tried first. With the opposite order, identifier_or_keyword
  # matches the prefix of `prefix:local` and the CName alternative is never
  # reached.
  rule name
    CName / identifier_or_keyword
  end

  # The "- name_class" exclusion that may follow "*" or an nsName.
  # Whitespace is allowed between "-" and the excluded name class.
  rule except_name_class
    "-" separator* name_class
  end

  # A datatype name: a qualified name, or one of the built-in "string" and
  # "token" datatypes.
  #
  # The built-in names must not be followed by a further name character, so
  # that e.g. `stringList` is not read as the datatype "string" followed by
  # leftover input.
  rule datatype_name
    CName / ( "string" / "token" ) !NCNameChar
  end

  # A datatype value; syntactically just a literal.
  rule datatype_value
    literal
  end

  # A URI operand (used by "external" and "include"); syntactically just a
  # literal. No URI validation is performed by this rule.
  rule any_uri_literal
    literal
  end

  # The right-hand side of a namespace declaration: the bare word "inherit"
  # or a literal. A literal always starts with a quote character, so the two
  # alternatives cannot overlap and their order does not matter.
  rule namespace_uri_literal
      "inherit"
    / literal
  end

  # The inherit clause: "inherit" "=" prefix.
  # Whitespace around "=" is optional and may be repeated.
  rule inherit
    "inherit" separator* "=" separator* identifier_or_keyword
  end

  # Any name, keyword or not, or a backslash-escaped name.
  # An NCName cannot begin with a backslash, so trying the escaped form first
  # matches exactly the same input as trying it last.
  rule identifier_or_keyword
      '\\' NCName
    / NCName
    / keyword
  end

  # A name that is not a keyword, or a backslash-escaped name (which may be a
  # keyword).
  #
  # The lookahead rejects input only when a keyword is NOT followed by another
  # name character, i.e. when the whole name is exactly a keyword. Without the
  # inner `!`, the test is inverted: bare keywords such as `text` are accepted
  # and ordinary names that merely start with a keyword (`textual`,
  # `elements`) are rejected.
  rule identifier
    !( keyword !NCNameChar ) NCName / '\\' NCName
  end

  # A qualified name: prefix, a colon, then the local part, with no
  # whitespace in between.
  rule CName
    NCName ":" NCName
  end

  # A namespace wildcard: a prefix followed by a colon and an asterisk.
  rule nsName
    NCName ":" "*"
  end

  # A literal: one quoted segment, optionally concatenated with further
  # segments using "~".
  #
  # The repetition is `*`, not `+`: with `+` at least one "~" is required and
  # a plain single-segment literal such as "abc" would not parse.
  rule literal
    literal_segment ( separator* "~" separator* literal_segment )*
  end

  # One quoted string segment; the text between the delimiters is labelled
  # `contents`.
  #
  # The triple-quoted forms are tried first. PEG choice is ordered, so with
  # the single-character delimiters first, `"""abc"""` would be matched as the
  # empty string `""` and the triple-quoted alternatives would never be tried.
  # Single-character-delimited segments may not contain their delimiter or a
  # newline; both use a negative lookahead for that.
  rule literal_segment
      '"""' contents:( !'"""' . )* '"""'
    / "'''" contents:( !"'''" . )* "'''"
    / '"' contents:( !["\n] . )* '"'
    / "'" contents:( !['\n] . )* "'"
  end

  # The reserved words of the language, in alphabetical order.
  # No keyword is a prefix of another, so the first alternative that matches
  # is the only one that can.
  rule keyword
      "attribute" / "default"  / "datatypes" / "div"
    / "element"   / "empty"    / "external"  / "grammar"
    / "include"   / "inherit"  / "list"      / "mixed"
    / "namespace" / "notAllowed" / "parent"  / "start"
    / "string"    / "text"     / "token"
  end

  # Consumes every character up to, but not including, the next line break
  # (LF or CR), or up to end of input.
  #
  # The previous form was a bare negative lookahead, which consumes nothing
  # and succeeds only when no further non-newline input exists.
  rule rest_of_line
    ( ![\x0A\x0D] . )*
  end

  # One unit of insignificant input: a single whitespace character (tab, LF,
  # CR or space) or a "#" comment running to the end of the line.
  #
  # CR (\x0D) is included; the earlier class listed LF twice (as \x0A and \n)
  # and omitted CR. The comment body is spelled out here so that the rule does
  # not depend on any other rule to consume the comment text.
  rule separator
    [\x09\x0A\x0D\x20] / "#" ( ![\x0A\x0D] . )*
  end

  # Stolen from xml-namespaces.treetop and xml.treetop

  # A colon-free name: one start character followed by any number of
  # name characters.
  rule NCName
    NCNameStartChar NCNameChar*
  end

  # Any name character except the colon; the lookahead removes ':' from the
  # set accepted by name_char.
  rule NCNameChar
    !':' name_char
  end

  # A character allowed after the first character of a name: a word character,
  # '.', '-', ':', a combining character or an extender.
  #
  # Alternatives are separated with "/", Treetop's ordered-choice operator;
  # "|" is W3C EBNF notation and is not Treetop syntax.
  rule name_char
    [\w] / '.' / '-' / ':' / combining_char / extender
  end

  # Combining characters allowed inside names.
  #
  # The code points are the same as before, rewritten from W3C EBNF notation
  # (`#xNNNN`, `|`) into character classes with \uNNNN escapes joined by "/".
  # In Treetop a bare "#" starts a comment, and `[#x0300-#x0345]` is a class
  # of literal characters rather than a code-point range.
  # One class per line, grouped by the leading digits of the code points.
  # NOTE(review): \uNNNN in a character class relies on the Ruby regexp engine
  # understanding that escape and on UTF-8 input -- confirm for the Ruby
  # version in use.
  rule combining_char
      [\u0300-\u0345\u0360-\u0361\u0483-\u0486]
    / [\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4]
    / [\u064B-\u0652\u0670\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4\u06E7-\u06E8\u06EA-\u06ED]
    / [\u0901-\u0903\u093C\u093E-\u094C\u094D\u0951-\u0954\u0962-\u0963]
    / [\u0981-\u0983\u09BC\u09BE\u09BF\u09C0-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09E2-\u09E3]
    / [\u0A02\u0A3C\u0A3E\u0A3F\u0A40-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71]
    / [\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD]
    / [\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57]
    / [\u0B82-\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7]
    / [\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56]
    / [\u0C82-\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6]
    / [\u0D02-\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57]
    / [\u0E31\u0E34-\u0E3A\u0E47-\u0E4E]
    / [\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD]
    / [\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9]
    / [\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A]
  end

  # Extender characters allowed inside names.
  #
  # Same code points as before, rewritten from W3C EBNF notation (`#xNNNN`,
  # `|`) into a single character class with \uNNNN escapes. In Treetop a bare
  # "#" starts a comment, so the previous rule body was read as empty.
  # NOTE(review): \uNNNN in a character class relies on the Ruby regexp engine
  # understanding that escape and on UTF-8 input -- confirm for the Ruby
  # version in use.
  rule extender
    [\u00B7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035\u309D-\u309E\u30FC-\u30FE]
  end

  # First character of a name: an ASCII letter or underscore. Non-ASCII start
  # characters are not accepted by this class.
  rule NCNameStartChar
    [a-zA-Z_]
  end

end