Class: Abnf::Parser

Inherits:
Object
  • Object
show all
Defined in:
../lib/abnf_parser.rb

Overview

Abnf::Parser understands the ABNF syntax specified by RFC 5234 www.ietf.org/rfc/rfc5234.txt — with these important exceptions:

  1. Variable repetitions (Chapter 3.6 of the spec) have a limited maximum number of occurrences (i.e. the decimal value in the original specification). Thus, parsing expressions like “30000 HTAB”, “3*CHAR” or “*SP” terminates with an exception. This limit is controlled by the Parser#max_repetitions attribute.

  2. The core rule LWSP (Appendix B.1) is not implemented, for the same reason (there is an infinite repetition in the declaration of this rule).

  3. Rule names (nonterminal external symbols) cannot begin with the underscore character (‘_’). (Reason: All the internal nodes created by the parser begin with the underscore.)

Instance Attribute Summary (collapse)

Instance Method Summary (collapse)

Constructor Details

- (Parser) initialize

Create the new RegExp machinery of the ABNF parser.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File '../lib/abnf_parser.rb', line 29

# Set the default repetition limit and build the state-transition table
# that drives #parse.
#
# @transitions maps a state name to a hash of token type => handler proc.
# #parse calls each handler with the parser itself and the current token;
# the value the handler returns is the name of the next state. A token type
# with no entry in the current state's hash is rejected by #parse.
def initialize
  @max_repetitions = 100

  # Handlers for tokens that are accepted both directly in an element list
  # (:elements) and right after a repetition count (:rpt_1). They are defined
  # once and merged into both states so the two copies cannot drift apart.
  operand = {
    :symbol => proc {|g,t| g.tok=t; :elements },
    :literal => proc {|g,t| g.tok=t; :elements },
    :_digit => proc {|g,t| g.ranges(t,['0'..'9']); :elements },
    :_hexdig => proc {|g,t| g.ranges(t,['0'..'9','A'..'F']); :elements },
    :_bit => proc {|g,t| g.ranges(t,['0'..'1']); :elements },
    :_alpha => proc {|g,t| g.ranges(t,['A'..'Z','a'..'z']); :elements },
    :_char => proc {|g,t| g.ranges(t,[0x01..0x7F]); :elements },
    :_vchar => proc {|g,t| g.ranges(t,[0x21..0x7E]); :elements },
    :_octet => proc {|g,t| g.ranges(t,[0x00..0xFF]); :elements },
    :_ctl => proc {|g,t| g.ranges(t,[0x00..0x1F,0x7F..0x7F]); :elements },
    # The tab must be double-quoted: a single-quoted '\t' is a backslash
    # followed by the letter t, not a horizontal tab.
    :_wsp => proc {|g,t| g.ranges(t,[' '..' ',"\t".."\t"]); :elements },
    :_cr => proc {|g,t| g.entity="\r"; :elements },
    :_lf => proc {|g,t| g.entity="\n"; :elements },
    :_crlf => proc {|g,t| g.entity="\r\n"; :elements },
    :_sp => proc {|g,t| g.entity=" "; :elements },
    :_dquote => proc {|g,t| g.entity=%Q("); :elements },
    :_htab => proc {|g,t| g.entity="\t"; :elements },
    # Numeric terminals move to :dot, where further dot-joined values or the
    # end of the terminal are accepted.
    :entity_dec => proc {|g,t| g.entity=t.data.to_i.chr; :dot },
    :entity_hex => proc {|g,t| g.entity=t.data.hex.chr; :dot },
    :entity_bin => proc {|g,t| g.entity=bin2chr(t.data); :dot },
    :range_hex => proc {|g,t| g.rng(t) {|v| v.hex.chr}; :elements },
    :range_dec => proc {|g,t| g.rng(t) {|v| v.to_i.chr}; :elements },
    :range_bin => proc {|g,t| g.rng(t) {|v| bin2chr v}; :elements },
    :seq_begin => proc {|g,t| g.group=t; :elements }
  }

  @transitions = {
    # Before the first rule: wait for the rule name, which also becomes the
    # grammar's start symbol.
    :start => {
      :symbol => proc {|g,t| g.rule=t; g.start_symbol=t.data; :equals },
      :newline => proc { :start },
      :comment => proc { :start }
    },
    # After a rule name: expect the defining operator.
    :equals => {
      :equals => proc { :elements },
      :eq_slash => proc {|g,t| g.retype=:incremental; :elements },
      :comment => proc { :equals },
      :space => proc { :equals }
    },
    # Inside the right-hand side of a rule.
    :elements => operand.merge({
      :slash => proc {|g,t| g.alt; :elements },
      :newline => proc {|g,t| :next_rule },
      :seq_end => proc {|g,t| g.store=t; :elements },
      :opt_begin => proc {|g,t| g.opt=t; :elements },
      :opt_end => proc {|g,t| g.store=t; :elements },
      :comment => proc { :elements },
      :space => proc { :elements },
      :number => proc {|g,t| g.repeat=t.data; :rpt_1 },
      :asterisk => proc {|g,t| g.repeat=0; :rpt_2 },
      :eof => proc {|g,t| g.retype=:eof; g.store=t; :stop }
    }),
    # Right after a numeric terminal.
    :dot => {
      :dot => proc { :elements },
      :entity_dec => proc {|g,t| g.entity=t.data.to_i.chr; :dot },
      :entity_hex => proc {|g,t| g.entity=t.data.hex.chr; :dot },
      :entity_bin => proc {|g,t| g.entity=bin2chr(t.data); :dot },
      :space => proc { :elements },
      :eof => proc {|g,t| g.retype=:eof; g.store=t; :stop },
      :comment => proc { :elements }
    },
    # After a leading repetition count: either '*' or the repeated operand.
    :rpt_1 => operand.merge({
      :number => proc {|g,t| g.repeat=t.data; :rpt_1 },
      :asterisk => proc { :rpt_2 },
      :space => proc { :rpt_1 }
    }),
    # After '*': only a number (the upper bound) is accepted.
    :rpt_2 => {
      :number => proc {|g,t| g.repeat=t.data; :elements },
      :space => proc { :rpt_2 }
    },
    # After a newline: a new rule name starts the next rule, while leading
    # space returns to :elements.
    :next_rule => {
      :symbol => proc {|g,t| g.store=t; g.rule=t; :equals },
      :comment => proc { :next_rule },
      :space => proc { :elements },
      :newline => proc { :next_rule },
      :eof => proc {|g,t| g.retype=:eof; g.store=t; :stop }
    }
  }
end

Instance Attribute Details

- (Object) max_repetitions

The limit for the repetition rules (such as 1000*CRLF), defaulting to 100.



26
27
28
# File '../lib/abnf_parser.rb', line 26

# Reader for the repetition limit held in @max_repetitions
# (#initialize sets it to 100).
#
# @return [Object] the current value of @max_repetitions
def max_repetitions; @max_repetitions; end

Instance Method Details

- (Object) parse(stream)

Create a Mapper::Grammar structure from the token stream (i.e. an array of Mapper::Token items) preprocessed by the Abnf::Tokenizer.



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File '../lib/abnf_parser.rb', line 134

# Run the transition table over a token stream and return the grammar
# object that was created for this run.
#
# Each token is dispatched on its #type to the handler registered for the
# current state; the handler is called with this parser and the token, and
# its return value becomes the next state.
#
# @param stream [#each] tokens responding to #type
# @return [Grammar] the object stored in @gram
# @raise [RuntimeError] when the current state has no handler for a token type
# @raise [KeyError] when a handler returned a state missing from @transitions
def parse(stream)
  # Reset the per-run working state before creating the new grammar.
  @stack, @iv = [], 0
  @repeat_range, @range_rules = [], []
  @gram = Grammar.new

  current = :start
  stream.each do |token|
    handler = @transitions.fetch(current).fetch(token.type, nil)
    if handler.nil?
      raise "Parser: unexpected token '#{token.type}' when in #{current}"
    end
    current = handler.call(self, token)
  end

  @gram
end