ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-2.0/src/mdParser/MDParser.g
Revision: 2509
Committed: Wed Dec 14 18:02:28 2005 UTC (18 years, 6 months ago) by tim
File size: 10735 byte(s)
Log Message:
fix a nasty negative integer parsing problem by refining the grammar

File Contents

# User Rev Content
1 tim 2469 header
2     {
3    
4     #include "antlr/CharScanner.hpp"
5     #include "utils/StringUtils.hpp"
6     #include "mdParser/FilenameObserver.hpp"
7     }
8    
9     options
10     {
11     language = "Cpp";
12     }
13    
14     class MDParser extends Parser; // Parser half of the grammar; shares the MD token vocabulary with MDLexer.
15    
16     options
17     {
18     k = 3; // three tokens of lookahead
19     exportVocab = MD; // same vocabulary name the lexer section declares
20     buildAST = true; // turns on AST construction, which the ^ and ! suffixes in the rules below control
21     codeGenMakeSwitchThreshold = 2;
22     codeGenBitsetTestThreshold = 3;
23    
24     }
25    
26     tokens // keyword literals of the input language, plus one token type with no literal text
27     {
28     COMPONENT = "component";
29     MOLECULE = "molecule";
30     ZCONSTRAINT = "zconstraint";
31     ATOM = "atom";
32     BOND = "bond";
33     BEND = "bend";
34     TORSION = "torsion";
35     RIGIDBODY = "rigidBody";
36     CUTOFFGROUP = "cutoffGroup";
37     FRAGMENT = "fragment";
38     MEMBERS = "members";
39     POSITION = "position";
40     ORIENTATION = "orientation";
41     ENDBLOCK; // never lexed: the block rules assign this type to the AST node of their closing RCURLY
42     }
43    
44    
45     mdfile : (statement)* // zero or more statements; no other rule here references mdfile, so it is presumably the entry rule
46     ;
47    
48     statement : assignment // a top-level statement is an assignment or one of three block kinds
49     | componentblock
50     | moleculeblock
51     | zconstraintblock
52     ;
53    
54     assignment : ID ASSIGNEQUAL^ constant SEMICOLON! // "name = value;" -- '=' becomes the subtree root (^), ';' is left out of the AST (!)
55     ;
56    
57     constant : signedNumber // right-hand side of an assignment: a number, an identifier, or a string literal
58     | ID
59     | StringLiteral
60     ;
61    
62     componentblock : COMPONENT^ LCURLY! (assignment)* RCURLY {#RCURLY->setType(ENDBLOCK);} // keyword is the root, '{' is dropped, the '}' node is kept but retyped to ENDBLOCK
63     ;
64    
65     zconstraintblock : ZCONSTRAINT^ LCURLY! (assignment)* RCURLY {#RCURLY->setType(ENDBLOCK);} // same shape as componentblock, rooted at ZCONSTRAINT
66     ;
67    
68     moleculeblock : MOLECULE^ LCURLY! (moleculestatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} // same shape, but the body is a list of moleculestatement
69     ;
70    
71     moleculestatement : assignment // anything allowed inside molecule { ... }: an assignment or one of seven nested block kinds
72     | atomblock
73     | bondblock
74     | bendblock
75     | torsionblock
76     | rigidbodyblock
77     | cutoffgroupblock
78     | fragmentblock
79     ;
80    
81     atomblock : ATOM^ LBRACKET! intConst RBRACKET! LCURLY! (atomstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} // atom[<int>] { ... }: the bracketed integer is required and stays in the AST; brackets and '{' are dropped
82     ;
83    
84     atomstatement : assignment // inside an atom block: an assignment, or position(...)/orientation(...) taking a comma-separated number list
85     | POSITION^ LPAREN! signedNumberTuple RPAREN! SEMICOLON!
86     | ORIENTATION^ LPAREN! signedNumberTuple RPAREN! SEMICOLON!
87     ;
88    
89    
90     bondblock : BOND^ (LBRACKET! intConst! RBRACKET!)? LCURLY!(bondstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} // bond { ... } or bond[<int>] { ... }: the bracketed integer is optional and, if present, is dropped from the AST
91     ;
92    
93     bondstatement : assignment // inside a bond block: an assignment, or members(<int>, ...) rooted at MEMBERS
94     | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
95     ;
96    
97     bendblock : BEND^ (LBRACKET! intConst! RBRACKET!)? LCURLY! (bendstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} // bend { ... } with an optional [<int>] that is dropped from the AST
98     ;
99    
100     bendstatement : assignment // inside a bend block: an assignment, or members(<int>, ...) rooted at MEMBERS
101     | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
102     ;
103    
104     torsionblock : TORSION^ (LBRACKET! intConst! RBRACKET!)? LCURLY!(torsionstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} // torsion { ... } with an optional [<int>] that is dropped from the AST
105     ;
106    
107     torsionstatement : assignment // inside a torsion block: an assignment, or members(<int>, ...) rooted at MEMBERS
108     | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
109     ;
110    
111     rigidbodyblock : RIGIDBODY^ LBRACKET! intConst RBRACKET! LCURLY!(rigidbodystatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} // rigidBody[<int>] { ... }: the bracketed integer is required and stays in the AST
112     ;
113    
114     rigidbodystatement : assignment // inside a rigidBody block: an assignment, or members(<int>, ...) rooted at MEMBERS
115     | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
116     ;
117    
118     cutoffgroupblock : CUTOFFGROUP^ (LBRACKET! intConst! RBRACKET!)? LCURLY! (cutoffgroupstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} // cutoffGroup { ... } with an optional [<int>] that is dropped from the AST
119     ;
120    
121     cutoffgroupstatement : assignment // inside a cutoffGroup block: an assignment, or members(<int>, ...) rooted at MEMBERS
122     | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
123     ;
124    
125     fragmentblock : FRAGMENT^ LBRACKET! intConst RBRACKET! LCURLY! (fragmentstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} // fragment[<int>] { ... }: the bracketed integer is required and stays in the AST
126     ;
127    
128     fragmentstatement : assignment // a fragment body currently accepts assignments only
129     ;
130    
131    
132    
133     signedNumberTuple : signedNumber (COMMA! signedNumber)* // one or more numbers; the separating commas are left out of the AST
134     ;
135    
136     inttuple : intConst (COMMA! intConst)* // one or more integer constants; the separating commas are left out of the AST
137     ;
138    
139     protected
140     intConst // any of the three integer token types that the lexer rule Number assigns
141     : OCTALINT | DECIMALINT | HEXADECIMALINT
142     ;
143    
144     protected
145 tim 2509 signedNumber : // no sign token appears here: an optional '+'/'-' is consumed by the lexer rule Number as part of the literal
146 tim 2469 (intConst | floatConst)
147     ;
148    
149     protected
150     floatConst // either of the two float token types that the lexer rule Number assigns
151     :
152     FLOATONE | FLOATTWO
153     ;
154    
155    
156    
157     class MDLexer extends Lexer; // Lexer half of the grammar; every rule below this line is a lexer rule.
158    
159     options
160     {
161     k = 3; // three characters of lookahead
162     exportVocab = MD; // same vocabulary name the parser section declares
163     testLiterals = false; // no literal-table lookup by default; the ID rule re-enables it for itself
164     }
165    
166     tokens {
167     DOT; // token type assigned by the Number rule to a lone '.'
168     }
169    
170     { // user code copied into the generated lexer class
171    
172    
173     int deferredLineCount; // newlines seen but not yet passed to CharScanner::newline(); not initialized here, see initDeferredLineCount()
174     FilenameObserver* observer; // notified by LineDirective with each file name; not initialized here, see setObserver()
175    
176     public:
177     void setObserver(FilenameObserver* osv) {observer = osv;}
178     void initDeferredLineCount() { deferredLineCount = 0;}
179     void deferredNewline() { // remember one more newline to be counted by the next newline() call
180     deferredLineCount++;
181     }
182    
183    
184     virtual void newline() { // count every deferred newline first, then the current one
185     for (;deferredLineCount>0;deferredLineCount--) {
186     CharScanner::newline();
187     }
188     CharScanner::newline();
189     }
190    
191     }
192    
193    
194     // Operators:
195    
196     ASSIGNEQUAL : '=' ; // single-character punctuation tokens
197     COLON : ':' ; // COLON and QUESTIONMARK are lexed, but no parser rule in this file references them
198     COMMA : ',' ;
199     QUESTIONMARK : '?' ;
200     SEMICOLON : ';' ;
201    
202     LPAREN : '(' ;
203     RPAREN : ')' ;
204     LBRACKET : '[' ;
205     RBRACKET : ']' ;
206     LCURLY : '{' ;
207     RCURLY : '}' ; // the block rules retype this token's AST node to ENDBLOCK
208    
209    
210     /*
211     EQUAL : "==" ;
212     NOTEQUAL : "!=" ;
213     LESSTHANOREQUALTO : "<=" ;
214     LESSTHAN : "<" ;
215     GREATERTHANOREQUALTO : ">=" ;
216     GREATERTHAN : ">" ;
217    
218     DIVIDE : '/' ;
219     DIVIDEEQUAL : "/=" ;
220     PLUS : '+' ;
221     PLUSEQUAL : "+=" ;
222     PLUSPLUS : "++" ;
223     MINUS : '-' ;
224     MINUSEQUAL : "-=" ;
225     MINUSMINUS : "--" ;
226     STAR : '*' ;
227     TIMESEQUAL : "*=" ;
228     MOD : '%' ;
229     MODEQUAL : "%=" ;
230     SHIFTRIGHT : ">>" ;
231     SHIFTRIGHTEQUAL : ">>=" ;
232     SHIFTLEFT : "<<" ;
233     SHIFTLEFTEQUAL : "<<=" ;
234    
235     AND : "&&" ;
236     NOT : '!' ;
237     OR : "||" ;
238    
239     AMPERSAND : '&' ;
240     BITWISEANDEQUAL : "&=" ;
241     TILDE : '~' ;
242     BITWISEOR : '|' ;
243     BITWISEOREQUAL : "|=" ;
244     BITWISEXOR : '^' ;
245     BITWISEXOREQUAL : "^=" ;
246     */
247    
248    
249     Whitespace // blanks, line breaks and backslash-newline continuations; always skipped, never seen by the parser
250     :
251     ( // whitespace ignored
252     (' ' |'\t' | '\f')
253     | // handle newlines
254     ( '\r' '\n' // MS
255     | '\r' // Mac
256     | '\n' // Unix
257     ) { newline(); }
258     | // handle continuation lines
259     ( '\\' '\r' '\n' // MS
260     | '\\' '\r' // Mac
261     | '\\' '\n' // Unix
262     ) { // silent on purpose: the line is only counted later, by the next newline() call
263     deferredNewline();}
264     )
265     {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;}
266     ;
267    
268     Comment // C-style block comment; skipped
269     :
270     "/*"
271     ( {LA(2) != '/'}? '*' // a '*' is part of the body only when the next character is not '/'
272     | EndOfLine {deferredNewline();} // line breaks inside the comment are counted later, by the next newline() call
273     | ~('*'| '\r' | '\n')
274     )*
275     "*/" {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;}
276     ;
277    
278     CPPComment // "//" comment running to the end of the line; skipped
279     :
280     "//" (~('\n' | '\r'))* (options{warnWhenFollowAmbig=false;}: EndOfLine)? // line break is optional so a comment on the last line of input, with no trailing newline, still lexes
281     {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP; newline();}
282     ;
283    
284     PREPROC_DIRECTIVE // '#' followed by a line directive; skipped after LineDirective has updated the line number and file name
285     options{paraphrase = "a line directive";}
286     :
287     '#' LineDirective
288     {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP; newline();} // newline() advances the line set by LineDirective by one
289     ;
290    
291     protected
292     LineDirective // body of a line marker: [line] <number> "<file>" [<flag> ...] end-of-line; invoked from PREPROC_DIRECTIVE
293     :
294     {
295     deferredLineCount = 0; // forget pending deferred newlines: the directive sets the line number outright
296     }
297     ("line")? // this would be for if the directive started "#line"
298     (Space)+
299     n:Decimal { setLine(oopse::lexi_cast<int>(n->getText()) - 1); } // one less than the stated number; PREPROC_DIRECTIVE calls newline() once this rule returns
300     (Space)+
301     (sl:StringLiteral) {std::string filename = sl->getText().substr(1,sl->getText().length()-2); observer->notify(filename);} // strip the surrounding quotes, then report the file name; NOTE(review): observer is dereferenced unchecked -- assumes setObserver() was called first, confirm
302     ((Space)+ Decimal)* // To support cpp flags (GNU)
303     EndOfLine
304     ;
305    
306     protected
307     Space // one blank, tab or form feed; helper for LineDirective
308     :
309     (' '|'\t'|'\f')
310     ;
311    
312    
313     // Literals:
314    
315     /*
316     * Note that we do NOT handle tri-graphs nor multi-byte sequences.
317     */
318    
319     /*
320     * Note that we can't have empty character constants (even though we
321     * can have empty strings :-).
322     */
323     CharLiteral // exactly one escape sequence or one non-quote character between single quotes; no parser rule in this file references it
324     :
325     '\'' (Escape | ~('\'')) '\''
326     ;
327    
328     /*
329     * Can't have raw imbedded newlines in string constants. Strict reading of
330     * the standard gives odd dichotomy between newlines & carriage returns.
331     * Go figure.
332     */
333     StringLiteral // double-quoted string; the quotes are part of the token text (LineDirective strips them itself)
334     :
335     '"'
336     ( Escape
337     |
338     ( "\\\r\n" // MS
339     | "\\\r" // MAC
340     | "\\\n" // Unix
341     ) {deferredNewline();} // a backslash-newline inside the string is counted later, by the next newline() call
342     |
343     ~('"'|'\r'|'\n'|'\\') // any other character except quote, raw line break and backslash
344     )*
345     '"'
346     ;
347    
348     protected
349     EndOfLine // one line terminator in any of the three conventions; does not touch the line counter itself
350     :
351     ( options{generateAmbigWarnings = false;}: // "\r\n" and '\r' share a prefix; the warning about that is silenced
352     "\r\n" // MS
353     | '\r' // Mac
354     | '\n' // Unix
355     )
356     ;
357    
358     /*
359     * Handle the various escape sequences.
360     *
361     * Note carefully that these numeric escape *sequences* are *not* of the
362     * same form as the C language numeric *constants*.
363     *
364     * There is no such thing as a binary numeric escape sequence.
365     *
366     * Octal escape sequences are either 1, 2, or 3 octal digits exactly.
367     *
368     * There is no such thing as a decimal escape sequence.
369     *
370     * Hexadecimal escape sequences are begun with a leading \x and continue
371     * until a non-hexadecimal character is found.
372     *
373     * No real handling of tri-graph sequences, yet.
374     */
375    
376     protected
377     Escape // one backslash escape: a simple character escape, an octal escape of 1-3 octal digits, or a hexadecimal escape
378     :
379     '\\'
380     ( options{warnWhenFollowAmbig=false;}:
381     'a'
382     | 'b'
383     | 'f'
384     | 'n'
385     | 'r'
386     | 't'
387     | 'v'
388     | '"'
389     | '\''
390     | '\\'
391     | '?'
392     | ('0'..'3') (options{warnWhenFollowAmbig=false;}: ('0'..'7') (options{warnWhenFollowAmbig=false;}: ('0'..'7'))? )? // up to three octal digits; 8 and 9 are not part of an octal escape
393     | ('4'..'7') (options{warnWhenFollowAmbig=false;}: ('0'..'7'))? // at most two octal digits when the first is 4-7
394     | 'x' (options{warnWhenFollowAmbig=false;}: Digit | 'a'..'f' | 'A'..'F')+
395     )
396     ;
397    
398     // Numeric Constants:
399    
400     protected
401     Digit // one decimal digit
402     :
403     '0'..'9'
404     ;
405    
406     protected
407     Decimal // one or more decimal digits; used by LineDirective for the line number and the trailing flags
408     :
409     ('0'..'9')+
410     ;
411    
412     protected
413     LongSuffix // C-style long suffix
414     : 'l'
415     | 'L'
416     ;
417    
418     protected
419     UnsignedSuffix // C-style unsigned suffix
420     : 'u'
421     | 'U'
422     ;
423    
424     protected
425     FloatSuffix // C-style float suffix
426     : 'f'
427     | 'F'
428     ;
429    
430     protected
431     Exponent // exponent part of a float: e/E or d/D, an optional sign, then at least one digit
432     :
433     ('e'|'E'|'d'|'D') ('+'|'-')? (Digit)+
434     ;
435    
436     protected
437     Vocabulary // no rule in this file references it; presumably present only so the lexer's character vocabulary spans '\3'..'\377' -- TODO confirm
438     :
439     '\3'..'\377'
440     ;
441    
442     Number // every numeric literal, and a lone '.'; each alternative overwrites _ttype with the token type the parser actually sees
443 tim 2509 :
444     ('+'|'-')? // an optional sign is lexed as part of the literal; NOTE(review): it also precedes the '.' alternative, so "+." or "-." yields a DOT token
445     (
446 tim 2469 ( (Digit)+ ('.' | 'e' | 'E' | 'd' | 'D' ) )=> // syntactic predicate: take this alternative only when the digits are followed by '.' or an exponent letter
447     (Digit)+
448     ( '.' (Digit)* (Exponent)? {_ttype = FLOATONE;} //Zuo 3/12/01
449     | Exponent {_ttype = FLOATTWO;} //Zuo 3/12/01
450     ) //{_ttype = DoubleDoubleConst;}
451     (FloatSuffix //{_ttype = FloatDoubleConst;}
452     |LongSuffix //{_ttype = LongDoubleConst;}
453     )?
454     |
455     '.' {_ttype = DOT;} // a '.' with nothing after it stays DOT; digits after it turn the token into FLOATONE below
456     ( (Digit)+ (Exponent)? {_ttype = FLOATONE;} //Zuo 3/12/01
457     //{_ttype = DoubleDoubleConst;}
458     (FloatSuffix //{_ttype = FloatDoubleConst;}
459     |LongSuffix //{_ttype = LongDoubleConst;}
460     )?
461     )?
462     |
463     '0' ('0'..'7')* //{_ttype = IntOctalConst;}
464     (LongSuffix //{_ttype = LongOctalConst;}
465     |UnsignedSuffix //{_ttype = UnsignedOctalConst;}
466     )* {_ttype = OCTALINT;} // a plain "0" also ends up here and is typed OCTALINT
467     |
468     '1'..'9' (Digit)* //{_ttype = IntIntConst;}
469     (LongSuffix //{_ttype = LongIntConst;}
470     |UnsignedSuffix //{_ttype = UnsignedIntConst;}
471     )* {_ttype = DECIMALINT;}
472     |
473     '0' ('x' | 'X') ('a'..'f' | 'A'..'F' | Digit)+
474     //{_ttype = IntHexConst;}
475     (LongSuffix //{_ttype = LongHexConst;}
476     |UnsignedSuffix //{_ttype = UnsignedHexConst;}
477     )* {_ttype = HEXADECIMALINT;}
478 tim 2509 )
479 tim 2469 ;
480    
481     ID // identifier: a letter or underscore followed by letters, digits or underscores
482     options {testLiterals = true;} // check the matched text against the literals table, so keywords such as "molecule" get their own token type
483     :
484     ('a'..'z'|'A'..'Z'|'_')
485     ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
486     ;

Properties

Name Value
svn:executable *