ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-2.0/src/mdParser/MDParser.g
Revision: 2509
Committed: Wed Dec 14 18:02:28 2005 UTC (18 years, 6 months ago) by tim
File size: 10735 byte(s)
Log Message:
fix a nasty negative integer parsing problem by refining the grammar

File Contents

# Content
1 header
2 {
3
4 #include "antlr/CharScanner.hpp"
5 #include "utils/StringUtils.hpp"
6 #include "mdParser/FilenameObserver.hpp"
7 }
8
9 options
10 {
11 language = "Cpp"; // generate the parser and lexer as C++ source
12 }
13 // Parser half of the grammar; builds an AST whose shape is set by the ^ and ! suffixes in the rules below.
14 class MDParser extends Parser;
15
16 options
17 {
18 k = 3; // up to three tokens of lookahead
19 exportVocab = MD; // name of the exported token vocabulary
20 buildAST = true; // construct an AST while parsing
21 codeGenMakeSwitchThreshold = 2; // code-generation tuning: when to emit a switch
22 codeGenBitsetTestThreshold = 3; // code-generation tuning: when to emit a bitset test
23
24 }
25 // Keyword literals, plus ENDBLOCK, which has no literal text: the block rules retype their closing RCURLY node to it.
26 tokens
27 {
28 COMPONENT = "component";
29 MOLECULE = "molecule";
30 ZCONSTRAINT = "zconstraint";
31 ATOM = "atom";
32 BOND = "bond";
33 BEND = "bend";
34 TORSION = "torsion";
35 RIGIDBODY = "rigidBody";
36 CUTOFFGROUP = "cutoffGroup";
37 FRAGMENT = "fragment";
38 MEMBERS = "members";
39 POSITION = "position";
40 ORIENTATION = "orientation";
41 ENDBLOCK;
42 }
43
44 // File-level rule: zero or more top-level statements.
45 mdfile : (statement)*
46 ;
47 // Top-level statement: an assignment or a component, molecule or zconstraint block.
48 statement : assignment
49 | componentblock
50 | moleculeblock
51 | zconstraintblock
52 ;
53 // name = constant ;   ASSIGNEQUAL becomes the AST root and the ';' is dropped from the tree.
54 assignment : ID ASSIGNEQUAL^ constant SEMICOLON!
55 ;
56 // Right-hand side of an assignment: a number, a bare identifier or a string literal.
57 constant : signedNumber
58 | ID
59 | StringLiteral
60 ;
61 // component { assignments }   COMPONENT is the subtree root; '{' is dropped, '}' is kept but retyped ENDBLOCK.
62 componentblock : COMPONENT^ LCURLY! (assignment)* RCURLY {#RCURLY->setType(ENDBLOCK);}
63 ;
64 // zconstraint { assignments }   ZCONSTRAINT is the subtree root; '{' is dropped, '}' is kept but retyped ENDBLOCK.
65 zconstraintblock : ZCONSTRAINT^ LCURLY! (assignment)* RCURLY {#RCURLY->setType(ENDBLOCK);}
66 ;
67 // molecule { molecule statements }   MOLECULE is the subtree root; '{' is dropped, '}' is kept but retyped ENDBLOCK.
68 moleculeblock : MOLECULE^ LCURLY! (moleculestatement)* RCURLY {#RCURLY->setType(ENDBLOCK);}
69 ;
70 // Anything allowed inside a molecule block: an assignment or one of the nested blocks.
71 moleculestatement : assignment
72 | atomblock
73 | bondblock
74 | bendblock
75 | torsionblock
76 | rigidbodyblock
77 | cutoffgroupblock
78 | fragmentblock
79 ;
80 // atom[index] { ... }   The index is required and stays in the AST; brackets and '{' are dropped, '}' becomes ENDBLOCK.
81 atomblock : ATOM^ LBRACKET! intConst RBRACKET! LCURLY! (atomstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);}
82 ;
83 // Inside an atom block: an assignment, or position(...) / orientation(...) taking a tuple of signed numbers.
84 atomstatement : assignment
85 | POSITION^ LPAREN! signedNumberTuple RPAREN! SEMICOLON!
86 | ORIENTATION^ LPAREN! signedNumberTuple RPAREN! SEMICOLON!
87 ;
88
89 // bond[index] { ... }   The bracketed index is optional and, when present, is dropped from the AST; '}' becomes ENDBLOCK.
90 bondblock : BOND^ (LBRACKET! intConst! RBRACKET!)? LCURLY!(bondstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);}
91 ;
92 // Inside a bond block: an assignment, or members(...) taking a tuple of integer constants.
93 bondstatement : assignment
94 | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
95 ;
96 // bend[index] { ... }   The bracketed index is optional and, when present, is dropped from the AST; '}' becomes ENDBLOCK.
97 bendblock : BEND^ (LBRACKET! intConst! RBRACKET!)? LCURLY! (bendstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);}
98 ;
99 // Inside a bend block: an assignment, or members(...) taking a tuple of integer constants.
100 bendstatement : assignment
101 | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
102 ;
103 // torsion[index] { ... }   The bracketed index is optional and, when present, is dropped from the AST; '}' becomes ENDBLOCK.
104 torsionblock : TORSION^ (LBRACKET! intConst! RBRACKET!)? LCURLY!(torsionstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);}
105 ;
106 // Inside a torsion block: an assignment, or members(...) taking a tuple of integer constants.
107 torsionstatement : assignment
108 | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
109 ;
110 // rigidBody[index] { ... }   The index is required and stays in the AST; brackets and '{' are dropped, '}' becomes ENDBLOCK.
111 rigidbodyblock : RIGIDBODY^ LBRACKET! intConst RBRACKET! LCURLY!(rigidbodystatement)* RCURLY {#RCURLY->setType(ENDBLOCK);}
112 ;
113 // Inside a rigidBody block: an assignment, or members(...) taking a tuple of integer constants.
114 rigidbodystatement : assignment
115 | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
116 ;
117 // cutoffGroup[index] { ... }   The bracketed index is optional and, when present, is dropped from the AST; '}' becomes ENDBLOCK.
118 cutoffgroupblock : CUTOFFGROUP^ (LBRACKET! intConst! RBRACKET!)? LCURLY! (cutoffgroupstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);}
119 ;
120 // Inside a cutoffGroup block: an assignment, or members(...) taking a tuple of integer constants.
121 cutoffgroupstatement : assignment
122 | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON!
123 ;
124 // fragment[index] { ... }   The index is required and stays in the AST; brackets and '{' are dropped, '}' becomes ENDBLOCK.
125 fragmentblock : FRAGMENT^ LBRACKET! intConst RBRACKET! LCURLY! (fragmentstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);}
126 ;
127 // Inside a fragment block only assignments are accepted.
128 fragmentstatement : assignment
129 ;
130
131
132 // One or more signedNumber values separated by commas; the commas are dropped from the AST.
133 signedNumberTuple : signedNumber (COMMA! signedNumber)*
134 ;
135 // One or more integer constants separated by commas; the commas are dropped from the AST.
136 inttuple : intConst (COMMA! intConst)*
137 ;
138 // Any integer token: OCTALINT, DECIMALINT or HEXADECIMALINT (the types are assigned by the lexer rule Number).
139 protected
140 intConst
141 : OCTALINT | DECIMALINT | HEXADECIMALINT
142 ;
143 // Any numeric constant, integer or float. A sign, if present, is already part of the token text (lexer rule Number).
144 protected
145 signedNumber :
146 (intConst | floatConst)
147 ;
148 // Any float token: FLOATONE (contains a '.') or FLOATTWO (digits followed directly by an exponent).
149 protected
150 floatConst
151 :
152 FLOATONE | FLOATTWO
153 ;
154
155
156 // Lexer half of the grammar; exports the same vocabulary name (MD) as the parser above.
157 class MDLexer extends Lexer;
158
159 options
160 {
161 k = 3; // up to three characters of lookahead
162 exportVocab = MD;
163 testLiterals = false; // literal lookup is off by default; the ID rule switches it back on for itself
164 }
165 // DOT has no rule of its own; the Number rule assigns it to a '.' that is not followed by digits.
166 tokens {
167 DOT;
168 }
169
170 {
171 // Line-tracking state and helpers added to the generated lexer class.
172 // Line breaks consumed inside a rule (continuations, block comments) wait here until newline() runs.
173 int deferredLineCount;
174 FilenameObserver* observer; // receives the file name found in each line directive
175
176 public:
177 void setObserver(FilenameObserver* osv) { observer = osv; }
178 void initDeferredLineCount() { deferredLineCount = 0; }
179 // Remember one more line break without reporting it yet.
180 void deferredNewline() { ++deferredLineCount; }
181
182 // Report every deferred line break to the scanner, then the
183 // line break that caused this call.
184 virtual void newline() {
185 while (deferredLineCount > 0) {
186 CharScanner::newline();
187 --deferredLineCount;
188 }
189 CharScanner::newline();
190 }
191 }
192
193
194 // Operators:
195 // Single-character punctuation tokens. COLON and QUESTIONMARK are not referenced by any parser rule in this file.
196 ASSIGNEQUAL : '=' ;
197 COLON : ':' ;
198 COMMA : ',' ;
199 QUESTIONMARK : '?' ;
200 SEMICOLON : ';' ;
201
202 LPAREN : '(' ;
203 RPAREN : ')' ;
204 LBRACKET : '[' ;
205 RBRACKET : ']' ;
206 LCURLY : '{' ;
207 RCURLY : '}' ;
208
209
210 /*
211 EQUAL : "==" ;
212 NOTEQUAL : "!=" ;
213 LESSTHANOREQUALTO : "<=" ;
214 LESSTHAN : "<" ;
215 GREATERTHANOREQUALTO : ">=" ;
216 GREATERTHAN : ">" ;
217
218 DIVIDE : '/' ;
219 DIVIDEEQUAL : "/=" ;
220 PLUS : '+' ;
221 PLUSEQUAL : "+=" ;
222 PLUSPLUS : "++" ;
223 MINUS : '-' ;
224 MINUSEQUAL : "-=" ;
225 MINUSMINUS : "--" ;
226 STAR : '*' ;
227 TIMESEQUAL : "*=" ;
228 MOD : '%' ;
229 MODEQUAL : "%=" ;
230 SHIFTRIGHT : ">>" ;
231 SHIFTRIGHTEQUAL : ">>=" ;
232 SHIFTLEFT : "<<" ;
233 SHIFTLEFTEQUAL : "<<=" ;
234
235 AND : "&&" ;
236 NOT : '!' ;
237 OR : "||" ;
238
239 AMPERSAND : '&' ;
240 BITWISEANDEQUAL : "&=" ;
241 TILDE : '~' ;
242 BITWISEOR : '|' ;
243 BITWISEOREQUAL : "|=" ;
244 BITWISEXOR : '^' ;
245 BITWISEXOREQUAL : "^=" ;
246 */
247
248 // Skips blanks, line breaks and backslash-newline continuations; the token type is always set to SKIP.
249 Whitespace
250 :
251 ( // whitespace ignored
252 (' ' |'\t' | '\f')
253 | // handle newlines
254 ( '\r' '\n' // MS
255 | '\r' // Mac
256 | '\n' // Unix
257 ) { newline(); }
258 | // handle continuation lines: the line break is only deferred here, the next newline() reports it
259 ( '\\' '\r' '\n' // MS
260 | '\\' '\r' // Mac
261 | '\\' '\n' // Unix
262 ) {
263 deferredNewline();}
264 )
265 {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;}
266 ;
267 // C-style block comment, skipped. Line breaks inside it are deferred rather than counted immediately.
268 Comment
269 :
270 "/*"
271 ( {LA(2) != '/'}? '*' // a '*' is part of the body unless the next character is '/'
272 | EndOfLine {deferredNewline();}
273 | ~('*'| '\r' | '\n')
274 )*
275 "*/" {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;}
276 ;
277 // Line comment, skipped. It must end with an EndOfLine, which is consumed here and counted through newline().
278 CPPComment
279 :
280 "//" (~('\n' | '\r'))* EndOfLine
281 {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP; newline();}
282 ;
283 // '#' followed by a line directive. Skipped; newline() accounts for the end of line consumed by LineDirective.
284 PREPROC_DIRECTIVE
285 options{paraphrase = "a line directive";}
286 :
287 '#' LineDirective
288 {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP; newline();}
289 ;
290 // Body of a line directive: optional "line", a line number, a quoted file name, optional numeric flags, end of line.
291 protected
292 LineDirective
293 : // the action below clears any pending deferred line breaks before the line number is reset
294 {
295 deferredLineCount = 0;
296 }
297 ("line")? // this would be for if the directive started "#line"
298 (Space)+
299 n:Decimal { setLine(oopse::lexi_cast<int>(n->getText()) - 1); } // n - 1 because PREPROC_DIRECTIVE calls newline() after this rule; presumably that brings the count to n
300 (Space)+
301 (sl:StringLiteral) {std::string filename = sl->getText().substr(1,sl->getText().length()-2); observer->notify(filename);} // strip the quotes and notify the observer; NOTE(review): observer is dereferenced unchecked - confirm setObserver() always runs first
302 ((Space)+ Decimal)* // To support cpp flags (GNU)
303 EndOfLine
304 ;
305 // A single blank, tab or form feed; helper for LineDirective.
306 protected
307 Space
308 :
309 (' '|'\t'|'\f')
310 ;
311
312
313 // Literals:
314
315 /*
316 * Note that we do NOT handle tri-graphs nor multi-byte sequences.
317 */
318
319 /*
320 * Note that we can't have empty character constants (even though we
321 * can have empty strings :-).
322 */
323 CharLiteral // one Escape or one other character between single quotes; no parser rule in this file refers to it
324 :
325 '\'' (Escape | ~('\'')) '\''
326 ;
327
328 /*
329 * Can't have raw imbedded newlines in string constants. Strict reading of
330 * the standard gives odd dichotomy between newlines & carriage returns.
331 * Go figure.
332 */
333 StringLiteral // the token text keeps the surrounding double quotes
334 :
335 '"'
336 ( Escape
337 |
338 ( "\\\r\n" // MS
339 | "\\\r" // MAC
340 | "\\\n" // Unix
341 ) {deferredNewline();} // backslash-newline inside a string: the line break is deferred, not counted yet
342 |
343 ~('"'|'\r'|'\n'|'\\') // any character other than a quote, a raw line break or a backslash
344 )*
345 '"'
346 ;
347 // One line terminator: CRLF, CR or LF. The rule has no action, so line counting is left to the rules that use it.
348 protected
349 EndOfLine
350 :
351 ( options{generateAmbigWarnings = false;}:
352 "\r\n" // MS
353 | '\r' // Mac
354 | '\n' // Unix
355 )
356 ;
357
358 /*
359 * Handle the various escape sequences.
360 *
361 * Note carefully that these numeric escape *sequences* are *not* of the
362 * same form as the C language numeric *constants*.
363 *
364 * There is no such thing as a binary numeric escape sequence.
365 *
366 * Octal escape sequences are either 1, 2, or 3 octal digits exactly.
367 *
368 * There is no such thing as a decimal escape sequence.
369 *
370 * Hexadecimal escape sequences are begun with a leading \x and continue
371 * until a non-hexadecimal character is found.
372 *
373 * No real handling of tri-graph sequences, yet.
374 */
375 // One backslash escape. Octal escapes take octal digits (0-7) only; a following '8' or '9' is left for the enclosing literal.
376 protected
377 Escape
378 :
379 '\\'
380 ( options{warnWhenFollowAmbig=false;}:
381 'a'
382 | 'b'
383 | 'f'
384 | 'n'
385 | 'r'
386 | 't'
387 | 'v'
388 | '"'
389 | '\''
390 | '\\'
391 | '?'
392 | ('0'..'3') (options{warnWhenFollowAmbig=false;}: ('0'..'7') (options{warnWhenFollowAmbig=false;}: ('0'..'7'))? )? // first digit 0-3: up to three octal digits
393 | ('4'..'7') (options{warnWhenFollowAmbig=false;}: ('0'..'7'))? // first digit 4-7: at most two octal digits
394 | 'x' (options{warnWhenFollowAmbig=false;}: Digit | 'a'..'f' | 'A'..'F')+ // hexadecimal: one or more hex digits
395 )
396 ;
397
398 // Numeric Constants:
399 // Exactly one decimal digit.
400 protected
401 Digit
402 :
403 '0'..'9'
404 ;
405 // One or more decimal digits; LineDirective uses it for the line number and the trailing flags.
406 protected
407 Decimal
408 :
409 ('0'..'9')+
410 ;
411 // 'l' or 'L'; Number accepts it after both integer and float literals.
412 protected
413 LongSuffix
414 : 'l'
415 | 'L'
416 ;
417 // 'u' or 'U'; Number accepts it after integer literals.
418 protected
419 UnsignedSuffix
420 : 'u'
421 | 'U'
422 ;
423 // 'f' or 'F'; Number accepts it after float literals.
424 protected
425 FloatSuffix
426 : 'f'
427 | 'F'
428 ;
429 // Exponent part of a float: e, E, d or D, an optional sign, then at least one digit.
430 protected
431 Exponent
432 :
433 ('e'|'E'|'d'|'D') ('+'|'-')? (Digit)+
434 ;
435 // Not referenced by any rule in this file. NOTE(review): presumably present only to declare the character range \3..\377 - confirm.
436 protected
437 Vocabulary
438 :
439 '\3'..'\377'
440 ;
441 // Numeric literal with an optional leading sign. Sets the token type to FLOATONE, FLOATTWO, DOT, OCTALINT, DECIMALINT or HEXADECIMALINT.
442 Number
443 :
444 ('+'|'-')? // the sign, when present, is part of the token text
445 (
446 ( (Digit)+ ('.' | 'e' | 'E' | 'd' | 'D' ) )=> // predicate: digits followed by '.' or an exponent letter start a float
447 (Digit)+
448 ( '.' (Digit)* (Exponent)? {_ttype = FLOATONE;} //Zuo 3/12/01
449 | Exponent {_ttype = FLOATTWO;} //Zuo 3/12/01
450 ) //{_ttype = DoubleDoubleConst;}
451 (FloatSuffix //{_ttype = FloatDoubleConst;}
452 |LongSuffix //{_ttype = LongDoubleConst;}
453 )?
454 | // leading '.': DOT on its own, FLOATONE when digits follow
455 '.' {_ttype = DOT;}
456 ( (Digit)+ (Exponent)? {_ttype = FLOATONE;} //Zuo 3/12/01
457 //{_ttype = DoubleDoubleConst;}
458 (FloatSuffix //{_ttype = FloatDoubleConst;}
459 |LongSuffix //{_ttype = LongDoubleConst;}
460 )?
461 )?
462 | // octal; a lone "0" is matched here and typed OCTALINT
463 '0' ('0'..'7')* //{_ttype = IntOctalConst;}
464 (LongSuffix //{_ttype = LongOctalConst;}
465 |UnsignedSuffix //{_ttype = UnsignedOctalConst;}
466 )* {_ttype = OCTALINT;}
467 | // decimal: first digit 1-9
468 '1'..'9' (Digit)* //{_ttype = IntIntConst;}
469 (LongSuffix //{_ttype = LongIntConst;}
470 |UnsignedSuffix //{_ttype = UnsignedIntConst;}
471 )* {_ttype = DECIMALINT;}
472 | // hexadecimal: 0x or 0X followed by at least one hex digit
473 '0' ('x' | 'X') ('a'..'f' | 'A'..'F' | Digit)+
474 //{_ttype = IntHexConst;}
475 (LongSuffix //{_ttype = LongHexConst;}
476 |UnsignedSuffix //{_ttype = UnsignedHexConst;}
477 )* {_ttype = HEXADECIMALINT;}
478 )
479 ;
480 // Identifier: a letter or '_' followed by any number of letters, digits or '_'.
481 ID
482 options {testLiterals = true;} // look the matched text up in the literals table (presumably the keyword strings of the parser's tokens section - confirm)
483 :
484 ('a'..'z'|'A'..'Z'|'_')
485 ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
486 ;

Properties

Name Value
svn:executable *