| 1 | header | 
| 2 | { | 
| 3 |  | 
| 4 | #include "antlr/CharScanner.hpp" | 
| 5 | #include "utils/StringUtils.hpp" | 
| 6 | #include "mdParser/FilenameObserver.hpp" | 
| 7 | } | 
| 8 | /* Grammar-wide option: generate C++ code. */ | 
| 9 | options | 
| 10 | { | 
| 11 | language = "Cpp"; | 
| 12 | } | 
| 13 | /* Parser grammar: 3-token lookahead, builds an AST, exports its token vocabulary under the name MD. */ | 
| 14 | class MDParser extends Parser; | 
| 15 |  | 
| 16 | options | 
| 17 | { | 
| 18 | k = 3; | 
| 19 | exportVocab = MD; | 
| 20 | buildAST = true; | 
| 21 | codeGenMakeSwitchThreshold = 2; | 
| 22 | codeGenBitsetTestThreshold = 3; | 
| 23 | /* the two codeGen thresholds above only tune the shape of the generated lookahead tests */ | 
| 24 | } | 
| 25 | /* Keyword tokens with their literal spellings. ENDBLOCK has no literal; the block rules below assign it to closing braces. */ | 
| 26 | tokens | 
| 27 | { | 
| 28 | COMPONENT   = "component"; | 
| 29 | MOLECULE    = "molecule"; | 
| 30 | ZCONSTRAINT = "zconstraint"; | 
| 31 | ATOM        = "atom"; | 
| 32 | BOND        = "bond"; | 
| 33 | BEND        = "bend"; | 
| 34 | TORSION     = "torsion"; | 
| 35 | RIGIDBODY   = "rigidBody"; | 
| 36 | CUTOFFGROUP = "cutoffGroup"; | 
| 37 | FRAGMENT    = "fragment"; | 
| 38 | MEMBERS     = "members"; | 
| 39 | POSITION    = "position"; | 
| 40 | ORIENTATION = "orientation"; | 
| 41 | ENDBLOCK; | 
| 42 | } | 
| 43 |  | 
| 44 | /* A whole input: zero or more statements. No other rule in this file refers to mdfile. */ | 
| 45 | mdfile  : (statement)* | 
| 46 | ; | 
| 47 | /* Top-level statement: an assignment or a component, molecule or zconstraint block. */ | 
| 48 | statement : assignment | 
| 49 | | componentblock | 
| 50 | | moleculeblock | 
| 51 | | zconstraintblock | 
| 52 | ; | 
| 53 | /* ID = constant ; with the equals sign as subtree root and the semicolon left out of the AST. */ | 
| 54 | assignment  : ID ASSIGNEQUAL^ constant SEMICOLON! | 
| 55 | ; | 
| 56 | /* Right-hand side of an assignment: integer, float, identifier or string literal. */ | 
| 57 | constant    : intConst | 
| 58 | | floatConst | 
| 59 | | ID | 
| 60 | | StringLiteral | 
| 61 | ; | 
| 62 | /* component block of zero or more assignments: COMPONENT is the subtree root, the opening brace is dropped, the closing brace is kept but retyped to ENDBLOCK. */ | 
| 63 | componentblock  : COMPONENT^ LCURLY! (assignment)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 64 | ; | 
| 65 | /* zconstraint block of zero or more assignments: ZCONSTRAINT is the subtree root, the closing brace is retyped to ENDBLOCK. */ | 
| 66 | zconstraintblock  : ZCONSTRAINT^ LCURLY! (assignment)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 67 | ; | 
| 68 | /* molecule block of zero or more molecule statements: MOLECULE is the subtree root, the closing brace is retyped to ENDBLOCK. */ | 
| 69 | moleculeblock : MOLECULE^ LCURLY! (moleculestatement)*  RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 70 | ; | 
| 71 | /* Anything allowed inside a molecule block: an assignment or one of the seven nested block kinds. */ | 
| 72 | moleculestatement : assignment | 
| 73 | | atomblock | 
| 74 | | bondblock | 
| 75 | | bendblock | 
| 76 | | torsionblock | 
| 77 | | rigidbodyblock | 
| 78 | | cutoffgroupblock | 
| 79 | | fragmentblock | 
| 80 | ; | 
| 81 | /* atom block: the bracketed integer index is required and kept in the AST; brackets and opening brace are dropped, the closing brace is retyped to ENDBLOCK. */ | 
| 82 | atomblock : ATOM^ LBRACKET! intConst RBRACKET! LCURLY! (atomstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 83 | ; | 
| 84 | /* Inside an atom block: an assignment, or a position or orientation entry holding a parenthesised, comma-separated number tuple. */ | 
| 85 | atomstatement : assignment | 
| 86 | | POSITION^ LPAREN! doubleNumberTuple RPAREN! SEMICOLON! | 
| 87 | | ORIENTATION^  LPAREN! doubleNumberTuple RPAREN! SEMICOLON! | 
| 88 | ; | 
| 89 |  | 
| 90 | /* bond block: the bracketed index is optional and, when present, is parsed but left out of the AST (intConst!). */ | 
| 91 | bondblock : BOND^ (LBRACKET! intConst! RBRACKET!)?  LCURLY!(bondstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 92 | ; | 
| 93 | /* Inside a bond block: an assignment, or a members entry holding a parenthesised, comma-separated integer tuple. */ | 
| 94 | bondstatement : assignment | 
| 95 | | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON! | 
| 96 | ; | 
| 97 | /* bend block: the bracketed index is optional and, when present, is parsed but left out of the AST (intConst!). */ | 
| 98 | bendblock : BEND^ (LBRACKET! intConst! RBRACKET!)? LCURLY!  (bendstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 99 | ; | 
| 100 | /* Inside a bend block: an assignment, or a members entry holding a parenthesised, comma-separated integer tuple. */ | 
| 101 | bendstatement : assignment | 
| 102 | | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON! | 
| 103 | ; | 
| 104 | /* torsion block: the bracketed index is optional and, when present, is parsed but left out of the AST (intConst!). */ | 
| 105 | torsionblock  : TORSION^ (LBRACKET! intConst! RBRACKET!)?  LCURLY!(torsionstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 106 | ; | 
| 107 | /* Inside a torsion block: an assignment, or a members entry holding a parenthesised, comma-separated integer tuple. */ | 
| 108 | torsionstatement  : assignment | 
| 109 | | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON! | 
| 110 | ; | 
| 111 | /* rigidBody block: the bracketed integer index is required and kept in the AST; the closing brace is retyped to ENDBLOCK. */ | 
| 112 | rigidbodyblock  : RIGIDBODY^  LBRACKET! intConst RBRACKET! LCURLY!(rigidbodystatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 113 | ; | 
| 114 | /* Inside a rigidBody block: an assignment, or a members entry holding a parenthesised, comma-separated integer tuple. */ | 
| 115 | rigidbodystatement  : assignment | 
| 116 | | MEMBERS^ LPAREN!  inttuple  RPAREN! SEMICOLON! | 
| 117 | ; | 
| 118 | /* cutoffGroup block: the bracketed index is optional and, when present, is parsed but left out of the AST (intConst!). */ | 
| 119 | cutoffgroupblock  : CUTOFFGROUP^ (LBRACKET! intConst! RBRACKET!)? LCURLY! (cutoffgroupstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 120 | ; | 
| 121 | /* Inside a cutoffGroup block: an assignment, or a members entry holding a parenthesised, comma-separated integer tuple. */ | 
| 122 | cutoffgroupstatement  : assignment | 
| 123 | | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON! | 
| 124 | ; | 
| 125 | /* fragment block: the bracketed integer index is required and kept in the AST; the closing brace is retyped to ENDBLOCK. */ | 
| 126 | fragmentblock : FRAGMENT^ LBRACKET! intConst RBRACKET! LCURLY! (fragmentstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 127 | ; | 
| 128 | /* Inside a fragment block only assignments are accepted. */ | 
| 129 | fragmentstatement : assignment | 
| 130 | ; | 
| 131 |  | 
| 132 |  | 
| 133 | /* One or more doubleNumber values separated by commas; the commas are left out of the AST. */ | 
| 134 | doubleNumberTuple   : doubleNumber (COMMA! doubleNumber)* | 
| 135 | ; | 
| 136 | /* One or more integer constants separated by commas; the commas are left out of the AST. */ | 
| 137 | inttuple      : intConst (COMMA! intConst)* | 
| 138 | ; | 
| 139 | /* Integer constant: a NUM_INT or NUM_LONG token. */ | 
| 140 | protected | 
| 141 | intConst | 
| 142 | :  NUM_INT | NUM_LONG | 
| 143 | ; | 
| 144 | /* Any numeric constant: integer or floating point. */ | 
| 145 | protected | 
| 146 | doubleNumber  : | 
| 147 | (intConst | floatConst) | 
| 148 | ; | 
| 149 | /* Floating-point constant: a NUM_FLOAT or NUM_DOUBLE token. */ | 
| 150 | protected | 
| 151 | floatConst | 
| 152 | : | 
| 153 | NUM_FLOAT | NUM_DOUBLE | 
| 154 | ; | 
| 155 |  | 
| 156 |  | 
| 157 | /* Lexer grammar: 3-character lookahead, same MD vocabulary name as the parser; literal testing is off by default and enabled per rule. */ | 
| 158 | class MDLexer extends Lexer; | 
| 159 |  | 
| 160 | options | 
| 161 | { | 
| 162 | k = 3; | 
| 163 | exportVocab = MD; | 
| 164 | testLiterals = false; | 
| 165 | } | 
| 166 | /* DOT has no rule of its own; NUM_INT assigns it when it matches a lone '.'. */ | 
| 167 | tokens { | 
| 168 | DOT; | 
| 169 | } | 
| 170 |  | 
| 171 | { | 
| 172 | /* Extra members placed in the generated lexer class. */ | 
| 173 | /* Line endings consumed inside tokens that have not yet been passed on to CharScanner::newline(). */ | 
| 174 | int deferredLineCount; | 
| 175 | /* Receives file names found in line directives; only ever assigned through setObserver(). */ | 
| 176 | FilenameObserver* observer; | 
| 177 |  | 
| 178 | public: | 
| 179 | void setObserver(FilenameObserver* osv) { observer = osv; } | 
| 180 | void initDeferredLineCount() { deferredLineCount = 0; } | 
| 181 | void deferredNewline() { ++deferredLineCount; } | 
| 182 |  | 
| 183 | /* Flush every deferred line ending to the base scanner, then count the current one. */ | 
| 184 | virtual void newline() { | 
| 185 | while (deferredLineCount > 0) { | 
| 186 | CharScanner::newline(); | 
| 187 | --deferredLineCount; | 
| 188 | } | 
| 189 | CharScanner::newline(); | 
| 190 | } | 
| 191 |  | 
| 192 | } | 
| 193 |  | 
| 194 |  | 
| 195 | // Operators and punctuation: each rule below matches exactly one character. | 
| 196 |  | 
| 197 | ASSIGNEQUAL     : '=' ; | 
| 198 | COLON           : ':' ; | 
| 199 | COMMA           : ',' ; | 
| 200 | QUESTIONMARK    : '?' ; | 
| 201 | SEMICOLON       : ';' ; | 
| 202 |  | 
| 203 | LPAREN          : '(' ; | 
| 204 | RPAREN          : ')' ; | 
| 205 | LBRACKET        : '[' ; | 
| 206 | RBRACKET        : ']' ; | 
| 207 | LCURLY          : '{' ; | 
| 208 | RCURLY          : '}' ; | 
| 209 | /* Skipped whitespace: blanks, line endings (reported via newline()) and backslash-newline continuations (counted via deferredNewline()). */ | 
| 210 | Whitespace | 
| 211 | : | 
| 212 | ( // whitespace ignored | 
| 213 | (' ' |'\t' | '\f') | 
| 214 | | // handle newlines | 
| 215 | ( '\r' '\n' // MS | 
| 216 | | '\r'    // Mac | 
| 217 | | '\n'    // Unix | 
| 218 | ) { newline(); } | 
| 219 | | // handle continuation lines | 
| 220 | ( '\\' '\r' '\n'  // MS | 
| 221 | | '\\' '\r'   // Mac | 
| 222 | | '\\' '\n'   // Unix | 
| 223 | ) { /* silent: only remember the line; it is reported on the next newline() call */ | 
| 224 | deferredNewline();} | 
| 225 | ) | 
| 226 | {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;} | 
| 227 | ; | 
| 228 | /* C-style block comment, skipped. A star is consumed only when the character after it is not a slash; line endings inside are recorded with deferredNewline(). */ | 
| 229 | Comment | 
| 230 | : | 
| 231 | "/*" | 
| 232 | ( {LA(2) != '/'}? '*' | 
| 233 | | EndOfLine {deferredNewline();} | 
| 234 | | ~('*'| '\r' | '\n') | 
| 235 | )* | 
| 236 | "*/" {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;} | 
| 237 | ; | 
| 238 | /* Line comment: two slashes, the rest of the line and its line ending; skipped. NOTE(review): EndOfLine is mandatory, so a comment on a last line without a line ending does not match. */ | 
| 239 | CPPComment | 
| 240 | : | 
| 241 | "//" (~('\n' | '\r'))* EndOfLine | 
| 242 | {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP; newline();} | 
| 243 | ; | 
| 244 | /* '#' followed by a line directive; the token is skipped and newline() is called for the line ending that LineDirective consumed. */ | 
| 245 | PREPROC_DIRECTIVE | 
| 246 | options{paraphrase = "a line directive";} | 
| 247 | : | 
| 248 | '#' LineDirective | 
| 249 | {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP; newline();} | 
| 250 | ; | 
| 251 | /* Body of a line directive: optional "line", a line number n (the lexer line is set to n - 1), a quoted file name passed without its first and last character to observer->notify(), optional trailing numbers, end of line. NOTE(review): observer is dereferenced without a null check, so setObserver() has to be called before lexing. */ | 
| 252 | protected | 
| 253 | LineDirective | 
| 254 | : | 
| 255 | { | 
| 256 | deferredLineCount = 0; | 
| 257 | } | 
| 258 | ("line")?  // this would be for if the directive started "#line" | 
| 259 | (Space)+ | 
| 260 | n:Decimal { setLine(oopse::lexi_cast<int>(n->getText()) - 1); } | 
| 261 | (Space)+ | 
| 262 | (sl:StringLiteral) {std::string filename = sl->getText().substr(1,sl->getText().length()-2); observer->notify(filename);} | 
| 263 | ((Space)+ Decimal)* // To support cpp flags (GNU) | 
| 264 | EndOfLine | 
| 265 | ; | 
| 266 | /* One blank character: space, tab or form feed. */ | 
| 267 | protected | 
| 268 | Space | 
| 269 | : | 
| 270 | (' '|'\t'|'\f') | 
| 271 | ; | 
| 272 |  | 
| 273 |  | 
| 274 | // Literals: | 
| 275 |  | 
| 276 | /* | 
| 277 | * Note that we do NOT handle tri-graphs nor multi-byte sequences. | 
| 278 | */ | 
| 279 |  | 
| 280 | /* | 
| 281 | * Character literal: exactly one escape sequence or one character other than a | 
| 282 | * single quote, between single quotes. Empty literals are rejected (empty strings are not). | 
| 283 | */ | 
| 284 | CharLiteral | 
| 285 | : | 
| 286 | '\'' (Escape | ~('\'')) '\'' | 
| 287 | ; | 
| 288 |  | 
| 289 | /* | 
| 290 | * String literal in double quotes. Raw embedded newlines are not allowed; a | 
| 291 | * backslash directly followed by a line ending is accepted as a continuation and | 
| 292 | * recorded with deferredNewline(). Any other backslash must start an Escape. | 
| 293 | */ | 
| 294 | StringLiteral | 
| 295 | : | 
| 296 | '"' | 
| 297 | ( Escape | 
| 298 | | | 
| 299 | ( "\\\r\n"   // MS | 
| 300 | | "\\\r"     // MAC | 
| 301 | | "\\\n"     // Unix | 
| 302 | ) {deferredNewline();} | 
| 303 | | | 
| 304 | ~('"'|'\r'|'\n'|'\\') | 
| 305 | )* | 
| 306 | '"' | 
| 307 | ; | 
| 308 | /* One line ending: CR LF, a lone CR or a lone LF; ambiguity warnings for this subrule are switched off. */ | 
| 309 | protected | 
| 310 | EndOfLine | 
| 311 | : | 
| 312 | ( options{generateAmbigWarnings = false;}: | 
| 313 | "\r\n"  // MS | 
| 314 | | '\r'    // Mac | 
| 315 | | '\n'    // Unix | 
| 316 | ) | 
| 317 | ; | 
| 318 |  | 
| 319 | /* | 
| 320 | * Handle the various escape sequences. | 
| 321 | * | 
| 322 | * Note carefully that these numeric escape *sequences* are *not* of the | 
| 323 | * same form as the C language numeric *constants*. | 
| 324 | * | 
| 325 | * There is no such thing as a binary numeric escape sequence. | 
| 326 | * | 
| 327 | * Octal escape sequences are either 1, 2, or 3 octal digits exactly. | 
| 328 | * | 
| 329 | * There is no such thing as a decimal escape sequence. | 
| 330 | * | 
| 331 | * Hexadecimal escape sequences are begun with a leading \x and continue | 
| 332 | * until a non-hexadecimal character is found. | 
| 333 | * | 
| 334 | * No real handling of tri-graph sequences, yet. | 
| 335 | */ | 
| 336 | /* Backslash escape: a simple escape character, an octal escape of one to three octal digits (two at most after 4-7), or x followed by hex digits. Octal positions accept 0-7 only. */ | 
| 337 | protected | 
| 338 | Escape | 
| 339 | : | 
| 340 | '\\' | 
| 341 | ( options{warnWhenFollowAmbig=false;}: | 
| 342 | 'a' | 
| 343 | | 'b' | 
| 344 | | 'f' | 
| 345 | | 'n' | 
| 346 | | 'r' | 
| 347 | | 't' | 
| 348 | | 'v' | 
| 349 | | '"' | 
| 350 | | '\'' | 
| 351 | | '\\' | 
| 352 | | '?' | 
| 353 | | ('0'..'3') (options{warnWhenFollowAmbig=false;}: '0'..'7' (options{warnWhenFollowAmbig=false;}: '0'..'7')? )? | 
| 354 | | ('4'..'7') (options{warnWhenFollowAmbig=false;}: '0'..'7')? | 
| 355 | | 'x' (options{warnWhenFollowAmbig=false;}: Digit | 'a'..'f' | 'A'..'F')+ | 
| 356 | ) | 
| 357 | ; | 
| 358 |  | 
| 359 | /* Character range '\3'..'\377'; no other rule in this file refers to it. Presumably present only to fix the lexer's character vocabulary - TODO confirm. */ | 
| 360 | protected | 
| 361 | Vocabulary | 
| 362 | : | 
| 363 | '\3'..'\377' | 
| 364 | ; | 
| 365 |  | 
| 366 | /* Identifier: a letter or underscore followed by letters, digits or underscores. testLiterals = true makes the lexer check the matched text against the literals table, which is how keywords get their own token types. */ | 
| 367 | ID | 
| 368 | options {testLiterals = true;} | 
| 369 | : | 
| 370 | ('a'..'z'|'A'..'Z'|'_') | 
| 371 | ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* | 
| 372 | ; | 
| 373 |  | 
| 374 | /* One decimal digit; protected, so it is matched only when another lexer rule calls it. */ | 
| 375 | protected | 
| 376 | Digit | 
| 377 | : | 
| 378 | '0'..'9' | 
| 379 | ; | 
| 380 | /* One or more decimal digits; used by LineDirective for the line number and trailing flags. */ | 
| 381 | protected | 
| 382 | Decimal | 
| 383 | : | 
| 384 | ('0'..'9')+ | 
| 385 | ; | 
| 386 |  | 
| 387 | // one hexadecimal digit; protected, so it is matched only when another lexer rule calls it | 
| 388 | protected | 
| 389 | HEX_DIGIT | 
| 390 | :       ('0'..'9'|'A'..'F'|'a'..'f') | 
| 391 | ; | 
| 392 |  | 
| 393 | /* Numeric literal. The token starts as NUM_INT and is retyped to DOT, NUM_LONG, NUM_FLOAT or NUM_DOUBLE depending on what is matched; an optional sign may precede every form, including a lone '.'. */ | 
| 394 | // a numeric literal | 
| 395 | NUM_INT | 
| 396 | { | 
| 397 | bool isDecimal = false; | 
| 398 | ANTLR_USE_NAMESPACE(antlr)RefToken t = ANTLR_USE_NAMESPACE(antlr)nullToken; | 
| 399 | } | 
| 400 | : ('+' | '-')? | 
| 401 | ( | 
| 402 | '.' {_ttype = DOT;} | 
| 403 | (   ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})? | 
| 404 | { | 
| 405 | if ( t && | 
| 406 | (t->getText().find('f') != ANTLR_USE_NAMESPACE(std)string::npos || | 
| 407 | t->getText().find('F') != ANTLR_USE_NAMESPACE(std)string::npos ) ) { | 
| 408 | _ttype = NUM_FLOAT; | 
| 409 | } | 
| 410 | else { | 
| 411 | _ttype = NUM_DOUBLE; // assume double | 
| 412 | } | 
| 413 | } | 
| 414 | )? | 
| 415 | /* remaining alternatives: literals that start with a digit */ | 
| 416 | |       (       '0' {isDecimal = true;} // special case for just '0' | 
| 417 | (       ('x'|'X') | 
| 418 | (                                                                                       // hex | 
| 419 | // the 'e'|'E' and float suffix stuff look | 
| 420 | // like hex digits, hence the (...)+ doesn't | 
| 421 | // know when to stop: ambig.  ANTLR resolves | 
| 422 | // it correctly by matching immediately.  It | 
| 423 | // is therefore ok to hush warning. | 
| 424 | options { | 
| 425 | warnWhenFollowAmbig=false; | 
| 426 | } | 
| 427 | :       HEX_DIGIT | 
| 428 | )+ | 
| 429 | |       //float or double with leading zero | 
| 430 | (('0'..'9')+ ('.'|EXPONENT|FLOAT_SUFFIX)) => ('0'..'9')+ | 
| 431 | |       ('0'..'7')+                                                                     // octal | 
| 432 | )? | 
| 433 | |       ('1'..'9') ('0'..'9')*  {isDecimal=true;}               // non-zero decimal | 
| 434 | ) | 
| 435 | (       ('l'|'L') { _ttype = NUM_LONG; } | 
| 436 |  | 
| 437 | // only check to see if it's a float if looks like decimal so far | 
| 438 | |       {isDecimal}? | 
| 439 | (   '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})? | 
| 440 | |   EXPONENT (f3:FLOAT_SUFFIX {t=f3;})? | 
| 441 | |   f4:FLOAT_SUFFIX {t=f4;} | 
| 442 | ) | 
| 443 | { | 
| 444 | if ( t && | 
| 445 | (t->getText().find('f') != ANTLR_USE_NAMESPACE(std)string::npos || | 
| 446 | t->getText().find('F') != ANTLR_USE_NAMESPACE(std)string::npos ) ) { | 
| 447 | _ttype = NUM_FLOAT; | 
| 448 | } | 
| 449 | else { | 
| 450 | _ttype = NUM_DOUBLE; // assume double | 
| 451 | } | 
| 452 | } | 
| 453 | )? | 
| 454 | ) | 
| 455 | ; | 
| 456 |  | 
| 457 | // exponent part of a floating point number: e, E, d or D, an optional sign, then one or more digits (protected helper for NUM_INT) | 
| 458 | protected | 
| 459 | EXPONENT | 
| 460 | :       ('e'|'E'|'d'|'D') ('+'|'-')? ('0'..'9')+ | 
| 461 | ; | 
| 462 | /* Single-letter float suffix: f, F, d or D. NUM_INT yields NUM_FLOAT only when the suffix is f or F, otherwise NUM_DOUBLE. */ | 
| 463 | protected | 
| 464 | FLOAT_SUFFIX | 
| 465 | :       'f'|'F'|'d'|'D' | 
| 466 | ; | 