ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-2.0/src/selection/SelectionCompiler.cpp
Revision: 2147
Committed: Tue Apr 5 23:09:48 2005 UTC (19 years, 3 months ago) by tim
File size: 20736 byte(s)
Log Message:
support '+' and '-' in atom type name;clean the code a little bit

File Contents

# Content
1 /*
2 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3 *
4 * The University of Notre Dame grants you ("Licensee") a
5 * non-exclusive, royalty free, license to use, modify and
6 * redistribute this software in source and binary code form, provided
7 * that the following conditions are met:
8 *
9 * 1. Acknowledgement of the program authors must be made in any
10 * publication of scientific results based in part on use of the
11 * program. An acceptable form of acknowledgement is citation of
12 * the article in which the program was described (Matthew
13 * A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14 * J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15 * Parallel Simulation Engine for Molecular Dynamics,"
16 * J. Comput. Chem. 26, pp. 252-271 (2005))
17 *
18 * 2. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 *
21 * 3. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the
24 * distribution.
25 *
26 * This software is provided "AS IS," without a warranty of any
27 * kind. All express or implied conditions, representations and
28 * warranties, including any implied warranty of merchantability,
29 * fitness for a particular purpose or non-infringement, are hereby
30 * excluded. The University of Notre Dame and its licensors shall not
31 * be liable for any damages suffered by licensee as a result of
32 * using, modifying or distributing the software or its
33 * derivatives. In no event will the University of Notre Dame or its
34 * licensors be liable for any lost revenue, profit or data, or for
35 * direct, indirect, special, consequential, incidental or punitive
36 * damages, however caused and regardless of the theory of liability,
37 * arising out of the use of or inability to use software, even if the
38 * University of Notre Dame has been advised of the possibility of
39 * such damages.
40 */
41
42 #include "selection/SelectionCompiler.hpp"
43 #include "utils/StringUtils.hpp"
44 namespace oopse {
45
46 bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {
47
48 this->filename = filename;
49 this->script = script;
50 lineNumbers.clear();
51 lineIndices.clear();
52 aatokenCompiled.clear();
53
54 if (internalCompile()) {
55 return true;
56 }
57
58 int icharEnd;
59 if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
60 (icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
61 icharEnd = script.size();
62 }
63 errorLine = script.substr(ichCurrentCommand, icharEnd);
64 return false;
65 }
66
67 bool SelectionCompiler::internalCompile(){
68
69 cchScript = script.size();
70 ichToken = 0;
71 lineCurrent = 1;
72
73 error = false;
74
75 //std::vector<Token> lltoken;
76 aatokenCompiled.clear();
77 std::vector<Token> ltoken;
78
79 Token tokenCommand;
80 int tokCommand = Token::nada;
81
82 for ( ; true; ichToken += cchToken) {
83 if (lookingAtLeadingWhitespace())
84 continue;
85 //if (lookingAtComment())
86 // continue;
87 bool endOfLine = lookingAtEndOfLine();
88 if (endOfLine || lookingAtEndOfStatement()) {
89 if (tokCommand != Token::nada) {
90 if (! compileCommand(ltoken)) {
91 return false;
92 }
93 aatokenCompiled.push_back(atokenCommand);
94 lineNumbers.push_back(lineCurrent);
95 lineIndices.push_back(ichCurrentCommand);
96 ltoken.clear();
97 tokCommand = Token::nada;
98 }
99
100 if (ichToken < cchScript) {
101 if (endOfLine)
102 ++lineCurrent;
103 continue;
104 }
105 break;
106 }
107
108 if (tokCommand != Token::nada) {
109 if (lookingAtString()) {
110 std::string str = getUnescapedStringLiteral();
111 ltoken.push_back(Token(Token::string, str));
112 continue;
113 }
114 //if ((tokCommand & Token::specialstring) != 0 &&
115 // lookingAtSpecialString()) {
116 // std::string str = script.substr(ichToken, ichToken + cchToken);
117 // ltoken.push_back(Token(Token::string, str));
118 // continue;
119 //}
120 if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
121 float value = lexi_cast<float>(script.substr(ichToken, cchToken));
122 ltoken.push_back(Token(Token::decimal, boost::any(value)));
123 continue;
124 }
125 if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
126
127 int val = lexi_cast<int>(script.substr(ichToken, cchToken));
128 ltoken.push_back(Token(Token::integer, boost::any(val)));
129 continue;
130 }
131 }
132
133 if (lookingAtLookupToken()) {
134 std::string ident = script.substr(ichToken, cchToken);
135 Token token;
136 Token* pToken = TokenMap::getInstance()->getToken(ident);
137 if (pToken != NULL) {
138 token = *pToken;
139 } else {
140 token = Token(Token::identifier, ident);
141 }
142
143 int tok = token.tok;
144
145 switch (tokCommand) {
146 case Token::nada:
147 ichCurrentCommand = ichToken;
148 //tokenCommand = token;
149 tokCommand = tok;
150 if ((tokCommand & Token::command) == 0)
151 return commandExpected();
152 break;
153
154 case Token::define:
155 if (ltoken.size() == 1) {
156 // we are looking at the variable name
157 if (tok != Token::identifier &&
158 (tok & Token::predefinedset) != Token::predefinedset)
159 return invalidExpressionToken(ident);
160 } else {
161 // we are looking at the expression
162 if (tok != Token::identifier &&
163 (tok & (Token::expression | Token::predefinedset)) == 0)
164 return invalidExpressionToken(ident);
165 }
166
167 break;
168
169 case Token::select:
170 if (tok != Token::identifier && (tok & Token::expression) == 0)
171 return invalidExpressionToken(ident);
172 break;
173 }
174 ltoken.push_back(token);
175 continue;
176 }
177
178 if (ltoken.size() == 0) {
179 return commandExpected();
180 }
181
182 return unrecognizedToken();
183 }
184
185 return true;
186 }
187
188
189 bool SelectionCompiler::lookingAtLeadingWhitespace() {
190
191 int ichT = ichToken;
192 while (ichT < cchScript && std::isspace(script[ichT])) {
193 ++ichT;
194 }
195 cchToken = ichT - ichToken;
196 return cchToken > 0;
197 }
198
199 bool SelectionCompiler::lookingAtEndOfLine() {
200 if (ichToken == cchScript)
201 return true;
202 int ichT = ichToken;
203 char ch = script[ichT];
204 if (ch == '\r') {
205 ++ichT;
206 if (ichT < cchScript && script[ichT] == '\n')
207 ++ichT;
208 } else if (ch == '\n') {
209 ++ichT;
210 } else {
211 return false;
212 }
213 cchToken = ichT - ichToken;
214 return true;
215 }
216
217 bool SelectionCompiler::lookingAtEndOfStatement() {
218 if (ichToken == cchScript || script[ichToken] != ';')
219 return false;
220 cchToken = 1;
221 return true;
222 }
223
224 bool SelectionCompiler::lookingAtString() {
225 if (ichToken == cchScript)
226 return false;
227 if (script[ichToken] != '"')
228 return false;
229 // remove support for single quote
230 // in order to use it in atom expressions
231 // char chFirst = script.charAt(ichToken);
232 // if (chFirst != '"' && chFirst != '\'')
233 // return false;
234 int ichT = ichToken + 1;
235 // while (ichT < cchScript && script.charAt(ichT++) != chFirst)
236 char ch;
237 bool previousCharBackslash = false;
238 while (ichT < cchScript) {
239 ch = script[ichT++];
240 if (ch == '"' && !previousCharBackslash)
241 break;
242 previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
243 }
244 cchToken = ichT - ichToken;
245
246 return true;
247 }
248
249
250 std::string SelectionCompiler::getUnescapedStringLiteral() {
251 /** @todo */
252 std::string sb(cchToken - 2, ' ');
253
254 int ichMax = ichToken + cchToken - 1;
255 int ich = ichToken + 1;
256
257 while (ich < ichMax) {
258 char ch = script[ich++];
259 if (ch == '\\' && ich < ichMax) {
260 ch = script[ich++];
261 switch (ch) {
262 case 'b':
263 ch = '\b';
264 break;
265 case 'n':
266 ch = '\n';
267 break;
268 case 't':
269 ch = '\t';
270 break;
271 case 'r':
272 ch = '\r';
273 // fall into
274 case '"':
275 case '\\':
276 case '\'':
277 break;
278 case 'x':
279 case 'u':
280 int digitCount = ch == 'x' ? 2 : 4;
281 if (ich < ichMax) {
282 int unicode = 0;
283 for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
284 char chT = script[ich];
285 int hexit = getHexitValue(chT);
286 if (hexit < 0)
287 break;
288 unicode <<= 4;
289 unicode += hexit;
290 ++ich;
291 }
292 ch = (char)unicode;
293 }
294 }
295 }
296 sb.append(1, ch);
297 }
298
299 return sb;
300 }
301
302 int SelectionCompiler::getHexitValue(char ch) {
303 if (ch >= '0' && ch <= '9')
304 return ch - '0';
305 else if (ch >= 'a' && ch <= 'f')
306 return 10 + ch - 'a';
307 else if (ch >= 'A' && ch <= 'F')
308 return 10 + ch - 'A';
309 else
310 return -1;
311 }
312
313 bool SelectionCompiler::lookingAtSpecialString() {
314 int ichT = ichToken;
315 char ch = script[ichT];
316 while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
317 ++ichT;
318 }
319 cchToken = ichT - ichToken;
320 return cchToken > 0;
321 }
322
323 bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
324 if (ichToken == cchScript) {
325 return false;
326 }
327
328 int ichT = ichToken;
329 if (script[ichT] == '-') {
330 ++ichT;
331 }
332 bool digitSeen = false;
333 char ch = 'X';
334 while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
335 ++ichT;
336 digitSeen = true;
337 }
338
339 if (ichT == cchScript || ch != '.') {
340 return false;
341 }
342
343 // to support DMPC.1, let's check the character before the dot
344 if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
345 return false;
346 }
347
348 ++ichT;
349 while (ichT < cchScript && std::isdigit(script[ichT])) {
350 ++ichT;
351 digitSeen = true;
352 }
353 cchToken = ichT - ichToken;
354 return digitSeen;
355 }
356
357 bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
358 if (ichToken == cchScript) {
359 return false;
360 }
361 int ichT = ichToken;
362 if (allowNegative && script[ichToken] == '-') {
363 ++ichT;
364 }
365 int ichBeginDigits = ichT;
366 while (ichT < cchScript && std::isdigit(script[ichT])) {
367 ++ichT;
368 }
369 if (ichBeginDigits == ichT) {
370 return false;
371 }
372 cchToken = ichT - ichToken;
373 return true;
374 }
375
376 bool SelectionCompiler::lookingAtLookupToken() {
377 if (ichToken == cchScript) {
378 return false;
379 }
380
381 int ichT = ichToken;
382 char ch;
383 switch (ch = script[ichT++]) {
384 case '(':
385 case ')':
386 case ',':
387 case '[':
388 case ']':
389 break;
390 case '&':
391 case '|':
392 if (ichT < cchScript && script[ichT] == ch) {
393 ++ichT;
394 }
395 break;
396 case '<':
397 case '=':
398 case '>':
399 if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
400 ++ichT;
401 }
402 break;
403 case '/':
404 case '!':
405 if (ichT < cchScript && script[ichT] == '=') {
406 ++ichT;
407 }
408 break;
409 default:
410 if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
411 return false;
412 }
413 case '*':
414 case '?': // include question marks in identifier for atom expressions
415 while (ichT < cchScript && !std::isspace(ch = script[ichT]) &&
416 (std::isalpha(ch) ||std::isdigit(ch) || ch == '_' || ch == '.' || ch == '*' || ch == '?' || ch == '+' || ch == '-' || ch == '[' || ch == ']') ){
417
418 ++ichT;
419 }
420 break;
421 }
422
423 cchToken = ichT - ichToken;
424
425 return true;
426 }
427
428 bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
429 const Token& tokenCommand = ltoken[0];
430 int tokCommand = tokenCommand.tok;
431
432 atokenCommand = ltoken;
433 if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
434 return false;
435 }
436
437 return true;
438 }
439
440 bool SelectionCompiler::compileExpression() {
441 /** todo */
442 int i = 1;
443 int tokCommand = atokenCommand[0].tok;
444 if (tokCommand == Token::define) {
445 i = 2;
446 } else if ((tokCommand & Token::embeddedExpression) != 0) {
447 // look for the open parenthesis
448 while (i < atokenCommand.size() &&
449 atokenCommand[i].tok != Token::leftparen)
450 ++i;
451 }
452
453 if (i >= atokenCommand.size()) {
454 return true;
455 }
456 return compileExpression(i);
457 }
458
459
460 bool SelectionCompiler::addTokenToPostfix(const Token& token) {
461 ltokenPostfix.push_back(token);
462 return true;
463 }
464
465 bool SelectionCompiler::compileExpression(int itoken) {
466 ltokenPostfix.clear();
467 for (int i = 0; i < itoken; ++i) {
468 addTokenToPostfix(atokenCommand[i]);
469 }
470
471 atokenInfix = atokenCommand;
472 itokenInfix = itoken;
473
474 addTokenToPostfix(Token::tokenExpressionBegin);
475 if (!clauseOr()) {
476 return false;
477 }
478
479 addTokenToPostfix(Token::tokenExpressionEnd);
480 if (itokenInfix != atokenInfix.size()) {
481 return endOfExpressionExpected();
482 }
483
484 atokenCommand = ltokenPostfix;
485 return true;
486 }
487
488 Token SelectionCompiler::tokenNext() {
489 if (itokenInfix == atokenInfix.size()) {
490 return Token();
491 }
492 return atokenInfix[itokenInfix++];
493 }
494
495 boost::any SelectionCompiler::valuePeek() {
496 if (itokenInfix == atokenInfix.size()) {
497 return boost::any();
498 } else {
499 return atokenInfix[itokenInfix].value;
500 }
501 }
502
503 int SelectionCompiler::tokPeek() {
504 if (itokenInfix == atokenInfix.size()) {
505 return 0;
506 }else {
507 return atokenInfix[itokenInfix].tok;
508 }
509 }
510
511 bool SelectionCompiler::clauseOr() {
512 if (!clauseAnd()) {
513 return false;
514 }
515
516 while (tokPeek() == Token::opOr) {
517 Token tokenOr = tokenNext();
518 if (!clauseAnd()) {
519 return false;
520 }
521 addTokenToPostfix(tokenOr);
522 }
523 return true;
524 }
525
526 bool SelectionCompiler::clauseAnd() {
527 if (!clauseNot()) {
528 return false;
529 }
530
531 while (tokPeek() == Token::opAnd) {
532 Token tokenAnd = tokenNext();
533 if (!clauseNot()) {
534 return false;
535 }
536 addTokenToPostfix(tokenAnd);
537 }
538 return true;
539 }
540
541 bool SelectionCompiler::clauseNot() {
542 if (tokPeek() == Token::opNot) {
543 Token tokenNot = tokenNext();
544 if (!clauseNot()) {
545 return false;
546 }
547 return addTokenToPostfix(tokenNot);
548 }
549 return clausePrimitive();
550 }
551
552 bool SelectionCompiler::clausePrimitive() {
553 int tok = tokPeek();
554 switch (tok) {
555 case Token::within:
556 return clauseWithin();
557
558 case Token::asterisk:
559 case Token::identifier:
560 return clauseChemObjName();
561
562 case Token::integer :
563 return clauseIndex();
564 default:
565 if ((tok & Token::atomproperty) == Token::atomproperty) {
566 return clauseComparator();
567 }
568 if ((tok & Token::predefinedset) != Token::predefinedset) {
569 break;
570 }
571 // fall into the code and below and just add the token
572 case Token::all:
573 case Token::none:
574 return addTokenToPostfix(tokenNext());
575 case Token::leftparen:
576 tokenNext();
577 if (!clauseOr()) {
578 return false;
579 }
580 if (tokenNext().tok != Token::rightparen) {
581 return rightParenthesisExpected();
582 }
583 return true;
584 }
585 return unrecognizedExpressionToken();
586 }
587
588 bool SelectionCompiler::clauseComparator() {
589 Token tokenAtomProperty = tokenNext();
590 Token tokenComparator = tokenNext();
591 if ((tokenComparator.tok & Token::comparator) == 0) {
592 return comparisonOperatorExpected();
593 }
594
595 Token tokenValue = tokenNext();
596 if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
597 return numberExpected();
598 }
599
600 float val;
601 if (tokenValue.value.type() == typeid(int)) {
602 val = boost::any_cast<int>(tokenValue.value);
603 } else if (tokenValue.value.type() == typeid(float)) {
604 val = boost::any_cast<float>(tokenValue.value);
605 } else {
606 return false;
607 }
608
609 boost::any floatVal;
610 floatVal = val;
611 return addTokenToPostfix(Token(tokenComparator.tok,
612 tokenAtomProperty.tok, floatVal));
613 }
614
615 bool SelectionCompiler::clauseWithin() {
616 tokenNext(); // WITHIN
617 if (tokenNext().tok != Token::leftparen) { // (
618 return leftParenthesisExpected();
619 }
620
621 boost::any distance;
622 Token tokenDistance = tokenNext(); // distance
623 switch(tokenDistance.tok) {
624 case Token::integer:
625 case Token::decimal:
626 distance = tokenDistance.value;
627 break;
628 default:
629 return numberOrKeywordExpected();
630 }
631
632 if (tokenNext().tok != Token::opOr) { // ,
633 return commaExpected();
634 }
635
636 if (! clauseOr()) { // *expression*
637 return false;
638 }
639
640 if (tokenNext().tok != Token::rightparen) { // )T
641 return rightParenthesisExpected();
642 }
643
644 return addTokenToPostfix(Token(Token::within, distance));
645 }
646
647 bool SelectionCompiler::clauseChemObjName() {
648 Token token = tokenNext();
649 if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) {
650
651 std::string name = boost::any_cast<std::string>(token.value);
652 if (isNameValid(name)) {
653 return addTokenToPostfix(Token(Token::name, name));
654 } else {
655 return compileError("invalid name: " + name);
656 }
657 }
658
659 return false;
660
661 }
662
663 bool SelectionCompiler::isNameValid(const std::string& name) {
664 int nbracket = 0;
665 int ndot = 0;
666 for (int i =0 ; i < name.size(); ++i) {
667 switch(name[i]) {
668
669 case '[' :
670 ++nbracket;
671 break;
672 case ']' :
673 --nbracket;
674 break;
675 case '.' :
676 ++ndot;
677 break;
678 }
679 }
680
681 //only allow 3 dots at most
682 return (ndot <=3 && nbracket == 0) ? true : false;
683 }
684
685 bool SelectionCompiler::clauseIndex(){
686 Token token = tokenNext();
687 if (token.tok == Token::integer) {
688 int index = boost::any_cast<int>(token.value);
689 int tok = tokPeek();
690 std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
691 if (tok == Token::to) {
692 tokenNext();
693 tok = tokPeek();
694 if (tok != Token::integer) {
695 return numberExpected();
696 }
697
698 boost::any intVal = tokenNext().value;
699 int first = index;
700 if (intVal.type() != typeid(int)){
701 return false;
702 }
703 int second = boost::any_cast<int>(intVal);
704
705 return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
706
707 }else {
708 return addTokenToPostfix(Token(Token::index, boost::any(index)));
709 }
710 } else {
711 return numberExpected();
712 }
713 }
714
715 }

Properties

Name Value
svn:executable *