ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-3.0/src/selection/SelectionCompiler.cpp
Revision: 1967
Committed: Thu Feb 3 23:14:05 2005 UTC (19 years, 5 months ago) by tim
File size: 20539 byte(s)
Log Message:
more work in selection library

File Contents

# Content
1 /*
2 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3 *
4 * The University of Notre Dame grants you ("Licensee") a
5 * non-exclusive, royalty free, license to use, modify and
6 * redistribute this software in source and binary code form, provided
7 * that the following conditions are met:
8 *
9 * 1. Acknowledgement of the program authors must be made in any
10 * publication of scientific results based in part on use of the
11 * program. An acceptable form of acknowledgement is citation of
12 * the article in which the program was described (Matthew
13 * A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14 * J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15 * Parallel Simulation Engine for Molecular Dynamics,"
16 * J. Comput. Chem. 26, pp. 252-271 (2005))
17 *
18 * 2. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 *
21 * 3. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the
24 * distribution.
25 *
26 * This software is provided "AS IS," without a warranty of any
27 * kind. All express or implied conditions, representations and
28 * warranties, including any implied warranty of merchantability,
29 * fitness for a particular purpose or non-infringement, are hereby
30 * excluded. The University of Notre Dame and its licensors shall not
31 * be liable for any damages suffered by licensee as a result of
32 * using, modifying or distributing the software or its
33 * derivatives. In no event will the University of Notre Dame or its
34 * licensors be liable for any lost revenue, profit or data, or for
35 * direct, indirect, special, consequential, incidental or punitive
36 * damages, however caused and regardless of the theory of liability,
37 * arising out of the use of or inability to use software, even if the
38 * University of Notre Dame has been advised of the possibility of
39 * such damages.
40 */
41
42 #include "selection/SelectionCompiler.hpp"
43 #include "utils/StringUtils.hpp"
44 namespace oopse {
45
46 bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {
47
48 this->filename = filename;
49 this->script = script;
50 lineNumbers.clear();
51 lineIndices.clear();
52 aatokenCompiled.clear();
53
54 if (internalCompile()) {
55 return true;
56 }
57
58 int icharEnd;
59 if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
60 (icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
61 icharEnd = script.size();
62 }
63 errorLine = script.substr(ichCurrentCommand, icharEnd);
64 return false;
65 }
66
67 bool SelectionCompiler::internalCompile(){
68
69 cchScript = script.size();
70 ichToken = 0;
71 lineCurrent = 1;
72
73 error = false;
74
75 //std::vector<Token> lltoken;
76 aatokenCompiled.clear();
77 std::vector<Token> ltoken;
78
79 Token tokenCommand;
80 int tokCommand = Token::nada;
81
82 for ( ; true; ichToken += cchToken) {
83 if (lookingAtLeadingWhitespace())
84 continue;
85 if (lookingAtComment())
86 continue;
87 bool endOfLine = lookingAtEndOfLine();
88 if (endOfLine || lookingAtEndOfStatement()) {
89 if (tokCommand != Token::nada) {
90 if (! compileCommand(ltoken)) {
91 return false;
92 }
93 aatokenCompiled.push_back(atokenCommand);
94 lineNumbers.push_back(lineCurrent);
95 lineIndices.push_back(ichCurrentCommand);
96 ltoken.clear();
97 tokCommand = Token::nada;
98 }
99
100 if (ichToken < cchScript) {
101 if (endOfLine)
102 ++lineCurrent;
103 continue;
104 }
105 break;
106 }
107
108 if (tokCommand != Token::nada) {
109 if (lookingAtString()) {
110 std::string str = getUnescapedStringLiteral();
111 ltoken.push_back(Token(Token::string, str));
112 continue;
113 }
114 //if ((tokCommand & Token::specialstring) != 0 &&
115 // lookingAtSpecialString()) {
116 // std::string str = script.substr(ichToken, ichToken + cchToken);
117 // ltoken.push_back(Token(Token::string, str));
118 // continue;
119 //}
120 if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
121 float value = lexi_cast<float>(script.substr(ichToken, ichToken + cchToken));
122 ltoken.push_back(Token(Token::decimal, value));/**@todo*/
123 continue;
124 }
125 if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
126 std::string intString = script.substr(ichToken, ichToken + cchToken);
127 int val = lexi_cast<int>(intString);
128 ltoken.push_back(Token(Token::integer, val, intString));/**@todo*/
129 continue;
130 }
131 }
132
133 if (lookingAtLookupToken()) {
134 std::string ident = script.substr(ichToken, ichToken + cchToken);
135
136 Token token;
137 Token* pToken = TokenMap::getInstance()->getToken(ident);
138 if (pToken != NULL) {
139 token = *pToken;
140 } else {
141 token = Token(Token::identifier, ident);
142 }
143
144 int tok = token.tok;
145
146 switch (tokCommand) {
147 case Token::nada:
148 ichCurrentCommand = ichToken;
149 //tokenCommand = token;
150 tokCommand = tok;
151 if ((tokCommand & Token::command) == 0)
152 return commandExpected();
153 break;
154
155 case Token::define:
156 if (ltoken.size() == 1) {
157 // we are looking at the variable name
158 if (tok != Token::identifier &&
159 (tok & Token::predefinedset) != Token::predefinedset)
160 return invalidExpressionToken(ident);
161 } else {
162 // we are looking at the expression
163 if (tok != Token::identifier &&
164 (tok & (Token::expression | Token::predefinedset)) == 0)
165 return invalidExpressionToken(ident);
166 }
167
168 break;
169
170 case Token::select:
171 if (tok != Token::identifier && (tok & Token::expression) == 0)
172 return invalidExpressionToken(ident);
173 break;
174 }
175 ltoken.push_back(token);
176 continue;
177 }
178
179 if (ltoken.size() == 0) {
180 return commandExpected();
181 }
182
183 return unrecognizedToken();
184 }
185
186 return true;
187 }
188
189
190 bool SelectionCompiler::lookingAtLeadingWhitespace() {
191
192 int ichT = ichToken;
193 while (ichT < cchScript && std::isspace(script[ichT])) {
194 ++ichT;
195 }
196 cchToken = ichT - ichToken;
197 return cchToken > 0;
198 }
199
200 bool SelectionCompiler::lookingAtEndOfLine() {
201 if (ichToken == cchScript)
202 return true;
203 int ichT = ichToken;
204 char ch = script[ichT];
205 if (ch == '\r') {
206 ++ichT;
207 if (ichT < cchScript && script[ichT] == '\n')
208 ++ichT;
209 } else if (ch == '\n') {
210 ++ichT;
211 } else {
212 return false;
213 }
214 cchToken = ichT - ichToken;
215 return true;
216 }
217
218 bool SelectionCompiler::lookingAtEndOfStatement() {
219 if (ichToken == cchScript || script[ichToken] != ';')
220 return false;
221 cchToken = 1;
222 return true;
223 }
224
225 bool SelectionCompiler::lookingAtString() {
226 if (ichToken == cchScript)
227 return false;
228 if (script[ichToken] != '"')
229 return false;
230 // remove support for single quote
231 // in order to use it in atom expressions
232 // char chFirst = script.charAt(ichToken);
233 // if (chFirst != '"' && chFirst != '\'')
234 // return false;
235 int ichT = ichToken + 1;
236 // while (ichT < cchScript && script.charAt(ichT++) != chFirst)
237 char ch;
238 bool previousCharBackslash = false;
239 while (ichT < cchScript) {
240 ch = script[ichT++];
241 if (ch == '"' && !previousCharBackslash)
242 break;
243 previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
244 }
245 cchToken = ichT - ichToken;
246 return true;
247 }
248
249
250 std::string SelectionCompiler::getUnescapedStringLiteral() {
251 /** @todo */
252 std::string sb(cchToken - 2, ' ');
253
254 int ichMax = ichToken + cchToken - 1;
255 int ich = ichToken + 1;
256
257 while (ich < ichMax) {
258 char ch = script[ich++];
259 if (ch == '\\' && ich < ichMax) {
260 ch = script[ich++];
261 switch (ch) {
262 case 'b':
263 ch = '\b';
264 break;
265 case 'n':
266 ch = '\n';
267 break;
268 case 't':
269 ch = '\t';
270 break;
271 case 'r':
272 ch = '\r';
273 // fall into
274 case '"':
275 case '\\':
276 case '\'':
277 break;
278 case 'x':
279 case 'u':
280 int digitCount = ch == 'x' ? 2 : 4;
281 if (ich < ichMax) {
282 int unicode = 0;
283 for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
284 char chT = script[ich];
285 int hexit = getHexitValue(chT);
286 if (hexit < 0)
287 break;
288 unicode <<= 4;
289 unicode += hexit;
290 ++ich;
291 }
292 ch = (char)unicode;
293 }
294 }
295 }
296 sb.append(1, ch);
297 }
298
299 return sb;
300 }
301
302 int SelectionCompiler::getHexitValue(char ch) {
303 if (ch >= '0' && ch <= '9')
304 return ch - '0';
305 else if (ch >= 'a' && ch <= 'f')
306 return 10 + ch - 'a';
307 else if (ch >= 'A' && ch <= 'F')
308 return 10 + ch - 'A';
309 else
310 return -1;
311 }
312
313 bool SelectionCompiler::lookingAtSpecialString() {
314 int ichT = ichToken;
315 char ch = script[ichT];
316 while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
317 ++ichT;
318 }
319 cchToken = ichT - ichToken;
320 return cchToken > 0;
321 }
322
323 bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
324 if (ichToken == cchScript) {
325 return false;
326 }
327
328 int ichT = ichToken;
329 if (script[ichT] == '-') {
330 ++ichT;
331 }
332 bool digitSeen = false;
333 char ch = 'X';
334 while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
335 ++ichT;
336 digitSeen = true;
337 }
338
339 if (ichT == cchScript || ch != '.') {
340 return false;
341 }
342
343 // to support 1.ca, let's check the character after the dot
344 // to determine if it is an alpha
345 if (ch == '.' && (ichT + 1 < cchScript) && std::isalpha(script[ichT + 1])) {
346 return false;
347 }
348
349 ++ichT;
350 while (ichT < cchScript && std::isdigit(script[ichT])) {
351 ++ichT;
352 digitSeen = true;
353 }
354 cchToken = ichT - ichToken;
355 return digitSeen;
356 }
357
358 bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
359 if (ichToken == cchScript) {
360 return false;
361 }
362 int ichT = ichToken;
363 if (allowNegative && script[ichToken] == '-') {
364 ++ichT;
365 }
366 int ichBeginDigits = ichT;
367 while (ichT < cchScript && std::isdigit(script[ichT])) {
368 ++ichT;
369 }
370 if (ichBeginDigits == ichT) {
371 return false;
372 }
373 cchToken = ichT - ichToken;
374 return true;
375 }
376
377 bool SelectionCompiler::lookingAtLookupToken() {
378 if (ichToken == cchScript) {
379 return false;
380 }
381
382 int ichT = ichToken;
383 char ch;
384 switch (ch = script[ichT++]) {
385 case '(':
386 case ')':
387 case ',':
388 case '*':
389 case '-':
390 case '[':
391 case ']':
392 case '+':
393 case ':':
394 case '@':
395 case '.':
396 case '%':
397 break;
398 case '&':
399 case '|':
400 if (ichT < cchScript && script[ichT] == ch) {
401 ++ichT;
402 }
403 break;
404 case '<':
405 case '=':
406 case '>':
407 if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
408 ++ichT;
409 }
410 break;
411 case '/':
412 case '!':
413 if (ichT < cchScript && script[ichT] == '=') {
414 ++ichT;
415 }
416 break;
417 default:
418 if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
419 return false;
420 }
421 case '?': // include question marks in identifier for atom expressions
422 while (ichT < cchScript && (std::isalpha(ch = script[ichT]) ||std::isdigit(ch) ||
423 ch == '_' || ch == '?') ||(ch == '^' && ichT > ichToken && std::isdigit(script[ichT - 1]))) {
424 // hack for insertion codes embedded in an atom expression :-(
425 // select c3^a
426 ++ichT;
427 }
428 break;
429 }
430 cchToken = ichT - ichToken;
431 return true;
432 }
433
434 bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
435 const Token& tokenCommand = ltoken[0];
436 int tokCommand = tokenCommand.tok;
437
438 atokenCommand = ltoken;
439 if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
440 return false;
441 }
442
443 return true;
444 }
445
446 bool SelectionCompiler::compileExpression() {
447 /** todo */
448 int i = 1;
449 int tokCommand = atokenCommand[0].tok;
450 if (tokCommand == Token::define) {
451 i = 2;
452 } else if ((tokCommand & Token::embeddedExpression) != 0) {
453 // look for the open parenthesis
454 while (i < atokenCommand.size() &&
455 atokenCommand[i].tok != Token::leftparen)
456 ++i;
457 }
458
459 if (i >= atokenCommand.size()) {
460 return true;
461 }
462 return compileExpression(i);
463 }
464
465
466 bool SelectionCompiler::addTokenToPostfix(const Token& token) {
467 ltokenPostfix.push_back(token);
468 return true;
469 }
470
471 bool SelectionCompiler::compileExpression(int itoken) {
472 ltokenPostfix.clear();
473 for (int i = 0; i < itoken; ++i) {
474 addTokenToPostfix(atokenCommand[i]);
475 }
476
477 atokenInfix = atokenCommand;
478 itokenInfix = itoken;
479
480 addTokenToPostfix(Token::tokenExpressionBegin);
481 if (!clauseOr()) {
482 return false;
483 }
484
485 addTokenToPostfix(Token::tokenExpressionEnd);
486 if (itokenInfix != atokenInfix.size()) {
487 return endOfExpressionExpected();
488 }
489
490 atokenCommand = ltokenPostfix;
491 return true;
492 }
493
494 Token SelectionCompiler::tokenNext() {
495 if (itokenInfix == atokenInfix.size()) {
496 return Token();
497 }
498 return atokenInfix[itokenInfix++];
499 }
500
501 boost::any SelectionCompiler::valuePeek() {
502 if (itokenInfix == atokenInfix.size()) {
503 return boost::any();
504 } else {
505 return atokenInfix[itokenInfix].value;
506 }
507 }
508
509 int SelectionCompiler::tokPeek() {
510 if (itokenInfix == atokenInfix.size()) {
511 return 0;
512 }else {
513 return atokenInfix[itokenInfix].tok;
514 }
515 }
516
517 bool SelectionCompiler::clauseOr() {
518 if (!clauseAnd()) {
519 return false;
520 }
521
522 while (tokPeek() == Token::opOr) {
523 Token tokenOr = tokenNext();
524 if (!clauseAnd()) {
525 return false;
526 }
527 addTokenToPostfix(tokenOr);
528 }
529 return true;
530 }
531
532 bool SelectionCompiler::clauseAnd() {
533 if (!clauseNot()) {
534 return false;
535 }
536
537 while (tokPeek() == Token::opAnd) {
538 Token tokenAnd = tokenNext();
539 if (!clauseNot()) {
540 return false;
541 }
542 addTokenToPostfix(tokenAnd);
543 }
544 return true;
545 }
546
547 bool SelectionCompiler::clauseNot() {
548 if (tokPeek() == Token::opNot) {
549 Token tokenNot = tokenNext();
550 if (!clauseNot()) {
551 return false;
552 }
553 return addTokenToPostfix(tokenNot);
554 }
555 return clausePrimitive();
556 }
557
558 bool SelectionCompiler::clausePrimitive() {
559 int tok = tokPeek();
560 switch (tok) {
561 case Token::within:
562 return clauseWithin();
563
564 case Token::asterisk:
565 case Token::identifier:
566 return clauseChemObjName();
567
568 default:
569 if ((tok & Token::atomproperty) == Token::atomproperty) {
570 return clauseComparator();
571 }
572 if ((tok & Token::predefinedset) != Token::predefinedset) {
573 break;
574 }
575 // fall into the code and below and just add the token
576 case Token::all:
577 case Token::none:
578 return addTokenToPostfix(tokenNext());
579 case Token::leftparen:
580 tokenNext();
581 if (!clauseOr()) {
582 return false;
583 }
584 if (tokenNext().tok != Token::rightparen) {
585 return rightParenthesisExpected();
586 }
587 return true;
588 }
589 return unrecognizedExpressionToken();
590 }
591
592 bool SelectionCompiler::clauseComparator() {
593 Token tokenAtomProperty = tokenNext();
594 Token tokenComparator = tokenNext();
595 if ((tokenComparator.tok & Token::comparator) == 0) {
596 return comparisonOperatorExpected();
597 }
598
599 Token tokenValue = tokenNext();
600 if (tokenValue.tok != Token::integer) {
601 return integerExpected();
602 }
603 int val = tokenValue.intValue;
604 // note that a comparator instruction is a complicated instruction
605 // int intValue is the tok of the property you are comparing
606 // the value against which you are comparing is stored as an Integer
607 // in the object value
608 return addTokenToPostfix(Token(tokenComparator.tok,
609 tokenAtomProperty.tok, boost::any(val)));
610 }
611
612 bool SelectionCompiler::clauseWithin() {
613 tokenNext(); // WITHIN
614 if (tokenNext().tok != Token::leftparen) { // (
615 return leftParenthesisExpected();
616 }
617
618 boost::any distance;
619 Token tokenDistance = tokenNext(); // distance
620 switch(tokenDistance.tok) {
621 case Token::integer:
622 distance = float(tokenDistance.intValue);
623 break;
624 case Token::decimal:
625 distance = tokenDistance.value;
626 break;
627 default:
628 return numberOrKeywordExpected();
629 }
630
631 if (tokenNext().tok != Token::opOr) { // ,
632 return commaExpected();
633 }
634
635 if (! clauseOr()) { // *expression*
636 return false;
637 }
638
639 if (tokenNext().tok != Token::rightparen) { // )T
640 return rightParenthesisExpected();
641 }
642
643 return addTokenToPostfix(Token(Token::within, distance));
644 }
645
646 bool SelectionCompiler::clauseChemObjName() {
647 std::string chemObjName;
648 int tok = tokPeek();
649 if (!clauseName(chemObjName)){
650 return false;
651 }
652
653
654 tok = tokPeek();
655 //allow two dot at most
656 if (tok == Token::dot) {
657 if (!clauseName(chemObjName)) {
658 return false;
659 }
660 tok = tokPeek();
661 if (tok == Token::dot) {
662 if (!clauseName(chemObjName)) {
663 return false;
664 }
665 }
666 }
667
668 return addTokenToPostfix(Token(Token::name, chemObjName));
669 }
670
671 bool SelectionCompiler:: clauseName(std::string& name) {
672
673 int tok = tokPeek();
674
675 if (tok == Token::asterisk || tok == Token::identifier) {
676 name += boost::any_cast<std::string>(tokenNext().value);
677
678 while(true){
679 tok = tokPeek();
680 switch (tok) {
681 case Token::asterisk :
682 name += "*";
683 tokenNext();
684 break;
685 case Token::identifier :
686 name += boost::any_cast<std::string>(tokenNext().value);
687 break;
688 case Token::integer :
689 name += toString(boost::any_cast<int>(tokenNext().value));
690 break;
691 case Token::dot :
692 return true;
693 default :
694 return true;
695 }
696 }
697
698 }else {
699 return false;
700 }
701
702 }
703
704
705 }

Properties

Name Value
svn:executable *