ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/selection/SelectionCompiler.cpp
Revision: 2147
Committed: Tue Apr 5 23:09:48 2005 UTC (19 years, 3 months ago) by tim
File size: 20736 byte(s)
Log Message:
Support '+' and '-' in atom type names; clean up the code a little.

File Contents

# User Rev Content
1 tim 1963 /*
2     * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3     *
4     * The University of Notre Dame grants you ("Licensee") a
5     * non-exclusive, royalty free, license to use, modify and
6     * redistribute this software in source and binary code form, provided
7     * that the following conditions are met:
8     *
9     * 1. Acknowledgement of the program authors must be made in any
10     * publication of scientific results based in part on use of the
11     * program. An acceptable form of acknowledgement is citation of
12     * the article in which the program was described (Matthew
13     * A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14     * J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15     * Parallel Simulation Engine for Molecular Dynamics,"
16     * J. Comput. Chem. 26, pp. 252-271 (2005))
17     *
18     * 2. Redistributions of source code must retain the above copyright
19     * notice, this list of conditions and the following disclaimer.
20     *
21     * 3. Redistributions in binary form must reproduce the above copyright
22     * notice, this list of conditions and the following disclaimer in the
23     * documentation and/or other materials provided with the
24     * distribution.
25     *
26     * This software is provided "AS IS," without a warranty of any
27     * kind. All express or implied conditions, representations and
28     * warranties, including any implied warranty of merchantability,
29     * fitness for a particular purpose or non-infringement, are hereby
30     * excluded. The University of Notre Dame and its licensors shall not
31     * be liable for any damages suffered by licensee as a result of
32     * using, modifying or distributing the software or its
33     * derivatives. In no event will the University of Notre Dame or its
34     * licensors be liable for any lost revenue, profit or data, or for
35     * direct, indirect, special, consequential, incidental or punitive
36     * damages, however caused and regardless of the theory of liability,
37     * arising out of the use of or inability to use software, even if the
38     * University of Notre Dame has been advised of the possibility of
39     * such damages.
40     */
41    
42     #include "selection/SelectionCompiler.hpp"
43 tim 1965 #include "utils/StringUtils.hpp"
44 tim 1963 namespace oopse {
45    
46     bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {
47    
48     this->filename = filename;
49     this->script = script;
50     lineNumbers.clear();
51     lineIndices.clear();
52     aatokenCompiled.clear();
53    
54 tim 1965 if (internalCompile()) {
55 tim 1963 return true;
56     }
57    
58     int icharEnd;
59 tim 1965 if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
60     (icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
61 tim 1963 icharEnd = script.size();
62     }
63     errorLine = script.substr(ichCurrentCommand, icharEnd);
64     return false;
65     }
66    
67     bool SelectionCompiler::internalCompile(){
68    
69     cchScript = script.size();
70     ichToken = 0;
71     lineCurrent = 1;
72    
73     error = false;
74    
75 tim 1965 //std::vector<Token> lltoken;
76     aatokenCompiled.clear();
77 tim 1963 std::vector<Token> ltoken;
78    
79 tim 1965 Token tokenCommand;
80     int tokCommand = Token::nada;
81 tim 1963
82     for ( ; true; ichToken += cchToken) {
83     if (lookingAtLeadingWhitespace())
84     continue;
85 tim 1972 //if (lookingAtComment())
86     // continue;
87 tim 1965 bool endOfLine = lookingAtEndOfLine();
88 tim 1963 if (endOfLine || lookingAtEndOfStatement()) {
89 tim 1965 if (tokCommand != Token::nada) {
90 tim 1963 if (! compileCommand(ltoken)) {
91     return false;
92     }
93 tim 1965 aatokenCompiled.push_back(atokenCommand);
94     lineNumbers.push_back(lineCurrent);
95     lineIndices.push_back(ichCurrentCommand);
96     ltoken.clear();
97     tokCommand = Token::nada;
98 tim 1963 }
99    
100     if (ichToken < cchScript) {
101     if (endOfLine)
102     ++lineCurrent;
103     continue;
104     }
105     break;
106     }
107    
108 tim 1965 if (tokCommand != Token::nada) {
109 tim 1963 if (lookingAtString()) {
110     std::string str = getUnescapedStringLiteral();
111 tim 1965 ltoken.push_back(Token(Token::string, str));
112 tim 1963 continue;
113     }
114 tim 1965 //if ((tokCommand & Token::specialstring) != 0 &&
115     // lookingAtSpecialString()) {
116     // std::string str = script.substr(ichToken, ichToken + cchToken);
117     // ltoken.push_back(Token(Token::string, str));
118     // continue;
119     //}
120     if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
121 tim 1979 float value = lexi_cast<float>(script.substr(ichToken, cchToken));
122     ltoken.push_back(Token(Token::decimal, boost::any(value)));
123 tim 1963 continue;
124     }
125 tim 1965 if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
126 tim 1979
127     int val = lexi_cast<int>(script.substr(ichToken, cchToken));
128     ltoken.push_back(Token(Token::integer, boost::any(val)));
129 tim 1963 continue;
130     }
131     }
132    
133     if (lookingAtLookupToken()) {
134 tim 1972 std::string ident = script.substr(ichToken, cchToken);
135 tim 1965 Token token;
136     Token* pToken = TokenMap::getInstance()->getToken(ident);
137     if (pToken != NULL) {
138     token = *pToken;
139     } else {
140     token = Token(Token::identifier, ident);
141 tim 1963 }
142    
143     int tok = token.tok;
144    
145     switch (tokCommand) {
146 tim 1965 case Token::nada:
147 tim 1963 ichCurrentCommand = ichToken;
148     //tokenCommand = token;
149     tokCommand = tok;
150 tim 1965 if ((tokCommand & Token::command) == 0)
151 tim 1963 return commandExpected();
152     break;
153    
154 tim 1965 case Token::define:
155 tim 1963 if (ltoken.size() == 1) {
156     // we are looking at the variable name
157 tim 1965 if (tok != Token::identifier &&
158     (tok & Token::predefinedset) != Token::predefinedset)
159 tim 1963 return invalidExpressionToken(ident);
160     } else {
161     // we are looking at the expression
162 tim 1965 if (tok != Token::identifier &&
163     (tok & (Token::expression | Token::predefinedset)) == 0)
164 tim 1963 return invalidExpressionToken(ident);
165     }
166    
167     break;
168    
169 tim 1965 case Token::select:
170     if (tok != Token::identifier && (tok & Token::expression) == 0)
171 tim 1963 return invalidExpressionToken(ident);
172     break;
173     }
174     ltoken.push_back(token);
175     continue;
176     }
177    
178     if (ltoken.size() == 0) {
179     return commandExpected();
180     }
181    
182     return unrecognizedToken();
183     }
184    
185     return true;
186     }
187    
188    
189     bool SelectionCompiler::lookingAtLeadingWhitespace() {
190    
191     int ichT = ichToken;
192     while (ichT < cchScript && std::isspace(script[ichT])) {
193     ++ichT;
194     }
195     cchToken = ichT - ichToken;
196     return cchToken > 0;
197     }
198    
199     bool SelectionCompiler::lookingAtEndOfLine() {
200     if (ichToken == cchScript)
201     return true;
202     int ichT = ichToken;
203     char ch = script[ichT];
204     if (ch == '\r') {
205     ++ichT;
206     if (ichT < cchScript && script[ichT] == '\n')
207     ++ichT;
208     } else if (ch == '\n') {
209     ++ichT;
210     } else {
211     return false;
212     }
213     cchToken = ichT - ichToken;
214     return true;
215     }
216    
217     bool SelectionCompiler::lookingAtEndOfStatement() {
218     if (ichToken == cchScript || script[ichToken] != ';')
219     return false;
220     cchToken = 1;
221     return true;
222     }
223    
224     bool SelectionCompiler::lookingAtString() {
225     if (ichToken == cchScript)
226     return false;
227     if (script[ichToken] != '"')
228     return false;
229     // remove support for single quote
230     // in order to use it in atom expressions
231     // char chFirst = script.charAt(ichToken);
232     // if (chFirst != '"' && chFirst != '\'')
233     // return false;
234     int ichT = ichToken + 1;
235     // while (ichT < cchScript && script.charAt(ichT++) != chFirst)
236     char ch;
237 tim 1965 bool previousCharBackslash = false;
238 tim 1963 while (ichT < cchScript) {
239 tim 1965 ch = script[ichT++];
240 tim 1963 if (ch == '"' && !previousCharBackslash)
241     break;
242     previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
243     }
244     cchToken = ichT - ichToken;
245 tim 1979
246 tim 1963 return true;
247     }
248    
249    
250     std::string SelectionCompiler::getUnescapedStringLiteral() {
251 tim 1965 /** @todo */
252     std::string sb(cchToken - 2, ' ');
253    
254 tim 1963 int ichMax = ichToken + cchToken - 1;
255     int ich = ichToken + 1;
256    
257     while (ich < ichMax) {
258     char ch = script[ich++];
259     if (ch == '\\' && ich < ichMax) {
260     ch = script[ich++];
261     switch (ch) {
262     case 'b':
263     ch = '\b';
264     break;
265     case 'n':
266     ch = '\n';
267     break;
268     case 't':
269     ch = '\t';
270     break;
271     case 'r':
272     ch = '\r';
273     // fall into
274     case '"':
275     case '\\':
276     case '\'':
277     break;
278     case 'x':
279     case 'u':
280     int digitCount = ch == 'x' ? 2 : 4;
281     if (ich < ichMax) {
282     int unicode = 0;
283     for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
284     char chT = script[ich];
285     int hexit = getHexitValue(chT);
286     if (hexit < 0)
287     break;
288     unicode <<= 4;
289     unicode += hexit;
290     ++ich;
291     }
292     ch = (char)unicode;
293     }
294     }
295     }
296 tim 1965 sb.append(1, ch);
297 tim 1963 }
298    
299 tim 1965 return sb;
300 tim 1963 }
301    
302 tim 1965 int SelectionCompiler::getHexitValue(char ch) {
303 tim 1963 if (ch >= '0' && ch <= '9')
304     return ch - '0';
305     else if (ch >= 'a' && ch <= 'f')
306     return 10 + ch - 'a';
307     else if (ch >= 'A' && ch <= 'F')
308     return 10 + ch - 'A';
309     else
310     return -1;
311     }
312    
313     bool SelectionCompiler::lookingAtSpecialString() {
314     int ichT = ichToken;
315     char ch = script[ichT];
316     while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
317     ++ichT;
318     }
319     cchToken = ichT - ichToken;
320     return cchToken > 0;
321     }
322    
323 tim 1965 bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
324 tim 1963 if (ichToken == cchScript) {
325     return false;
326     }
327    
328     int ichT = ichToken;
329     if (script[ichT] == '-') {
330     ++ichT;
331     }
332 tim 1965 bool digitSeen = false;
333 tim 1963 char ch = 'X';
334     while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
335     ++ichT;
336     digitSeen = true;
337     }
338    
339     if (ichT == cchScript || ch != '.') {
340     return false;
341     }
342    
343 tim 1987 // to support DMPC.1, let's check the character before the dot
344     if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
345 tim 1963 return false;
346     }
347    
348     ++ichT;
349     while (ichT < cchScript && std::isdigit(script[ichT])) {
350     ++ichT;
351     digitSeen = true;
352     }
353     cchToken = ichT - ichToken;
354     return digitSeen;
355     }
356    
357 tim 1965 bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
358 tim 1963 if (ichToken == cchScript) {
359     return false;
360     }
361     int ichT = ichToken;
362     if (allowNegative && script[ichToken] == '-') {
363     ++ichT;
364     }
365     int ichBeginDigits = ichT;
366     while (ichT < cchScript && std::isdigit(script[ichT])) {
367     ++ichT;
368     }
369     if (ichBeginDigits == ichT) {
370     return false;
371     }
372     cchToken = ichT - ichToken;
373     return true;
374     }
375    
376     bool SelectionCompiler::lookingAtLookupToken() {
377     if (ichToken == cchScript) {
378     return false;
379     }
380    
381     int ichT = ichToken;
382     char ch;
383     switch (ch = script[ichT++]) {
384     case '(':
385     case ')':
386     case ',':
387     case '[':
388     case ']':
389     break;
390     case '&':
391     case '|':
392     if (ichT < cchScript && script[ichT] == ch) {
393     ++ichT;
394     }
395     break;
396     case '<':
397     case '=':
398     case '>':
399     if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
400     ++ichT;
401     }
402     break;
403     case '/':
404     case '!':
405     if (ichT < cchScript && script[ichT] == '=') {
406     ++ichT;
407     }
408     break;
409     default:
410     if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
411     return false;
412     }
413 tim 2147 case '*':
414 tim 1963 case '?': // include question marks in identifier for atom expressions
415 tim 2147 while (ichT < cchScript && !std::isspace(ch = script[ichT]) &&
416     (std::isalpha(ch) ||std::isdigit(ch) || ch == '_' || ch == '.' || ch == '*' || ch == '?' || ch == '+' || ch == '-' || ch == '[' || ch == ']') ){
417 tim 1972
418 tim 1963 ++ichT;
419     }
420     break;
421     }
422 tim 1979
423 tim 1963 cchToken = ichT - ichToken;
424 tim 1979
425 tim 1963 return true;
426     }
427    
428 tim 1965 bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
429     const Token& tokenCommand = ltoken[0];
430 tim 1963 int tokCommand = tokenCommand.tok;
431 tim 1965
432     atokenCommand = ltoken;
433     if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
434 tim 1963 return false;
435     }
436 tim 1965
437 tim 1963 return true;
438     }
439    
440     bool SelectionCompiler::compileExpression() {
441     /** todo */
442     int i = 1;
443     int tokCommand = atokenCommand[0].tok;
444 tim 1965 if (tokCommand == Token::define) {
445     i = 2;
446     } else if ((tokCommand & Token::embeddedExpression) != 0) {
447     // look for the open parenthesis
448     while (i < atokenCommand.size() &&
449     atokenCommand[i].tok != Token::leftparen)
450 tim 1963 ++i;
451     }
452 tim 1965
453     if (i >= atokenCommand.size()) {
454     return true;
455     }
456 tim 1963 return compileExpression(i);
457     }
458    
459    
460 tim 1965 bool SelectionCompiler::addTokenToPostfix(const Token& token) {
461 tim 1963 ltokenPostfix.push_back(token);
462     return true;
463     }
464    
465     bool SelectionCompiler::compileExpression(int itoken) {
466 tim 1965 ltokenPostfix.clear();
467     for (int i = 0; i < itoken; ++i) {
468 tim 1963 addTokenToPostfix(atokenCommand[i]);
469 tim 1965 }
470    
471 tim 1963 atokenInfix = atokenCommand;
472     itokenInfix = itoken;
473    
474 tim 1965 addTokenToPostfix(Token::tokenExpressionBegin);
475 tim 1963 if (!clauseOr()) {
476     return false;
477     }
478    
479 tim 1965 addTokenToPostfix(Token::tokenExpressionEnd);
480     if (itokenInfix != atokenInfix.size()) {
481 tim 1963 return endOfExpressionExpected();
482     }
483    
484     atokenCommand = ltokenPostfix;
485     return true;
486     }
487    
488     Token SelectionCompiler::tokenNext() {
489 tim 1965 if (itokenInfix == atokenInfix.size()) {
490     return Token();
491     }
492     return atokenInfix[itokenInfix++];
493 tim 1963 }
494    
495 tim 1965 boost::any SelectionCompiler::valuePeek() {
496     if (itokenInfix == atokenInfix.size()) {
497     return boost::any();
498 tim 1963 } else {
499     return atokenInfix[itokenInfix].value;
500     }
501     }
502    
503     int SelectionCompiler::tokPeek() {
504 tim 1965 if (itokenInfix == atokenInfix.size()) {
505 tim 1963 return 0;
506     }else {
507     return atokenInfix[itokenInfix].tok;
508     }
509     }
510    
511     bool SelectionCompiler::clauseOr() {
512     if (!clauseAnd()) {
513     return false;
514     }
515    
516 tim 1965 while (tokPeek() == Token::opOr) {
517 tim 1963 Token tokenOr = tokenNext();
518     if (!clauseAnd()) {
519     return false;
520     }
521     addTokenToPostfix(tokenOr);
522     }
523     return true;
524     }
525    
526     bool SelectionCompiler::clauseAnd() {
527     if (!clauseNot()) {
528     return false;
529     }
530    
531 tim 1965 while (tokPeek() == Token::opAnd) {
532 tim 1963 Token tokenAnd = tokenNext();
533     if (!clauseNot()) {
534     return false;
535     }
536     addTokenToPostfix(tokenAnd);
537     }
538     return true;
539     }
540    
541     bool SelectionCompiler::clauseNot() {
542 tim 1965 if (tokPeek() == Token::opNot) {
543 tim 1963 Token tokenNot = tokenNext();
544     if (!clauseNot()) {
545     return false;
546     }
547     return addTokenToPostfix(tokenNot);
548     }
549     return clausePrimitive();
550     }
551    
552     bool SelectionCompiler::clausePrimitive() {
553     int tok = tokPeek();
554     switch (tok) {
555 tim 1965 case Token::within:
556 tim 1963 return clauseWithin();
557 tim 1967
558     case Token::asterisk:
559     case Token::identifier:
560     return clauseChemObjName();
561 tim 1979
562     case Token::integer :
563     return clauseIndex();
564 tim 1963 default:
565 tim 1965 if ((tok & Token::atomproperty) == Token::atomproperty) {
566 tim 1963 return clauseComparator();
567     }
568 tim 1965 if ((tok & Token::predefinedset) != Token::predefinedset) {
569 tim 1963 break;
570     }
571     // fall into the code and below and just add the token
572 tim 1965 case Token::all:
573     case Token::none:
574 tim 1963 return addTokenToPostfix(tokenNext());
575 tim 1965 case Token::leftparen:
576 tim 1963 tokenNext();
577     if (!clauseOr()) {
578     return false;
579     }
580 tim 1965 if (tokenNext().tok != Token::rightparen) {
581 tim 1963 return rightParenthesisExpected();
582     }
583     return true;
584     }
585     return unrecognizedExpressionToken();
586     }
587    
588     bool SelectionCompiler::clauseComparator() {
589     Token tokenAtomProperty = tokenNext();
590     Token tokenComparator = tokenNext();
591 tim 1965 if ((tokenComparator.tok & Token::comparator) == 0) {
592 tim 1963 return comparisonOperatorExpected();
593     }
594    
595     Token tokenValue = tokenNext();
596 tim 1972 if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
597     return numberExpected();
598 tim 1963 }
599 tim 1972
600     float val;
601     if (tokenValue.value.type() == typeid(int)) {
602     val = boost::any_cast<int>(tokenValue.value);
603     } else if (tokenValue.value.type() == typeid(float)) {
604     val = boost::any_cast<float>(tokenValue.value);
605     } else {
606     return false;
607     }
608    
609 tim 1979 boost::any floatVal;
610     floatVal = val;
611 tim 1965 return addTokenToPostfix(Token(tokenComparator.tok,
612 tim 1979 tokenAtomProperty.tok, floatVal));
613 tim 1963 }
614    
615     bool SelectionCompiler::clauseWithin() {
616     tokenNext(); // WITHIN
617 tim 1965 if (tokenNext().tok != Token::leftparen) { // (
618 tim 1963 return leftParenthesisExpected();
619     }
620    
621 tim 1965 boost::any distance;
622 tim 1963 Token tokenDistance = tokenNext(); // distance
623     switch(tokenDistance.tok) {
624 tim 1965 case Token::integer:
625     case Token::decimal:
626 tim 1963 distance = tokenDistance.value;
627     break;
628     default:
629     return numberOrKeywordExpected();
630     }
631    
632 tim 1965 if (tokenNext().tok != Token::opOr) { // ,
633 tim 1963 return commaExpected();
634     }
635    
636     if (! clauseOr()) { // *expression*
637     return false;
638     }
639    
640 tim 1965 if (tokenNext().tok != Token::rightparen) { // )T
641 tim 1963 return rightParenthesisExpected();
642     }
643    
644 tim 1965 return addTokenToPostfix(Token(Token::within, distance));
645 tim 1963 }
646    
647 tim 1967 bool SelectionCompiler::clauseChemObjName() {
648 tim 2147 Token token = tokenNext();
649     if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) {
650 tim 1963
651 tim 2147 std::string name = boost::any_cast<std::string>(token.value);
652     if (isNameValid(name)) {
653     return addTokenToPostfix(Token(Token::name, name));
654     } else {
655     return compileError("invalid name: " + name);
656 tim 1967 }
657 tim 2147 }
658 tim 1972
659 tim 2147 return false;
660    
661 tim 1963 }
662    
663 tim 2147 bool SelectionCompiler::isNameValid(const std::string& name) {
664     int nbracket = 0;
665     int ndot = 0;
666     for (int i =0 ; i < name.size(); ++i) {
667     switch(name[i]) {
668 tim 1967
669 tim 2147 case '[' :
670     ++nbracket;
671     break;
672     case ']' :
673     --nbracket;
674     break;
675     case '.' :
676     ++ndot;
677     break;
678 tim 1987 }
679 tim 1967 }
680    
681 tim 2147 //only allow 3 dots at most
682     return (ndot <=3 && nbracket == 0) ? true : false;
683 tim 1963 }
684    
685 tim 1979 bool SelectionCompiler::clauseIndex(){
686     Token token = tokenNext();
687     if (token.tok == Token::integer) {
688     int index = boost::any_cast<int>(token.value);
689     int tok = tokPeek();
690     std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
691     if (tok == Token::to) {
692     tokenNext();
693     tok = tokPeek();
694     if (tok != Token::integer) {
695     return numberExpected();
696     }
697    
698     boost::any intVal = tokenNext().value;
699     int first = index;
700     if (intVal.type() != typeid(int)){
701     return false;
702     }
703     int second = boost::any_cast<int>(intVal);
704 tim 1967
705 tim 1979 return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
706    
707     }else {
708     return addTokenToPostfix(Token(Token::index, boost::any(index)));
709     }
710     } else {
711     return numberExpected();
712     }
713 tim 1963 }
714 tim 1979
715     }

Properties

Name Value
svn:executable *