ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-4/src/selection/SelectionCompiler.cpp
Revision: 1972
Committed: Fri Feb 4 22:39:26 2005 UTC (19 years, 5 months ago) by tim
File size: 20529 byte(s)
Log Message:
Half of the selection utility is working; the within keyword and the atomproperty keyword still need to be debugged.

File Contents

# User Rev Content
1 tim 1963 /*
2     * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3     *
4     * The University of Notre Dame grants you ("Licensee") a
5     * non-exclusive, royalty free, license to use, modify and
6     * redistribute this software in source and binary code form, provided
7     * that the following conditions are met:
8     *
9     * 1. Acknowledgement of the program authors must be made in any
10     * publication of scientific results based in part on use of the
11     * program. An acceptable form of acknowledgement is citation of
12     * the article in which the program was described (Matthew
13     * A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14     * J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15     * Parallel Simulation Engine for Molecular Dynamics,"
16     * J. Comput. Chem. 26, pp. 252-271 (2005))
17     *
18     * 2. Redistributions of source code must retain the above copyright
19     * notice, this list of conditions and the following disclaimer.
20     *
21     * 3. Redistributions in binary form must reproduce the above copyright
22     * notice, this list of conditions and the following disclaimer in the
23     * documentation and/or other materials provided with the
24     * distribution.
25     *
26     * This software is provided "AS IS," without a warranty of any
27     * kind. All express or implied conditions, representations and
28     * warranties, including any implied warranty of merchantability,
29     * fitness for a particular purpose or non-infringement, are hereby
30     * excluded. The University of Notre Dame and its licensors shall not
31     * be liable for any damages suffered by licensee as a result of
32     * using, modifying or distributing the software or its
33     * derivatives. In no event will the University of Notre Dame or its
34     * licensors be liable for any lost revenue, profit or data, or for
35     * direct, indirect, special, consequential, incidental or punitive
36     * damages, however caused and regardless of the theory of liability,
37     * arising out of the use of or inability to use software, even if the
38     * University of Notre Dame has been advised of the possibility of
39     * such damages.
40     */
41    
42     #include "selection/SelectionCompiler.hpp"
43 tim 1965 #include "utils/StringUtils.hpp"
44 tim 1963 namespace oopse {
45    
46     bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {
47    
48     this->filename = filename;
49     this->script = script;
50     lineNumbers.clear();
51     lineIndices.clear();
52     aatokenCompiled.clear();
53    
54 tim 1965 if (internalCompile()) {
55 tim 1963 return true;
56     }
57    
58     int icharEnd;
59 tim 1965 if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
60     (icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
61 tim 1963 icharEnd = script.size();
62     }
63     errorLine = script.substr(ichCurrentCommand, icharEnd);
64     return false;
65     }
66    
67     bool SelectionCompiler::internalCompile(){
68    
69     cchScript = script.size();
70     ichToken = 0;
71     lineCurrent = 1;
72    
73     error = false;
74    
75 tim 1965 //std::vector<Token> lltoken;
76     aatokenCompiled.clear();
77 tim 1963 std::vector<Token> ltoken;
78    
79 tim 1965 Token tokenCommand;
80     int tokCommand = Token::nada;
81 tim 1963
82     for ( ; true; ichToken += cchToken) {
83     if (lookingAtLeadingWhitespace())
84     continue;
85 tim 1972 //if (lookingAtComment())
86     // continue;
87 tim 1965 bool endOfLine = lookingAtEndOfLine();
88 tim 1963 if (endOfLine || lookingAtEndOfStatement()) {
89 tim 1965 if (tokCommand != Token::nada) {
90 tim 1963 if (! compileCommand(ltoken)) {
91     return false;
92     }
93 tim 1965 aatokenCompiled.push_back(atokenCommand);
94     lineNumbers.push_back(lineCurrent);
95     lineIndices.push_back(ichCurrentCommand);
96     ltoken.clear();
97     tokCommand = Token::nada;
98 tim 1963 }
99    
100     if (ichToken < cchScript) {
101     if (endOfLine)
102     ++lineCurrent;
103     continue;
104     }
105     break;
106     }
107    
108 tim 1965 if (tokCommand != Token::nada) {
109 tim 1963 if (lookingAtString()) {
110     std::string str = getUnescapedStringLiteral();
111 tim 1965 ltoken.push_back(Token(Token::string, str));
112 tim 1963 continue;
113     }
114 tim 1965 //if ((tokCommand & Token::specialstring) != 0 &&
115     // lookingAtSpecialString()) {
116     // std::string str = script.substr(ichToken, ichToken + cchToken);
117     // ltoken.push_back(Token(Token::string, str));
118     // continue;
119     //}
120     if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
121     float value = lexi_cast<float>(script.substr(ichToken, ichToken + cchToken));
122     ltoken.push_back(Token(Token::decimal, value));/**@todo*/
123 tim 1963 continue;
124     }
125 tim 1965 if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
126 tim 1963 std::string intString = script.substr(ichToken, ichToken + cchToken);
127     int val = lexi_cast<int>(intString);
128 tim 1965 ltoken.push_back(Token(Token::integer, val, intString));/**@todo*/
129 tim 1963 continue;
130     }
131     }
132    
133     if (lookingAtLookupToken()) {
134 tim 1972 std::string ident = script.substr(ichToken, cchToken);
135 tim 1965 Token token;
136     Token* pToken = TokenMap::getInstance()->getToken(ident);
137     if (pToken != NULL) {
138     token = *pToken;
139     } else {
140     token = Token(Token::identifier, ident);
141 tim 1963 }
142    
143     int tok = token.tok;
144    
145     switch (tokCommand) {
146 tim 1965 case Token::nada:
147 tim 1963 ichCurrentCommand = ichToken;
148     //tokenCommand = token;
149     tokCommand = tok;
150 tim 1965 if ((tokCommand & Token::command) == 0)
151 tim 1963 return commandExpected();
152     break;
153    
154 tim 1965 case Token::define:
155 tim 1963 if (ltoken.size() == 1) {
156     // we are looking at the variable name
157 tim 1965 if (tok != Token::identifier &&
158     (tok & Token::predefinedset) != Token::predefinedset)
159 tim 1963 return invalidExpressionToken(ident);
160     } else {
161     // we are looking at the expression
162 tim 1965 if (tok != Token::identifier &&
163     (tok & (Token::expression | Token::predefinedset)) == 0)
164 tim 1963 return invalidExpressionToken(ident);
165     }
166    
167     break;
168    
169 tim 1965 case Token::select:
170     if (tok != Token::identifier && (tok & Token::expression) == 0)
171 tim 1963 return invalidExpressionToken(ident);
172     break;
173     }
174     ltoken.push_back(token);
175     continue;
176     }
177    
178     if (ltoken.size() == 0) {
179     return commandExpected();
180     }
181    
182     return unrecognizedToken();
183     }
184    
185     return true;
186     }
187    
188    
189     bool SelectionCompiler::lookingAtLeadingWhitespace() {
190    
191     int ichT = ichToken;
192     while (ichT < cchScript && std::isspace(script[ichT])) {
193     ++ichT;
194     }
195     cchToken = ichT - ichToken;
196     return cchToken > 0;
197     }
198    
199     bool SelectionCompiler::lookingAtEndOfLine() {
200     if (ichToken == cchScript)
201     return true;
202     int ichT = ichToken;
203     char ch = script[ichT];
204     if (ch == '\r') {
205     ++ichT;
206     if (ichT < cchScript && script[ichT] == '\n')
207     ++ichT;
208     } else if (ch == '\n') {
209     ++ichT;
210     } else {
211     return false;
212     }
213     cchToken = ichT - ichToken;
214     return true;
215     }
216    
217     bool SelectionCompiler::lookingAtEndOfStatement() {
218     if (ichToken == cchScript || script[ichToken] != ';')
219     return false;
220     cchToken = 1;
221     return true;
222     }
223    
224     bool SelectionCompiler::lookingAtString() {
225     if (ichToken == cchScript)
226     return false;
227     if (script[ichToken] != '"')
228     return false;
229     // remove support for single quote
230     // in order to use it in atom expressions
231     // char chFirst = script.charAt(ichToken);
232     // if (chFirst != '"' && chFirst != '\'')
233     // return false;
234     int ichT = ichToken + 1;
235     // while (ichT < cchScript && script.charAt(ichT++) != chFirst)
236     char ch;
237 tim 1965 bool previousCharBackslash = false;
238 tim 1963 while (ichT < cchScript) {
239 tim 1965 ch = script[ichT++];
240 tim 1963 if (ch == '"' && !previousCharBackslash)
241     break;
242     previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
243     }
244     cchToken = ichT - ichToken;
245     return true;
246     }
247    
248    
249     std::string SelectionCompiler::getUnescapedStringLiteral() {
250 tim 1965 /** @todo */
251     std::string sb(cchToken - 2, ' ');
252    
253 tim 1963 int ichMax = ichToken + cchToken - 1;
254     int ich = ichToken + 1;
255    
256     while (ich < ichMax) {
257     char ch = script[ich++];
258     if (ch == '\\' && ich < ichMax) {
259     ch = script[ich++];
260     switch (ch) {
261     case 'b':
262     ch = '\b';
263     break;
264     case 'n':
265     ch = '\n';
266     break;
267     case 't':
268     ch = '\t';
269     break;
270     case 'r':
271     ch = '\r';
272     // fall into
273     case '"':
274     case '\\':
275     case '\'':
276     break;
277     case 'x':
278     case 'u':
279     int digitCount = ch == 'x' ? 2 : 4;
280     if (ich < ichMax) {
281     int unicode = 0;
282     for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
283     char chT = script[ich];
284     int hexit = getHexitValue(chT);
285     if (hexit < 0)
286     break;
287     unicode <<= 4;
288     unicode += hexit;
289     ++ich;
290     }
291     ch = (char)unicode;
292     }
293     }
294     }
295 tim 1965 sb.append(1, ch);
296 tim 1963 }
297    
298 tim 1965 return sb;
299 tim 1963 }
300    
301 tim 1965 int SelectionCompiler::getHexitValue(char ch) {
302 tim 1963 if (ch >= '0' && ch <= '9')
303     return ch - '0';
304     else if (ch >= 'a' && ch <= 'f')
305     return 10 + ch - 'a';
306     else if (ch >= 'A' && ch <= 'F')
307     return 10 + ch - 'A';
308     else
309     return -1;
310     }
311    
312     bool SelectionCompiler::lookingAtSpecialString() {
313     int ichT = ichToken;
314     char ch = script[ichT];
315     while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
316     ++ichT;
317     }
318     cchToken = ichT - ichToken;
319     return cchToken > 0;
320     }
321    
322 tim 1965 bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
323 tim 1963 if (ichToken == cchScript) {
324     return false;
325     }
326    
327     int ichT = ichToken;
328     if (script[ichT] == '-') {
329     ++ichT;
330     }
331 tim 1965 bool digitSeen = false;
332 tim 1963 char ch = 'X';
333     while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
334     ++ichT;
335     digitSeen = true;
336     }
337    
338     if (ichT == cchScript || ch != '.') {
339     return false;
340     }
341    
342     // to support 1.ca, let's check the character after the dot
343     // to determine if it is an alpha
344     if (ch == '.' && (ichT + 1 < cchScript) && std::isalpha(script[ichT + 1])) {
345     return false;
346     }
347    
348     ++ichT;
349     while (ichT < cchScript && std::isdigit(script[ichT])) {
350     ++ichT;
351     digitSeen = true;
352     }
353     cchToken = ichT - ichToken;
354     return digitSeen;
355     }
356    
357 tim 1965 bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
358 tim 1963 if (ichToken == cchScript) {
359     return false;
360     }
361     int ichT = ichToken;
362     if (allowNegative && script[ichToken] == '-') {
363     ++ichT;
364     }
365     int ichBeginDigits = ichT;
366     while (ichT < cchScript && std::isdigit(script[ichT])) {
367     ++ichT;
368     }
369     if (ichBeginDigits == ichT) {
370     return false;
371     }
372     cchToken = ichT - ichToken;
373     return true;
374     }
375    
376     bool SelectionCompiler::lookingAtLookupToken() {
377     if (ichToken == cchScript) {
378     return false;
379     }
380    
381     int ichT = ichToken;
382     char ch;
383     switch (ch = script[ichT++]) {
384     case '(':
385     case ')':
386     case ',':
387     case '*':
388     case '-':
389     case '[':
390     case ']':
391     case '+':
392     case ':':
393     case '@':
394     case '.':
395     case '%':
396     break;
397     case '&':
398     case '|':
399     if (ichT < cchScript && script[ichT] == ch) {
400     ++ichT;
401     }
402     break;
403     case '<':
404     case '=':
405     case '>':
406     if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
407     ++ichT;
408     }
409     break;
410     case '/':
411     case '!':
412     if (ichT < cchScript && script[ichT] == '=') {
413     ++ichT;
414     }
415     break;
416     default:
417     if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
418     return false;
419     }
420     case '?': // include question marks in identifier for atom expressions
421 tim 1972 while (ichT < cchScript && !std::isspace(ch = script[ichT]) && (std::isalpha(ch) ||std::isdigit(ch) ||
422     ch == '_' || ch == '?') ) {
423    
424 tim 1963 ++ichT;
425     }
426     break;
427     }
428     cchToken = ichT - ichToken;
429     return true;
430     }
431    
432 tim 1965 bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
433     const Token& tokenCommand = ltoken[0];
434 tim 1963 int tokCommand = tokenCommand.tok;
435 tim 1965
436     atokenCommand = ltoken;
437     if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
438 tim 1963 return false;
439     }
440 tim 1965
441 tim 1963 return true;
442     }
443    
444     bool SelectionCompiler::compileExpression() {
445     /** todo */
446     int i = 1;
447     int tokCommand = atokenCommand[0].tok;
448 tim 1965 if (tokCommand == Token::define) {
449     i = 2;
450     } else if ((tokCommand & Token::embeddedExpression) != 0) {
451     // look for the open parenthesis
452     while (i < atokenCommand.size() &&
453     atokenCommand[i].tok != Token::leftparen)
454 tim 1963 ++i;
455     }
456 tim 1965
457     if (i >= atokenCommand.size()) {
458     return true;
459     }
460 tim 1963 return compileExpression(i);
461     }
462    
463    
464 tim 1965 bool SelectionCompiler::addTokenToPostfix(const Token& token) {
465 tim 1963 ltokenPostfix.push_back(token);
466     return true;
467     }
468    
469     bool SelectionCompiler::compileExpression(int itoken) {
470 tim 1965 ltokenPostfix.clear();
471     for (int i = 0; i < itoken; ++i) {
472 tim 1963 addTokenToPostfix(atokenCommand[i]);
473 tim 1965 }
474    
475 tim 1963 atokenInfix = atokenCommand;
476     itokenInfix = itoken;
477    
478 tim 1965 addTokenToPostfix(Token::tokenExpressionBegin);
479 tim 1963 if (!clauseOr()) {
480     return false;
481     }
482    
483 tim 1965 addTokenToPostfix(Token::tokenExpressionEnd);
484     if (itokenInfix != atokenInfix.size()) {
485 tim 1963 return endOfExpressionExpected();
486     }
487    
488     atokenCommand = ltokenPostfix;
489     return true;
490     }
491    
492     Token SelectionCompiler::tokenNext() {
493 tim 1965 if (itokenInfix == atokenInfix.size()) {
494     return Token();
495     }
496     return atokenInfix[itokenInfix++];
497 tim 1963 }
498    
499 tim 1965 boost::any SelectionCompiler::valuePeek() {
500     if (itokenInfix == atokenInfix.size()) {
501     return boost::any();
502 tim 1963 } else {
503     return atokenInfix[itokenInfix].value;
504     }
505     }
506    
507     int SelectionCompiler::tokPeek() {
508 tim 1965 if (itokenInfix == atokenInfix.size()) {
509 tim 1963 return 0;
510     }else {
511     return atokenInfix[itokenInfix].tok;
512     }
513     }
514    
515     bool SelectionCompiler::clauseOr() {
516     if (!clauseAnd()) {
517     return false;
518     }
519    
520 tim 1965 while (tokPeek() == Token::opOr) {
521 tim 1963 Token tokenOr = tokenNext();
522     if (!clauseAnd()) {
523     return false;
524     }
525     addTokenToPostfix(tokenOr);
526     }
527     return true;
528     }
529    
530     bool SelectionCompiler::clauseAnd() {
531     if (!clauseNot()) {
532     return false;
533     }
534    
535 tim 1965 while (tokPeek() == Token::opAnd) {
536 tim 1963 Token tokenAnd = tokenNext();
537     if (!clauseNot()) {
538     return false;
539     }
540     addTokenToPostfix(tokenAnd);
541     }
542     return true;
543     }
544    
545     bool SelectionCompiler::clauseNot() {
546 tim 1965 if (tokPeek() == Token::opNot) {
547 tim 1963 Token tokenNot = tokenNext();
548     if (!clauseNot()) {
549     return false;
550     }
551     return addTokenToPostfix(tokenNot);
552     }
553     return clausePrimitive();
554     }
555    
556     bool SelectionCompiler::clausePrimitive() {
557     int tok = tokPeek();
558     switch (tok) {
559 tim 1965 case Token::within:
560 tim 1963 return clauseWithin();
561 tim 1967
562     case Token::asterisk:
563     case Token::identifier:
564     return clauseChemObjName();
565    
566 tim 1963 default:
567 tim 1965 if ((tok & Token::atomproperty) == Token::atomproperty) {
568 tim 1963 return clauseComparator();
569     }
570 tim 1965 if ((tok & Token::predefinedset) != Token::predefinedset) {
571 tim 1963 break;
572     }
573     // fall into the code and below and just add the token
574 tim 1965 case Token::all:
575     case Token::none:
576 tim 1963 return addTokenToPostfix(tokenNext());
577 tim 1965 case Token::leftparen:
578 tim 1963 tokenNext();
579     if (!clauseOr()) {
580     return false;
581     }
582 tim 1965 if (tokenNext().tok != Token::rightparen) {
583 tim 1963 return rightParenthesisExpected();
584     }
585     return true;
586     }
587     return unrecognizedExpressionToken();
588     }
589    
590     bool SelectionCompiler::clauseComparator() {
591     Token tokenAtomProperty = tokenNext();
592     Token tokenComparator = tokenNext();
593 tim 1965 if ((tokenComparator.tok & Token::comparator) == 0) {
594 tim 1963 return comparisonOperatorExpected();
595     }
596    
597     Token tokenValue = tokenNext();
598 tim 1972 if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
599     return numberExpected();
600 tim 1963 }
601 tim 1972
602     float val;
603     if (tokenValue.value.type() == typeid(int)) {
604     val = boost::any_cast<int>(tokenValue.value);
605     } else if (tokenValue.value.type() == typeid(float)) {
606     val = boost::any_cast<float>(tokenValue.value);
607     } else {
608     return false;
609     }
610    
611 tim 1965 return addTokenToPostfix(Token(tokenComparator.tok,
612     tokenAtomProperty.tok, boost::any(val)));
613 tim 1963 }
614    
615     bool SelectionCompiler::clauseWithin() {
616     tokenNext(); // WITHIN
617 tim 1965 if (tokenNext().tok != Token::leftparen) { // (
618 tim 1963 return leftParenthesisExpected();
619     }
620    
621 tim 1965 boost::any distance;
622 tim 1963 Token tokenDistance = tokenNext(); // distance
623     switch(tokenDistance.tok) {
624 tim 1965 case Token::integer:
625     distance = float(tokenDistance.intValue);
626 tim 1963 break;
627 tim 1965 case Token::decimal:
628 tim 1963 distance = tokenDistance.value;
629     break;
630     default:
631     return numberOrKeywordExpected();
632     }
633    
634 tim 1965 if (tokenNext().tok != Token::opOr) { // ,
635 tim 1963 return commaExpected();
636     }
637    
638     if (! clauseOr()) { // *expression*
639     return false;
640     }
641    
642 tim 1965 if (tokenNext().tok != Token::rightparen) { // )T
643 tim 1963 return rightParenthesisExpected();
644     }
645    
646 tim 1965 return addTokenToPostfix(Token(Token::within, distance));
647 tim 1963 }
648    
649 tim 1967 bool SelectionCompiler::clauseChemObjName() {
650     std::string chemObjName;
651     int tok = tokPeek();
652     if (!clauseName(chemObjName)){
653     return false;
654     }
655 tim 1963
656    
657 tim 1967 tok = tokPeek();
658     //allow two dot at most
659     if (tok == Token::dot) {
660 tim 1972 tokenNext();
661     chemObjName += ".";
662 tim 1967 if (!clauseName(chemObjName)) {
663     return false;
664     }
665     tok = tokPeek();
666     if (tok == Token::dot) {
667 tim 1972 tokenNext();
668     chemObjName += ".";
669    
670 tim 1967 if (!clauseName(chemObjName)) {
671     return false;
672     }
673     }
674     }
675    
676     return addTokenToPostfix(Token(Token::name, chemObjName));
677 tim 1963 }
678    
679 tim 1967 bool SelectionCompiler:: clauseName(std::string& name) {
680    
681     int tok = tokPeek();
682    
683     if (tok == Token::asterisk || tok == Token::identifier) {
684     name += boost::any_cast<std::string>(tokenNext().value);
685    
686     while(true){
687     tok = tokPeek();
688     switch (tok) {
689     case Token::asterisk :
690     name += "*";
691     tokenNext();
692     break;
693     case Token::identifier :
694     name += boost::any_cast<std::string>(tokenNext().value);
695     break;
696     case Token::integer :
697     name += toString(boost::any_cast<int>(tokenNext().value));
698     break;
699     case Token::dot :
700     return true;
701     default :
702     return true;
703     }
704     }
705    
706     }else {
707     return false;
708     }
709    
710 tim 1963 }
711    
712 tim 1967
713 tim 1963 }

Properties

Name Value
svn:executable *