ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-3.0/src/selection/SelectionCompiler.cpp
Revision: 1979
Committed: Mon Feb 7 19:13:18 2005 UTC (19 years, 5 months ago) by tim
File size: 21799 byte(s)
Log Message:
selection library is working

File Contents

# User Rev Content
1 tim 1963 /*
2     * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3     *
4     * The University of Notre Dame grants you ("Licensee") a
5     * non-exclusive, royalty free, license to use, modify and
6     * redistribute this software in source and binary code form, provided
7     * that the following conditions are met:
8     *
9     * 1. Acknowledgement of the program authors must be made in any
10     * publication of scientific results based in part on use of the
11     * program. An acceptable form of acknowledgement is citation of
12     * the article in which the program was described (Matthew
13     * A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14     * J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15     * Parallel Simulation Engine for Molecular Dynamics,"
16     * J. Comput. Chem. 26, pp. 252-271 (2005))
17     *
18     * 2. Redistributions of source code must retain the above copyright
19     * notice, this list of conditions and the following disclaimer.
20     *
21     * 3. Redistributions in binary form must reproduce the above copyright
22     * notice, this list of conditions and the following disclaimer in the
23     * documentation and/or other materials provided with the
24     * distribution.
25     *
26     * This software is provided "AS IS," without a warranty of any
27     * kind. All express or implied conditions, representations and
28     * warranties, including any implied warranty of merchantability,
29     * fitness for a particular purpose or non-infringement, are hereby
30     * excluded. The University of Notre Dame and its licensors shall not
31     * be liable for any damages suffered by licensee as a result of
32     * using, modifying or distributing the software or its
33     * derivatives. In no event will the University of Notre Dame or its
34     * licensors be liable for any lost revenue, profit or data, or for
35     * direct, indirect, special, consequential, incidental or punitive
36     * damages, however caused and regardless of the theory of liability,
37     * arising out of the use of or inability to use software, even if the
38     * University of Notre Dame has been advised of the possibility of
39     * such damages.
40     */
41    
42     #include "selection/SelectionCompiler.hpp"
43 tim 1965 #include "utils/StringUtils.hpp"
44 tim 1963 namespace oopse {
45    
46     bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {
47    
48     this->filename = filename;
49     this->script = script;
50     lineNumbers.clear();
51     lineIndices.clear();
52     aatokenCompiled.clear();
53    
54 tim 1965 if (internalCompile()) {
55 tim 1963 return true;
56     }
57    
58     int icharEnd;
59 tim 1965 if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
60     (icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
61 tim 1963 icharEnd = script.size();
62     }
63     errorLine = script.substr(ichCurrentCommand, icharEnd);
64     return false;
65     }
66    
67     bool SelectionCompiler::internalCompile(){
68    
69     cchScript = script.size();
70     ichToken = 0;
71     lineCurrent = 1;
72    
73     error = false;
74    
75 tim 1965 //std::vector<Token> lltoken;
76     aatokenCompiled.clear();
77 tim 1963 std::vector<Token> ltoken;
78    
79 tim 1965 Token tokenCommand;
80     int tokCommand = Token::nada;
81 tim 1963
82     for ( ; true; ichToken += cchToken) {
83     if (lookingAtLeadingWhitespace())
84     continue;
85 tim 1972 //if (lookingAtComment())
86     // continue;
87 tim 1965 bool endOfLine = lookingAtEndOfLine();
88 tim 1963 if (endOfLine || lookingAtEndOfStatement()) {
89 tim 1965 if (tokCommand != Token::nada) {
90 tim 1963 if (! compileCommand(ltoken)) {
91     return false;
92     }
93 tim 1965 aatokenCompiled.push_back(atokenCommand);
94     lineNumbers.push_back(lineCurrent);
95     lineIndices.push_back(ichCurrentCommand);
96     ltoken.clear();
97     tokCommand = Token::nada;
98 tim 1963 }
99    
100     if (ichToken < cchScript) {
101     if (endOfLine)
102     ++lineCurrent;
103     continue;
104     }
105     break;
106     }
107    
108 tim 1965 if (tokCommand != Token::nada) {
109 tim 1963 if (lookingAtString()) {
110     std::string str = getUnescapedStringLiteral();
111 tim 1965 ltoken.push_back(Token(Token::string, str));
112 tim 1963 continue;
113     }
114 tim 1965 //if ((tokCommand & Token::specialstring) != 0 &&
115     // lookingAtSpecialString()) {
116     // std::string str = script.substr(ichToken, ichToken + cchToken);
117     // ltoken.push_back(Token(Token::string, str));
118     // continue;
119     //}
120     if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
121 tim 1979 float value = lexi_cast<float>(script.substr(ichToken, cchToken));
122     std::cout << "encount an decimal: " << value << std::endl;
123     ltoken.push_back(Token(Token::decimal, boost::any(value)));
124 tim 1963 continue;
125     }
126 tim 1965 if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
127 tim 1979
128     int val = lexi_cast<int>(script.substr(ichToken, cchToken));
129     std::cout << "encount an integer: " << val << std::endl;
130     ltoken.push_back(Token(Token::integer, boost::any(val)));
131 tim 1963 continue;
132     }
133     }
134    
135     if (lookingAtLookupToken()) {
136 tim 1972 std::string ident = script.substr(ichToken, cchToken);
137 tim 1965 Token token;
138     Token* pToken = TokenMap::getInstance()->getToken(ident);
139     if (pToken != NULL) {
140     token = *pToken;
141     } else {
142     token = Token(Token::identifier, ident);
143 tim 1963 }
144    
145     int tok = token.tok;
146    
147     switch (tokCommand) {
148 tim 1965 case Token::nada:
149 tim 1963 ichCurrentCommand = ichToken;
150     //tokenCommand = token;
151     tokCommand = tok;
152 tim 1965 if ((tokCommand & Token::command) == 0)
153 tim 1963 return commandExpected();
154     break;
155    
156 tim 1965 case Token::define:
157 tim 1963 if (ltoken.size() == 1) {
158     // we are looking at the variable name
159 tim 1965 if (tok != Token::identifier &&
160     (tok & Token::predefinedset) != Token::predefinedset)
161 tim 1963 return invalidExpressionToken(ident);
162     } else {
163     // we are looking at the expression
164 tim 1965 if (tok != Token::identifier &&
165     (tok & (Token::expression | Token::predefinedset)) == 0)
166 tim 1963 return invalidExpressionToken(ident);
167     }
168    
169     break;
170    
171 tim 1965 case Token::select:
172     if (tok != Token::identifier && (tok & Token::expression) == 0)
173 tim 1963 return invalidExpressionToken(ident);
174     break;
175     }
176     ltoken.push_back(token);
177     continue;
178     }
179    
180     if (ltoken.size() == 0) {
181     return commandExpected();
182     }
183    
184     return unrecognizedToken();
185     }
186    
187     return true;
188     }
189    
190    
191     bool SelectionCompiler::lookingAtLeadingWhitespace() {
192    
193     int ichT = ichToken;
194     while (ichT < cchScript && std::isspace(script[ichT])) {
195     ++ichT;
196     }
197     cchToken = ichT - ichToken;
198     return cchToken > 0;
199     }
200    
201     bool SelectionCompiler::lookingAtEndOfLine() {
202     if (ichToken == cchScript)
203     return true;
204     int ichT = ichToken;
205     char ch = script[ichT];
206     if (ch == '\r') {
207     ++ichT;
208     if (ichT < cchScript && script[ichT] == '\n')
209     ++ichT;
210     } else if (ch == '\n') {
211     ++ichT;
212     } else {
213     return false;
214     }
215     cchToken = ichT - ichToken;
216     return true;
217     }
218    
219     bool SelectionCompiler::lookingAtEndOfStatement() {
220     if (ichToken == cchScript || script[ichToken] != ';')
221     return false;
222     cchToken = 1;
223     return true;
224     }
225    
226     bool SelectionCompiler::lookingAtString() {
227     if (ichToken == cchScript)
228     return false;
229     if (script[ichToken] != '"')
230     return false;
231     // remove support for single quote
232     // in order to use it in atom expressions
233     // char chFirst = script.charAt(ichToken);
234     // if (chFirst != '"' && chFirst != '\'')
235     // return false;
236     int ichT = ichToken + 1;
237     // while (ichT < cchScript && script.charAt(ichT++) != chFirst)
238     char ch;
239 tim 1965 bool previousCharBackslash = false;
240 tim 1963 while (ichT < cchScript) {
241 tim 1965 ch = script[ichT++];
242 tim 1963 if (ch == '"' && !previousCharBackslash)
243     break;
244     previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
245     }
246     cchToken = ichT - ichToken;
247 tim 1979
248    
249     std::cout << "lookingAtString: encount " << script.substr(ichToken, cchToken) << std::endl;
250 tim 1963 return true;
251     }
252    
253    
254     std::string SelectionCompiler::getUnescapedStringLiteral() {
255 tim 1965 /** @todo */
256     std::string sb(cchToken - 2, ' ');
257    
258 tim 1963 int ichMax = ichToken + cchToken - 1;
259     int ich = ichToken + 1;
260    
261     while (ich < ichMax) {
262     char ch = script[ich++];
263     if (ch == '\\' && ich < ichMax) {
264     ch = script[ich++];
265     switch (ch) {
266     case 'b':
267     ch = '\b';
268     break;
269     case 'n':
270     ch = '\n';
271     break;
272     case 't':
273     ch = '\t';
274     break;
275     case 'r':
276     ch = '\r';
277     // fall into
278     case '"':
279     case '\\':
280     case '\'':
281     break;
282     case 'x':
283     case 'u':
284     int digitCount = ch == 'x' ? 2 : 4;
285     if (ich < ichMax) {
286     int unicode = 0;
287     for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
288     char chT = script[ich];
289     int hexit = getHexitValue(chT);
290     if (hexit < 0)
291     break;
292     unicode <<= 4;
293     unicode += hexit;
294     ++ich;
295     }
296     ch = (char)unicode;
297     }
298     }
299     }
300 tim 1965 sb.append(1, ch);
301 tim 1963 }
302    
303 tim 1965 return sb;
304 tim 1963 }
305    
306 tim 1965 int SelectionCompiler::getHexitValue(char ch) {
307 tim 1963 if (ch >= '0' && ch <= '9')
308     return ch - '0';
309     else if (ch >= 'a' && ch <= 'f')
310     return 10 + ch - 'a';
311     else if (ch >= 'A' && ch <= 'F')
312     return 10 + ch - 'A';
313     else
314     return -1;
315     }
316    
317     bool SelectionCompiler::lookingAtSpecialString() {
318     int ichT = ichToken;
319     char ch = script[ichT];
320     while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
321     ++ichT;
322     }
323     cchToken = ichT - ichToken;
324     return cchToken > 0;
325     }
326    
327 tim 1965 bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
328 tim 1963 if (ichToken == cchScript) {
329     return false;
330     }
331    
332     int ichT = ichToken;
333     if (script[ichT] == '-') {
334     ++ichT;
335     }
336 tim 1965 bool digitSeen = false;
337 tim 1963 char ch = 'X';
338     while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
339     ++ichT;
340     digitSeen = true;
341     }
342    
343     if (ichT == cchScript || ch != '.') {
344     return false;
345     }
346    
347     // to support 1.ca, let's check the character after the dot
348     // to determine if it is an alpha
349     if (ch == '.' && (ichT + 1 < cchScript) && std::isalpha(script[ichT + 1])) {
350     return false;
351     }
352    
353     ++ichT;
354     while (ichT < cchScript && std::isdigit(script[ichT])) {
355     ++ichT;
356     digitSeen = true;
357     }
358     cchToken = ichT - ichToken;
359     return digitSeen;
360     }
361    
362 tim 1965 bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
363 tim 1963 if (ichToken == cchScript) {
364     return false;
365     }
366     int ichT = ichToken;
367     if (allowNegative && script[ichToken] == '-') {
368     ++ichT;
369     }
370     int ichBeginDigits = ichT;
371     while (ichT < cchScript && std::isdigit(script[ichT])) {
372     ++ichT;
373     }
374     if (ichBeginDigits == ichT) {
375     return false;
376     }
377     cchToken = ichT - ichToken;
378     return true;
379     }
380    
381     bool SelectionCompiler::lookingAtLookupToken() {
382     if (ichToken == cchScript) {
383     return false;
384     }
385    
386     int ichT = ichToken;
387     char ch;
388     switch (ch = script[ichT++]) {
389     case '(':
390     case ')':
391     case ',':
392     case '*':
393     case '-':
394     case '[':
395     case ']':
396     case '+':
397     case ':':
398     case '@':
399     case '.':
400     case '%':
401     break;
402     case '&':
403     case '|':
404     if (ichT < cchScript && script[ichT] == ch) {
405     ++ichT;
406     }
407     break;
408     case '<':
409     case '=':
410     case '>':
411     if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
412     ++ichT;
413     }
414     break;
415     case '/':
416     case '!':
417     if (ichT < cchScript && script[ichT] == '=') {
418     ++ichT;
419     }
420     break;
421     default:
422     if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
423     return false;
424     }
425     case '?': // include question marks in identifier for atom expressions
426 tim 1972 while (ichT < cchScript && !std::isspace(ch = script[ichT]) && (std::isalpha(ch) ||std::isdigit(ch) ||
427     ch == '_' || ch == '?') ) {
428    
429 tim 1963 ++ichT;
430     }
431     break;
432     }
433 tim 1979
434 tim 1963 cchToken = ichT - ichToken;
435 tim 1979
436     std::cout << "lookingAtLookupToken: encount " << script.substr(ichToken, cchToken) << std::endl;
437 tim 1963 return true;
438     }
439    
440 tim 1965 bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
441     const Token& tokenCommand = ltoken[0];
442 tim 1963 int tokCommand = tokenCommand.tok;
443 tim 1965
444     atokenCommand = ltoken;
445     if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
446 tim 1963 return false;
447     }
448 tim 1965
449 tim 1963 return true;
450     }
451    
452     bool SelectionCompiler::compileExpression() {
453     /** todo */
454     int i = 1;
455     int tokCommand = atokenCommand[0].tok;
456 tim 1965 if (tokCommand == Token::define) {
457     i = 2;
458     } else if ((tokCommand & Token::embeddedExpression) != 0) {
459     // look for the open parenthesis
460     while (i < atokenCommand.size() &&
461     atokenCommand[i].tok != Token::leftparen)
462 tim 1963 ++i;
463     }
464 tim 1965
465     if (i >= atokenCommand.size()) {
466     return true;
467     }
468 tim 1963 return compileExpression(i);
469     }
470    
471    
472 tim 1965 bool SelectionCompiler::addTokenToPostfix(const Token& token) {
473 tim 1963 ltokenPostfix.push_back(token);
474     return true;
475     }
476    
477     bool SelectionCompiler::compileExpression(int itoken) {
478 tim 1965 ltokenPostfix.clear();
479     for (int i = 0; i < itoken; ++i) {
480 tim 1963 addTokenToPostfix(atokenCommand[i]);
481 tim 1965 }
482    
483 tim 1963 atokenInfix = atokenCommand;
484     itokenInfix = itoken;
485    
486 tim 1965 addTokenToPostfix(Token::tokenExpressionBegin);
487 tim 1963 if (!clauseOr()) {
488     return false;
489     }
490    
491 tim 1965 addTokenToPostfix(Token::tokenExpressionEnd);
492     if (itokenInfix != atokenInfix.size()) {
493 tim 1963 return endOfExpressionExpected();
494     }
495    
496     atokenCommand = ltokenPostfix;
497     return true;
498     }
499    
500     Token SelectionCompiler::tokenNext() {
501 tim 1965 if (itokenInfix == atokenInfix.size()) {
502     return Token();
503     }
504     return atokenInfix[itokenInfix++];
505 tim 1963 }
506    
507 tim 1965 boost::any SelectionCompiler::valuePeek() {
508     if (itokenInfix == atokenInfix.size()) {
509     return boost::any();
510 tim 1963 } else {
511     return atokenInfix[itokenInfix].value;
512     }
513     }
514    
515     int SelectionCompiler::tokPeek() {
516 tim 1965 if (itokenInfix == atokenInfix.size()) {
517 tim 1963 return 0;
518     }else {
519     return atokenInfix[itokenInfix].tok;
520     }
521     }
522    
523     bool SelectionCompiler::clauseOr() {
524     if (!clauseAnd()) {
525     return false;
526     }
527    
528 tim 1965 while (tokPeek() == Token::opOr) {
529 tim 1963 Token tokenOr = tokenNext();
530     if (!clauseAnd()) {
531     return false;
532     }
533     addTokenToPostfix(tokenOr);
534     }
535     return true;
536     }
537    
538     bool SelectionCompiler::clauseAnd() {
539     if (!clauseNot()) {
540     return false;
541     }
542    
543 tim 1965 while (tokPeek() == Token::opAnd) {
544 tim 1963 Token tokenAnd = tokenNext();
545     if (!clauseNot()) {
546     return false;
547     }
548     addTokenToPostfix(tokenAnd);
549     }
550     return true;
551     }
552    
553     bool SelectionCompiler::clauseNot() {
554 tim 1965 if (tokPeek() == Token::opNot) {
555 tim 1963 Token tokenNot = tokenNext();
556     if (!clauseNot()) {
557     return false;
558     }
559     return addTokenToPostfix(tokenNot);
560     }
561     return clausePrimitive();
562     }
563    
564     bool SelectionCompiler::clausePrimitive() {
565     int tok = tokPeek();
566     switch (tok) {
567 tim 1965 case Token::within:
568 tim 1963 return clauseWithin();
569 tim 1967
570     case Token::asterisk:
571     case Token::identifier:
572     return clauseChemObjName();
573 tim 1979
574     case Token::integer :
575     return clauseIndex();
576 tim 1963 default:
577 tim 1965 if ((tok & Token::atomproperty) == Token::atomproperty) {
578 tim 1963 return clauseComparator();
579     }
580 tim 1965 if ((tok & Token::predefinedset) != Token::predefinedset) {
581 tim 1963 break;
582     }
583     // fall into the code and below and just add the token
584 tim 1965 case Token::all:
585     case Token::none:
586 tim 1963 return addTokenToPostfix(tokenNext());
587 tim 1965 case Token::leftparen:
588 tim 1963 tokenNext();
589     if (!clauseOr()) {
590     return false;
591     }
592 tim 1965 if (tokenNext().tok != Token::rightparen) {
593 tim 1963 return rightParenthesisExpected();
594     }
595     return true;
596     }
597     return unrecognizedExpressionToken();
598     }
599    
600     bool SelectionCompiler::clauseComparator() {
601     Token tokenAtomProperty = tokenNext();
602     Token tokenComparator = tokenNext();
603 tim 1965 if ((tokenComparator.tok & Token::comparator) == 0) {
604 tim 1963 return comparisonOperatorExpected();
605     }
606    
607     Token tokenValue = tokenNext();
608 tim 1972 if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
609     return numberExpected();
610 tim 1963 }
611 tim 1972
612     float val;
613     if (tokenValue.value.type() == typeid(int)) {
614     val = boost::any_cast<int>(tokenValue.value);
615     } else if (tokenValue.value.type() == typeid(float)) {
616     val = boost::any_cast<float>(tokenValue.value);
617     } else {
618     return false;
619     }
620    
621 tim 1979 boost::any floatVal;
622     floatVal = val;
623 tim 1965 return addTokenToPostfix(Token(tokenComparator.tok,
624 tim 1979 tokenAtomProperty.tok, floatVal));
625 tim 1963 }
626    
627     bool SelectionCompiler::clauseWithin() {
628     tokenNext(); // WITHIN
629 tim 1965 if (tokenNext().tok != Token::leftparen) { // (
630 tim 1963 return leftParenthesisExpected();
631     }
632    
633 tim 1965 boost::any distance;
634 tim 1963 Token tokenDistance = tokenNext(); // distance
635     switch(tokenDistance.tok) {
636 tim 1965 case Token::integer:
637     case Token::decimal:
638 tim 1963 distance = tokenDistance.value;
639     break;
640     default:
641     return numberOrKeywordExpected();
642     }
643    
644 tim 1965 if (tokenNext().tok != Token::opOr) { // ,
645 tim 1963 return commaExpected();
646     }
647    
648     if (! clauseOr()) { // *expression*
649     return false;
650     }
651    
652 tim 1965 if (tokenNext().tok != Token::rightparen) { // )T
653 tim 1963 return rightParenthesisExpected();
654     }
655    
656 tim 1965 return addTokenToPostfix(Token(Token::within, distance));
657 tim 1963 }
658    
659 tim 1967 bool SelectionCompiler::clauseChemObjName() {
660     std::string chemObjName;
661     int tok = tokPeek();
662     if (!clauseName(chemObjName)){
663     return false;
664     }
665 tim 1963
666    
667 tim 1967 tok = tokPeek();
668     //allow two dot at most
669     if (tok == Token::dot) {
670 tim 1972 tokenNext();
671     chemObjName += ".";
672 tim 1967 if (!clauseName(chemObjName)) {
673     return false;
674     }
675     tok = tokPeek();
676     if (tok == Token::dot) {
677 tim 1972 tokenNext();
678     chemObjName += ".";
679    
680 tim 1967 if (!clauseName(chemObjName)) {
681     return false;
682     }
683     }
684     }
685    
686     return addTokenToPostfix(Token(Token::name, chemObjName));
687 tim 1963 }
688    
689 tim 1967 bool SelectionCompiler:: clauseName(std::string& name) {
690    
691     int tok = tokPeek();
692    
693     if (tok == Token::asterisk || tok == Token::identifier) {
694     name += boost::any_cast<std::string>(tokenNext().value);
695    
696     while(true){
697     tok = tokPeek();
698     switch (tok) {
699     case Token::asterisk :
700     name += "*";
701     tokenNext();
702     break;
703     case Token::identifier :
704     name += boost::any_cast<std::string>(tokenNext().value);
705     break;
706     case Token::integer :
707     name += toString(boost::any_cast<int>(tokenNext().value));
708     break;
709     case Token::dot :
710     return true;
711     default :
712     return true;
713     }
714     }
715    
716     }else {
717     return false;
718     }
719    
720 tim 1963 }
721    
722 tim 1979 bool SelectionCompiler::clauseIndex(){
723     Token token = tokenNext();
724     if (token.tok == Token::integer) {
725     int index = boost::any_cast<int>(token.value);
726     int tok = tokPeek();
727     std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
728     if (tok == Token::to) {
729     tokenNext();
730     tok = tokPeek();
731     if (tok != Token::integer) {
732     return numberExpected();
733     }
734    
735     boost::any intVal = tokenNext().value;
736     int first = index;
737     if (intVal.type() != typeid(int)){
738     return false;
739     }
740     int second = boost::any_cast<int>(intVal);
741 tim 1967
742 tim 1979 return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
743    
744     }else {
745     return addTokenToPostfix(Token(Token::index, boost::any(index)));
746     }
747     } else {
748     return numberExpected();
749     }
750 tim 1963 }
751 tim 1979
752     }

Properties

Name Value
svn:executable *