ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-3.0/src/selection/SelectionCompiler.cpp
Revision: 1967
Committed: Thu Feb 3 23:14:05 2005 UTC (19 years, 5 months ago) by tim
File size: 20539 byte(s)
Log Message:
more work in selection library

File Contents

# User Rev Content
1 tim 1963 /*
2     * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3     *
4     * The University of Notre Dame grants you ("Licensee") a
5     * non-exclusive, royalty free, license to use, modify and
6     * redistribute this software in source and binary code form, provided
7     * that the following conditions are met:
8     *
9     * 1. Acknowledgement of the program authors must be made in any
10     * publication of scientific results based in part on use of the
11     * program. An acceptable form of acknowledgement is citation of
12     * the article in which the program was described (Matthew
13     * A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14     * J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15     * Parallel Simulation Engine for Molecular Dynamics,"
16     * J. Comput. Chem. 26, pp. 252-271 (2005))
17     *
18     * 2. Redistributions of source code must retain the above copyright
19     * notice, this list of conditions and the following disclaimer.
20     *
21     * 3. Redistributions in binary form must reproduce the above copyright
22     * notice, this list of conditions and the following disclaimer in the
23     * documentation and/or other materials provided with the
24     * distribution.
25     *
26     * This software is provided "AS IS," without a warranty of any
27     * kind. All express or implied conditions, representations and
28     * warranties, including any implied warranty of merchantability,
29     * fitness for a particular purpose or non-infringement, are hereby
30     * excluded. The University of Notre Dame and its licensors shall not
31     * be liable for any damages suffered by licensee as a result of
32     * using, modifying or distributing the software or its
33     * derivatives. In no event will the University of Notre Dame or its
34     * licensors be liable for any lost revenue, profit or data, or for
35     * direct, indirect, special, consequential, incidental or punitive
36     * damages, however caused and regardless of the theory of liability,
37     * arising out of the use of or inability to use software, even if the
38     * University of Notre Dame has been advised of the possibility of
39     * such damages.
40     */
41    
42     #include "selection/SelectionCompiler.hpp"
43 tim 1965 #include "utils/StringUtils.hpp"
44 tim 1963 namespace oopse {
45    
46     bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {
47    
48     this->filename = filename;
49     this->script = script;
50     lineNumbers.clear();
51     lineIndices.clear();
52     aatokenCompiled.clear();
53    
54 tim 1965 if (internalCompile()) {
55 tim 1963 return true;
56     }
57    
58     int icharEnd;
59 tim 1965 if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
60     (icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
61 tim 1963 icharEnd = script.size();
62     }
63     errorLine = script.substr(ichCurrentCommand, icharEnd);
64     return false;
65     }
66    
67     bool SelectionCompiler::internalCompile(){
68    
69     cchScript = script.size();
70     ichToken = 0;
71     lineCurrent = 1;
72    
73     error = false;
74    
75 tim 1965 //std::vector<Token> lltoken;
76     aatokenCompiled.clear();
77 tim 1963 std::vector<Token> ltoken;
78    
79 tim 1965 Token tokenCommand;
80     int tokCommand = Token::nada;
81 tim 1963
82     for ( ; true; ichToken += cchToken) {
83     if (lookingAtLeadingWhitespace())
84     continue;
85     if (lookingAtComment())
86     continue;
87 tim 1965 bool endOfLine = lookingAtEndOfLine();
88 tim 1963 if (endOfLine || lookingAtEndOfStatement()) {
89 tim 1965 if (tokCommand != Token::nada) {
90 tim 1963 if (! compileCommand(ltoken)) {
91     return false;
92     }
93 tim 1965 aatokenCompiled.push_back(atokenCommand);
94     lineNumbers.push_back(lineCurrent);
95     lineIndices.push_back(ichCurrentCommand);
96     ltoken.clear();
97     tokCommand = Token::nada;
98 tim 1963 }
99    
100     if (ichToken < cchScript) {
101     if (endOfLine)
102     ++lineCurrent;
103     continue;
104     }
105     break;
106     }
107    
108 tim 1965 if (tokCommand != Token::nada) {
109 tim 1963 if (lookingAtString()) {
110     std::string str = getUnescapedStringLiteral();
111 tim 1965 ltoken.push_back(Token(Token::string, str));
112 tim 1963 continue;
113     }
114 tim 1965 //if ((tokCommand & Token::specialstring) != 0 &&
115     // lookingAtSpecialString()) {
116     // std::string str = script.substr(ichToken, ichToken + cchToken);
117     // ltoken.push_back(Token(Token::string, str));
118     // continue;
119     //}
120     if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
121     float value = lexi_cast<float>(script.substr(ichToken, ichToken + cchToken));
122     ltoken.push_back(Token(Token::decimal, value));/**@todo*/
123 tim 1963 continue;
124     }
125 tim 1965 if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
126 tim 1963 std::string intString = script.substr(ichToken, ichToken + cchToken);
127     int val = lexi_cast<int>(intString);
128 tim 1965 ltoken.push_back(Token(Token::integer, val, intString));/**@todo*/
129 tim 1963 continue;
130     }
131     }
132    
133     if (lookingAtLookupToken()) {
134 tim 1965 std::string ident = script.substr(ichToken, ichToken + cchToken);
135 tim 1963
136 tim 1965 Token token;
137     Token* pToken = TokenMap::getInstance()->getToken(ident);
138     if (pToken != NULL) {
139     token = *pToken;
140     } else {
141     token = Token(Token::identifier, ident);
142 tim 1963 }
143    
144     int tok = token.tok;
145    
146     switch (tokCommand) {
147 tim 1965 case Token::nada:
148 tim 1963 ichCurrentCommand = ichToken;
149     //tokenCommand = token;
150     tokCommand = tok;
151 tim 1965 if ((tokCommand & Token::command) == 0)
152 tim 1963 return commandExpected();
153     break;
154    
155 tim 1965 case Token::define:
156 tim 1963 if (ltoken.size() == 1) {
157     // we are looking at the variable name
158 tim 1965 if (tok != Token::identifier &&
159     (tok & Token::predefinedset) != Token::predefinedset)
160 tim 1963 return invalidExpressionToken(ident);
161     } else {
162     // we are looking at the expression
163 tim 1965 if (tok != Token::identifier &&
164     (tok & (Token::expression | Token::predefinedset)) == 0)
165 tim 1963 return invalidExpressionToken(ident);
166     }
167    
168     break;
169    
170 tim 1965 case Token::select:
171     if (tok != Token::identifier && (tok & Token::expression) == 0)
172 tim 1963 return invalidExpressionToken(ident);
173     break;
174     }
175     ltoken.push_back(token);
176     continue;
177     }
178    
179     if (ltoken.size() == 0) {
180     return commandExpected();
181     }
182    
183     return unrecognizedToken();
184     }
185    
186     return true;
187     }
188    
189    
190     bool SelectionCompiler::lookingAtLeadingWhitespace() {
191    
192     int ichT = ichToken;
193     while (ichT < cchScript && std::isspace(script[ichT])) {
194     ++ichT;
195     }
196     cchToken = ichT - ichToken;
197     return cchToken > 0;
198     }
199    
200     bool SelectionCompiler::lookingAtEndOfLine() {
201     if (ichToken == cchScript)
202     return true;
203     int ichT = ichToken;
204     char ch = script[ichT];
205     if (ch == '\r') {
206     ++ichT;
207     if (ichT < cchScript && script[ichT] == '\n')
208     ++ichT;
209     } else if (ch == '\n') {
210     ++ichT;
211     } else {
212     return false;
213     }
214     cchToken = ichT - ichToken;
215     return true;
216     }
217    
218     bool SelectionCompiler::lookingAtEndOfStatement() {
219     if (ichToken == cchScript || script[ichToken] != ';')
220     return false;
221     cchToken = 1;
222     return true;
223     }
224    
225     bool SelectionCompiler::lookingAtString() {
226     if (ichToken == cchScript)
227     return false;
228     if (script[ichToken] != '"')
229     return false;
230     // remove support for single quote
231     // in order to use it in atom expressions
232     // char chFirst = script.charAt(ichToken);
233     // if (chFirst != '"' && chFirst != '\'')
234     // return false;
235     int ichT = ichToken + 1;
236     // while (ichT < cchScript && script.charAt(ichT++) != chFirst)
237     char ch;
238 tim 1965 bool previousCharBackslash = false;
239 tim 1963 while (ichT < cchScript) {
240 tim 1965 ch = script[ichT++];
241 tim 1963 if (ch == '"' && !previousCharBackslash)
242     break;
243     previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
244     }
245     cchToken = ichT - ichToken;
246     return true;
247     }
248    
249    
250     std::string SelectionCompiler::getUnescapedStringLiteral() {
251 tim 1965 /** @todo */
252     std::string sb(cchToken - 2, ' ');
253    
254 tim 1963 int ichMax = ichToken + cchToken - 1;
255     int ich = ichToken + 1;
256    
257     while (ich < ichMax) {
258     char ch = script[ich++];
259     if (ch == '\\' && ich < ichMax) {
260     ch = script[ich++];
261     switch (ch) {
262     case 'b':
263     ch = '\b';
264     break;
265     case 'n':
266     ch = '\n';
267     break;
268     case 't':
269     ch = '\t';
270     break;
271     case 'r':
272     ch = '\r';
273     // fall into
274     case '"':
275     case '\\':
276     case '\'':
277     break;
278     case 'x':
279     case 'u':
280     int digitCount = ch == 'x' ? 2 : 4;
281     if (ich < ichMax) {
282     int unicode = 0;
283     for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
284     char chT = script[ich];
285     int hexit = getHexitValue(chT);
286     if (hexit < 0)
287     break;
288     unicode <<= 4;
289     unicode += hexit;
290     ++ich;
291     }
292     ch = (char)unicode;
293     }
294     }
295     }
296 tim 1965 sb.append(1, ch);
297 tim 1963 }
298    
299 tim 1965 return sb;
300 tim 1963 }
301    
302 tim 1965 int SelectionCompiler::getHexitValue(char ch) {
303 tim 1963 if (ch >= '0' && ch <= '9')
304     return ch - '0';
305     else if (ch >= 'a' && ch <= 'f')
306     return 10 + ch - 'a';
307     else if (ch >= 'A' && ch <= 'F')
308     return 10 + ch - 'A';
309     else
310     return -1;
311     }
312    
313     bool SelectionCompiler::lookingAtSpecialString() {
314     int ichT = ichToken;
315     char ch = script[ichT];
316     while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
317     ++ichT;
318     }
319     cchToken = ichT - ichToken;
320     return cchToken > 0;
321     }
322    
323 tim 1965 bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
324 tim 1963 if (ichToken == cchScript) {
325     return false;
326     }
327    
328     int ichT = ichToken;
329     if (script[ichT] == '-') {
330     ++ichT;
331     }
332 tim 1965 bool digitSeen = false;
333 tim 1963 char ch = 'X';
334     while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
335     ++ichT;
336     digitSeen = true;
337     }
338    
339     if (ichT == cchScript || ch != '.') {
340     return false;
341     }
342    
343     // to support 1.ca, let's check the character after the dot
344     // to determine if it is an alpha
345     if (ch == '.' && (ichT + 1 < cchScript) && std::isalpha(script[ichT + 1])) {
346     return false;
347     }
348    
349     ++ichT;
350     while (ichT < cchScript && std::isdigit(script[ichT])) {
351     ++ichT;
352     digitSeen = true;
353     }
354     cchToken = ichT - ichToken;
355     return digitSeen;
356     }
357    
358 tim 1965 bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
359 tim 1963 if (ichToken == cchScript) {
360     return false;
361     }
362     int ichT = ichToken;
363     if (allowNegative && script[ichToken] == '-') {
364     ++ichT;
365     }
366     int ichBeginDigits = ichT;
367     while (ichT < cchScript && std::isdigit(script[ichT])) {
368     ++ichT;
369     }
370     if (ichBeginDigits == ichT) {
371     return false;
372     }
373     cchToken = ichT - ichToken;
374     return true;
375     }
376    
377     bool SelectionCompiler::lookingAtLookupToken() {
378     if (ichToken == cchScript) {
379     return false;
380     }
381    
382     int ichT = ichToken;
383     char ch;
384     switch (ch = script[ichT++]) {
385     case '(':
386     case ')':
387     case ',':
388     case '*':
389     case '-':
390     case '[':
391     case ']':
392     case '+':
393     case ':':
394     case '@':
395     case '.':
396     case '%':
397     break;
398     case '&':
399     case '|':
400     if (ichT < cchScript && script[ichT] == ch) {
401     ++ichT;
402     }
403     break;
404     case '<':
405     case '=':
406     case '>':
407     if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
408     ++ichT;
409     }
410     break;
411     case '/':
412     case '!':
413     if (ichT < cchScript && script[ichT] == '=') {
414     ++ichT;
415     }
416     break;
417     default:
418     if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
419     return false;
420     }
421     case '?': // include question marks in identifier for atom expressions
422     while (ichT < cchScript && (std::isalpha(ch = script[ichT]) ||std::isdigit(ch) ||
423     ch == '_' || ch == '?') ||(ch == '^' && ichT > ichToken && std::isdigit(script[ichT - 1]))) {
424     // hack for insertion codes embedded in an atom expression :-(
425     // select c3^a
426     ++ichT;
427     }
428     break;
429     }
430     cchToken = ichT - ichToken;
431     return true;
432     }
433    
434 tim 1965 bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
435     const Token& tokenCommand = ltoken[0];
436 tim 1963 int tokCommand = tokenCommand.tok;
437 tim 1965
438     atokenCommand = ltoken;
439     if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
440 tim 1963 return false;
441     }
442 tim 1965
443 tim 1963 return true;
444     }
445    
446     bool SelectionCompiler::compileExpression() {
447     /** todo */
448     int i = 1;
449     int tokCommand = atokenCommand[0].tok;
450 tim 1965 if (tokCommand == Token::define) {
451     i = 2;
452     } else if ((tokCommand & Token::embeddedExpression) != 0) {
453     // look for the open parenthesis
454     while (i < atokenCommand.size() &&
455     atokenCommand[i].tok != Token::leftparen)
456 tim 1963 ++i;
457     }
458 tim 1965
459     if (i >= atokenCommand.size()) {
460     return true;
461     }
462 tim 1963 return compileExpression(i);
463     }
464    
465    
466 tim 1965 bool SelectionCompiler::addTokenToPostfix(const Token& token) {
467 tim 1963 ltokenPostfix.push_back(token);
468     return true;
469     }
470    
471     bool SelectionCompiler::compileExpression(int itoken) {
472 tim 1965 ltokenPostfix.clear();
473     for (int i = 0; i < itoken; ++i) {
474 tim 1963 addTokenToPostfix(atokenCommand[i]);
475 tim 1965 }
476    
477 tim 1963 atokenInfix = atokenCommand;
478     itokenInfix = itoken;
479    
480 tim 1965 addTokenToPostfix(Token::tokenExpressionBegin);
481 tim 1963 if (!clauseOr()) {
482     return false;
483     }
484    
485 tim 1965 addTokenToPostfix(Token::tokenExpressionEnd);
486     if (itokenInfix != atokenInfix.size()) {
487 tim 1963 return endOfExpressionExpected();
488     }
489    
490     atokenCommand = ltokenPostfix;
491     return true;
492     }
493    
494     Token SelectionCompiler::tokenNext() {
495 tim 1965 if (itokenInfix == atokenInfix.size()) {
496     return Token();
497     }
498     return atokenInfix[itokenInfix++];
499 tim 1963 }
500    
501 tim 1965 boost::any SelectionCompiler::valuePeek() {
502     if (itokenInfix == atokenInfix.size()) {
503     return boost::any();
504 tim 1963 } else {
505     return atokenInfix[itokenInfix].value;
506     }
507     }
508    
509     int SelectionCompiler::tokPeek() {
510 tim 1965 if (itokenInfix == atokenInfix.size()) {
511 tim 1963 return 0;
512     }else {
513     return atokenInfix[itokenInfix].tok;
514     }
515     }
516    
517     bool SelectionCompiler::clauseOr() {
518     if (!clauseAnd()) {
519     return false;
520     }
521    
522 tim 1965 while (tokPeek() == Token::opOr) {
523 tim 1963 Token tokenOr = tokenNext();
524     if (!clauseAnd()) {
525     return false;
526     }
527     addTokenToPostfix(tokenOr);
528     }
529     return true;
530     }
531    
532     bool SelectionCompiler::clauseAnd() {
533     if (!clauseNot()) {
534     return false;
535     }
536    
537 tim 1965 while (tokPeek() == Token::opAnd) {
538 tim 1963 Token tokenAnd = tokenNext();
539     if (!clauseNot()) {
540     return false;
541     }
542     addTokenToPostfix(tokenAnd);
543     }
544     return true;
545     }
546    
547     bool SelectionCompiler::clauseNot() {
548 tim 1965 if (tokPeek() == Token::opNot) {
549 tim 1963 Token tokenNot = tokenNext();
550     if (!clauseNot()) {
551     return false;
552     }
553     return addTokenToPostfix(tokenNot);
554     }
555     return clausePrimitive();
556     }
557    
558     bool SelectionCompiler::clausePrimitive() {
559     int tok = tokPeek();
560     switch (tok) {
561 tim 1965 case Token::within:
562 tim 1963 return clauseWithin();
563 tim 1967
564     case Token::asterisk:
565     case Token::identifier:
566     return clauseChemObjName();
567    
568 tim 1963 default:
569 tim 1965 if ((tok & Token::atomproperty) == Token::atomproperty) {
570 tim 1963 return clauseComparator();
571     }
572 tim 1965 if ((tok & Token::predefinedset) != Token::predefinedset) {
573 tim 1963 break;
574     }
575     // fall into the code and below and just add the token
576 tim 1965 case Token::all:
577     case Token::none:
578 tim 1963 return addTokenToPostfix(tokenNext());
579 tim 1965 case Token::leftparen:
580 tim 1963 tokenNext();
581     if (!clauseOr()) {
582     return false;
583     }
584 tim 1965 if (tokenNext().tok != Token::rightparen) {
585 tim 1963 return rightParenthesisExpected();
586     }
587     return true;
588     }
589     return unrecognizedExpressionToken();
590     }
591    
592     bool SelectionCompiler::clauseComparator() {
593     Token tokenAtomProperty = tokenNext();
594     Token tokenComparator = tokenNext();
595 tim 1965 if ((tokenComparator.tok & Token::comparator) == 0) {
596 tim 1963 return comparisonOperatorExpected();
597     }
598    
599     Token tokenValue = tokenNext();
600 tim 1965 if (tokenValue.tok != Token::integer) {
601 tim 1963 return integerExpected();
602     }
603     int val = tokenValue.intValue;
604     // note that a comparator instruction is a complicated instruction
605     // int intValue is the tok of the property you are comparing
606     // the value against which you are comparing is stored as an Integer
607     // in the object value
608 tim 1965 return addTokenToPostfix(Token(tokenComparator.tok,
609     tokenAtomProperty.tok, boost::any(val)));
610 tim 1963 }
611    
612     bool SelectionCompiler::clauseWithin() {
613     tokenNext(); // WITHIN
614 tim 1965 if (tokenNext().tok != Token::leftparen) { // (
615 tim 1963 return leftParenthesisExpected();
616     }
617    
618 tim 1965 boost::any distance;
619 tim 1963 Token tokenDistance = tokenNext(); // distance
620     switch(tokenDistance.tok) {
621 tim 1965 case Token::integer:
622     distance = float(tokenDistance.intValue);
623 tim 1963 break;
624 tim 1965 case Token::decimal:
625 tim 1963 distance = tokenDistance.value;
626     break;
627     default:
628     return numberOrKeywordExpected();
629     }
630    
631 tim 1965 if (tokenNext().tok != Token::opOr) { // ,
632 tim 1963 return commaExpected();
633     }
634    
635     if (! clauseOr()) { // *expression*
636     return false;
637     }
638    
639 tim 1965 if (tokenNext().tok != Token::rightparen) { // )T
640 tim 1963 return rightParenthesisExpected();
641     }
642    
643 tim 1965 return addTokenToPostfix(Token(Token::within, distance));
644 tim 1963 }
645    
646 tim 1967 bool SelectionCompiler::clauseChemObjName() {
647     std::string chemObjName;
648     int tok = tokPeek();
649     if (!clauseName(chemObjName)){
650     return false;
651     }
652 tim 1963
653    
654 tim 1967 tok = tokPeek();
655     //allow two dot at most
656     if (tok == Token::dot) {
657     if (!clauseName(chemObjName)) {
658     return false;
659     }
660     tok = tokPeek();
661     if (tok == Token::dot) {
662     if (!clauseName(chemObjName)) {
663     return false;
664     }
665     }
666     }
667    
668     return addTokenToPostfix(Token(Token::name, chemObjName));
669 tim 1963 }
670    
671 tim 1967 bool SelectionCompiler:: clauseName(std::string& name) {
672    
673     int tok = tokPeek();
674    
675     if (tok == Token::asterisk || tok == Token::identifier) {
676     name += boost::any_cast<std::string>(tokenNext().value);
677    
678     while(true){
679     tok = tokPeek();
680     switch (tok) {
681     case Token::asterisk :
682     name += "*";
683     tokenNext();
684     break;
685     case Token::identifier :
686     name += boost::any_cast<std::string>(tokenNext().value);
687     break;
688     case Token::integer :
689     name += toString(boost::any_cast<int>(tokenNext().value));
690     break;
691     case Token::dot :
692     return true;
693     default :
694     return true;
695     }
696     }
697    
698     }else {
699     return false;
700     }
701    
702 tim 1963 }
703    
704 tim 1967
705 tim 1963 }

Properties

Name Value
svn:executable *