src/selection/SelectionCompiler.cpp

/*
 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
 *
 * The University of Notre Dame grants you ("Licensee") a
 * non-exclusive, royalty free, license to use, modify and
 * redistribute this software in source and binary code form, provided
 * that the following conditions are met:
 *
 * 1. Acknowledgement of the program authors must be made in any
 *    publication of scientific results based in part on use of the
 *    program.  An acceptable form of acknowledgement is citation of
 *    the article in which the program was described (Matthew
 *    A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
 *    J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
 *    Parallel Simulation Engine for Molecular Dynamics,"
 *    J. Comput. Chem. 26, pp. 252-271 (2005))
 *
 * 2. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 3. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 *
 * This software is provided "AS IS," without a warranty of any
 * kind. All express or implied conditions, representations and
 * warranties, including any implied warranty of merchantability,
 * fitness for a particular purpose or non-infringement, are hereby
 * excluded.  The University of Notre Dame and its licensors shall not
 * be liable for any damages suffered by licensee as a result of
 * using, modifying or distributing the software or its
 * derivatives. In no event will the University of Notre Dame or its
 * licensors be liable for any lost revenue, profit or data, or for
 * direct, indirect, special, consequential, incidental or punitive
 * damages, however caused and regardless of the theory of liability,
 * arising out of the use of or inability to use software, even if the
 * University of Notre Dame has been advised of the possibility of
 * such damages.
 */

#include "selection/SelectionCompiler.hpp"
#include "utils/StringUtils.hpp"
namespace oopse {

/**
 * Compiles a selection script and, on failure, records the offending line.
 *
 * Stores @p filename and @p script in the compiler, discards the results of
 * any previous compilation and then runs internalCompile().
 *
 * @param filename name to associate with the script (only stored here)
 * @param script   text of the selection script to compile
 * @return true if internalCompile() succeeded; false otherwise, in which
 *         case errorLine holds the text from the start of the command that
 *         was being processed up to (not including) the next line
 *         terminator, or up to the end of the script.
 */
bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {

    this->filename = filename;
    this->script = script;
    lineNumbers.clear();
    lineIndices.clear();
    aatokenCompiled.clear();
    // Start from a known position so that the error-line extraction below
    // never uses an index left over from a previously compiled script.
    ichCurrentCommand = 0;

    if (internalCompile()) {
        return true;
    }

    // The failing line ends at whichever line terminator comes first.
    const std::string::size_type lineBegin = ichCurrentCommand;
    std::string::size_type lineEnd = script.find_first_of("\r\n", lineBegin);
    if (lineEnd == std::string::npos) {
        lineEnd = script.size();
    }
    // std::string::substr takes (position, count), not (begin, end).
    errorLine = script.substr(lineBegin, lineEnd - lineBegin);
    return false;
}

bool SelectionCompiler::internalCompile(){

    cchScript = script.size();
    ichToken = 0;
    lineCurrent = 1;

    error = false;

    //std::vector<Token> lltoken;
    aatokenCompiled.clear();
    std::vector<Token> ltoken;

    Token tokenCommand;
    int tokCommand = Token::nada;

    for ( ; true; ichToken += cchToken) {
        if (lookingAtLeadingWhitespace())
            continue;
        if (lookingAtComment())
            continue;
        bool endOfLine = lookingAtEndOfLine();
        if (endOfLine || lookingAtEndOfStatement()) {
            if (tokCommand != Token::nada) {
                if (! compileCommand(ltoken)) {
                    return false;
                }
                aatokenCompiled.push_back(atokenCommand);
                lineNumbers.push_back(lineCurrent);
                lineIndices.push_back(ichCurrentCommand);
                ltoken.clear();
                tokCommand = Token::nada;
            }
            
            if (ichToken < cchScript) {
                if (endOfLine)
                    ++lineCurrent;
              continue;
            }
            break;
        }

        if (tokCommand != Token::nada) {
            if (lookingAtString()) {
                std::string str = getUnescapedStringLiteral();
                ltoken.push_back(Token(Token::string, str));
                continue;
            }
            //if ((tokCommand & Token::specialstring) != 0 &&
            //    lookingAtSpecialString()) {
            //    std::string str = script.substr(ichToken, ichToken + cchToken);
            //    ltoken.push_back(Token(Token::string, str));
            //    continue;
            //}
            if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
                float value = lexi_cast<float>(script.substr(ichToken, ichToken + cchToken));          
                ltoken.push_back(Token(Token::decimal, value));/**@todo*/
                continue;
            }
            if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
                std::string intString = script.substr(ichToken, ichToken + cchToken);
                int val = lexi_cast<int>(intString);
                ltoken.push_back(Token(Token::integer, val, intString));/**@todo*/
                continue;
            }
        }
      
        if (lookingAtLookupToken()) {
            std::string ident = script.substr(ichToken, ichToken + cchToken);

            Token token;            
            Token* pToken = TokenMap::getInstance()->getToken(ident);
            if (pToken != NULL) {
                token = *pToken;
            } else {
                token = Token(Token::identifier, ident);
            }
            
            int tok = token.tok;
            
            switch (tokCommand) {
                case Token::nada:
                    ichCurrentCommand = ichToken;
                    //tokenCommand = token;
                    tokCommand = tok;
                    if ((tokCommand & Token::command) == 0)
                    return commandExpected();
                    break;

                case Token::define:
                    if (ltoken.size() == 1) {
                        // we are looking at the variable name
                        if (tok != Token::identifier &&
                        (tok & Token::predefinedset) != Token::predefinedset)
                        return invalidExpressionToken(ident);
                    } else {
                    // we are looking at the expression
                    if (tok != Token::identifier && 
                        (tok & (Token::expression | Token::predefinedset)) == 0)
                        return invalidExpressionToken(ident);
                    }
                    
                    break;

                case Token::select:
                    if (tok != Token::identifier && (tok & Token::expression) == 0)
                        return invalidExpressionToken(ident);
                break;
            }
            ltoken.push_back(token);
            continue;
        }

        if (ltoken.size() == 0) {
            return commandExpected();
        }
        
        return unrecognizedToken();
    }

    return true;
  }


  /**
   * Checks whether the scanner is positioned on blanks inside a line.
   *
   * Line terminators ('\r' and '\n') are deliberately not treated as
   * whitespace here: they have to stay visible to lookingAtEndOfLine() so
   * that a newline ends the current command and advances the line counter.
   *
   * @return true if at least one whitespace character was matched; cchToken
   *         is set to the number of characters matched (0 when none).
   */
  bool SelectionCompiler::lookingAtLeadingWhitespace() {

    int ichT = ichToken;
    while (ichT < cchScript) {
      const char ch = script[ichT];
      if (ch == '\r' || ch == '\n') {
        break;
      }
      // std::isspace requires a value representable as unsigned char.
      if (!std::isspace(static_cast<unsigned char>(ch))) {
        break;
      }
      ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
  }

  /**
   * Checks whether the scanner is at a line terminator or at the end of the
   * script.
   *
   * Recognizes "\r\n", a lone '\r' and a lone '\n'.
   *
   * @return true at the end of the script (cchToken is left untouched in
   *         that case) or on a line terminator (cchToken is set to its
   *         length, 1 or 2); false otherwise.
   */
  bool SelectionCompiler::lookingAtEndOfLine() {
    if (ichToken == cchScript) {
      return true;
    }

    int terminatorLength = 0;
    switch (script[ichToken]) {
      case '\n':
        terminatorLength = 1;
        break;
      case '\r': {
        // A '\n' directly after the '\r' belongs to the same terminator.
        const int next = ichToken + 1;
        terminatorLength = (next < cchScript && script[next] == '\n') ? 2 : 1;
        break;
      }
      default:
        return false;
    }

    cchToken = terminatorLength;
    return true;
  }

  /**
   * Checks whether the scanner is positioned on a ';'.
   *
   * @return true (with cchToken set to 1) when the current character is
   *         ';'; false at the end of the script or on any other character.
   */
  bool SelectionCompiler::lookingAtEndOfStatement() {
    const bool atSemicolon = (ichToken != cchScript) && (script[ichToken] == ';');
    if (atSemicolon) {
      cchToken = 1;
    }
    return atSemicolon;
  }

  /**
   * Checks whether the scanner is positioned on a double-quoted string.
   *
   * Only '"' opens a string; single quotes are not supported so that they
   * remain usable in atom expressions. The match runs up to and including
   * the first '"' that is not preceded by an unescaped backslash, or to the
   * end of the script if no such quote exists.
   *
   * @return true (with cchToken set to the matched length, opening quote
   *         included) when the current character is '"'; false otherwise.
   */
  bool SelectionCompiler::lookingAtString() {
    if (ichToken == cchScript || script[ichToken] != '"') {
      return false;
    }

    // True while the previously scanned character was an unescaped backslash.
    bool escaped = false;
    bool closed = false;
    int end = ichToken + 1;
    while (!closed && end < cchScript) {
      const char current = script[end];
      ++end;
      if (current == '"' && !escaped) {
        closed = true;
      } else if (current == '\\') {
        escaped = !escaped;
      } else {
        escaped = false;
      }
    }

    cchToken = end - ichToken;
    return true;
  }

  
/**
 * Returns the contents of the string literal at the current token with its
 * backslash escapes resolved.
 *
 * Uses ichToken/cchToken as set by lookingAtString(): the first and last
 * characters of the token are skipped as the quotes. Recognized escapes are
 * \b, \n, \t, \r, \", \\, \', \xH[H] and \uH[H[H[H]]]; hexadecimal values are
 * truncated to a single char. For any other escape the character after the
 * backslash is kept as is.
 *
 * @return the unescaped text between the quotes.
 */
std::string SelectionCompiler::getUnescapedStringLiteral() {
    /** @todo */
    std::string sb;
    // Only reserve capacity: constructing the string with a length would
    // prepend that many blanks to the result. cchToken can be 1 for an
    // unterminated literal, so guard against a negative size.
    if (cchToken > 2) {
        sb.reserve(static_cast<std::string::size_type>(cchToken - 2));
    }

    int ichMax = ichToken + cchToken - 1;
    int ich = ichToken + 1;

    while (ich < ichMax) {
        char ch = script[ich++];
        if (ch == '\\' && ich < ichMax) {
            ch = script[ich++];
            switch (ch) {
                case 'b':
                    ch = '\b';
                    break;
                case 'n':
                    ch = '\n';
                    break;
                case 't':
                    ch = '\t';
                    break;
                case 'r':
                    ch = '\r';
                    break;
                case '"':
                case '\\':
                case '\'':
                    // The escaped character stands for itself.
                    break;
                case 'x':
                case 'u': {
                    // \x takes up to two hex digits, \u up to four.
                    int digitCount = ch == 'x' ? 2 : 4;
                    if (ich < ichMax) {
                        int unicode = 0;
                        for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
                            char chT = script[ich];
                            int hexit = getHexitValue(chT);
                            if (hexit < 0)
                                break;
                            unicode <<= 4;
                            unicode += hexit;
                            ++ich;
                        }
                        ch = static_cast<char>(unicode);
                    }
                    break;
                }
                default:
                    // Unknown escape: keep the character after the backslash.
                    break;
            }
        }
        sb.append(1, ch);
    }

    return sb;
}

/**
 * Converts a hexadecimal digit character to its numeric value.
 *
 * @param ch character to convert
 * @return 0-15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 for any other character.
 */
int SelectionCompiler::getHexitValue(char ch) {
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
        return 10 + ch - 'a';
    }
    if ('A' <= ch && ch <= 'F') {
        return 10 + ch - 'A';
    }
    return -1;
}

/**
 * Matches everything from the current position up to (not including) the
 * next ';', '\r' or '\n', or up to the end of the script.
 *
 * @return true if at least one character was matched; cchToken is set to
 *         the number of characters matched (0 when none).
 */
bool SelectionCompiler::lookingAtSpecialString() {
    int ichT = ichToken;
    while (ichT < cchScript) {
        // Re-read the character on every step; testing only the first
        // character would never notice a terminator further along.
        const char ch = script[ichT];
        if (ch == ';' || ch == '\r' || ch == '\n') {
            break;
        }
        ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
}

/**
 * Checks whether the scanner is positioned on a decimal number: an optional
 * '-', optional digits, a '.', and optional digits, with at least one digit
 * overall.
 *
 * A '.' that is directly followed by a letter is not taken as a decimal
 * point, so that text such as "1.ca" is not split as a number.
 *
 * NOTE(review): allowNegative is not consulted; a leading '-' is always
 * accepted, unlike in lookingAtInteger(). This is left unchanged because
 * callers may rely on it - confirm the intended behaviour.
 *
 * @param allowNegative currently unused (see note above)
 * @return true if a decimal was matched, with cchToken set to its length.
 *         cchToken may also have been modified when false is returned after
 *         a '.' was seen without any digit.
 */
bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
    (void)allowNegative;

    if (ichToken == cchScript) {
        return false;
    }

    int ichT = ichToken;
    if (script[ichT] == '-') {
        ++ichT;
    }

    // The <cctype> classifiers require a value representable as unsigned
    // char; a plain char may be negative for non-ASCII bytes.
    bool digitSeen = false;
    while (ichT < cchScript && std::isdigit(static_cast<unsigned char>(script[ichT]))) {
        ++ichT;
        digitSeen = true;
    }

    // The (possibly empty) integer part must be followed by a '.'.
    if (ichT == cchScript || script[ichT] != '.') {
        return false;
    }

    // to support 1.ca, let's check the character after the dot
    // to determine if it is an alpha
    if ((ichT + 1 < cchScript) &&
        std::isalpha(static_cast<unsigned char>(script[ichT + 1]))) {
        return false;
    }

    ++ichT;
    while (ichT < cchScript && std::isdigit(static_cast<unsigned char>(script[ichT]))) {
        ++ichT;
        digitSeen = true;
    }
    cchToken = ichT - ichToken;
    return digitSeen;
}

/**
 * Checks whether the scanner is positioned on an integer: one or more
 * digits, optionally preceded by '-' when negative numbers are allowed.
 *
 * @param allowNegative whether a leading '-' is accepted as part of the
 *                      number
 * @return true if an integer was matched, with cchToken set to its length
 *         (sign included); false otherwise, leaving cchToken untouched.
 */
bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
    if (ichToken == cchScript) {
        return false;
    }
    int ichT = ichToken;
    if (allowNegative && script[ichToken] == '-') {
        ++ichT;
    }
    int ichBeginDigits = ichT;
    // std::isdigit requires a value representable as unsigned char; a plain
    // char may be negative for non-ASCII bytes.
    while (ichT < cchScript && std::isdigit(static_cast<unsigned char>(script[ichT]))) {
        ++ichT;
    }
    if (ichBeginDigits == ichT) {
        // No digit at all (possibly just a lone '-').
        return false;
    }
    cchToken = ichT - ichToken;
    return true;
}

/**
 * Checks whether the scanner is positioned on an operator, a punctuation
 * mark or a word that can be looked up in the token map.
 *
 * Matches, in this order:
 *  - one of the single characters ( ) , * - [ ] + : @ . %
 *  - '&' or '|', optionally doubled ("&&", "||")
 *  - '<', '=' or '>', optionally followed by one more of '<', '=', '>'
 *  - '/' or '!', optionally followed by '='
 *  - a word starting with an ASCII letter, '_' or '?', continued with
 *    letters, digits, '_' and '?'; a '^' directly after a digit is also
 *    accepted (insertion codes such as "c3^a").
 *
 * Any other starting character is rejected.
 *
 * @return true if a token was matched, with cchToken set to its length;
 *         false at the end of the script or on a character that cannot
 *         start a token (cchToken is left untouched in both cases).
 */
bool SelectionCompiler::lookingAtLookupToken() {
    if (ichToken == cchScript) {
        return false;
    }

    int ichT = ichToken;
    char ch;
    switch (ch = script[ichT++]) {
        case '(':
        case ')':
        case ',':
        case '*':
        case '-':
        case '[':
        case ']':
        case '+':
        case ':':
        case '@':
        case '.':
        case '%':
            break;
        case '&':
        case '|':
            if (ichT < cchScript && script[ichT] == ch) {
                ++ichT;
            }
            break;
        case '<':
        case '=':
        case '>':
            if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
                ++ichT;
            }
            break;
        case '/':
        case '!':
            if (ichT < cchScript && script[ichT] == '=') {
                ++ichT;
            }
            break;
        default: {
            // A word must start with a letter or an underscore. Both range
            // tests need '||': a character can never be below 'A' and above
            // 'Z' at the same time.
            const bool isLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
            if (!isLetter && ch != '_') {
                return false;
            }
        }
            // fall through
        case '?': // include question marks in identifier for atom expressions
            while (ichT < cchScript) {
                ch = script[ichT];
                // The <cctype> classifiers need an unsigned char value.
                const unsigned char uch = static_cast<unsigned char>(ch);
                const bool wordChar =
                    std::isalpha(uch) || std::isdigit(uch) || ch == '_' || ch == '?';
                // hack for insertion codes embedded in an atom expression :-(
                // select c3^a
                // (ichT is always past ichToken here, so ichT - 1 is valid)
                const bool insertionCode =
                    ch == '^' && std::isdigit(static_cast<unsigned char>(script[ichT - 1]));
                if (!wordChar && !insertionCode) {
                    break;
                }
                ++ichT;
            }
            break;
    }
    cchToken = ichT - ichToken;
    return true;
}

bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
    const Token& tokenCommand = ltoken[0];
    int tokCommand = tokenCommand.tok;

    atokenCommand = ltoken;
    if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
        return false;
    }
    
    return true;
}

bool SelectionCompiler::compileExpression() {
    /** todo */
    int i = 1;
    int tokCommand = atokenCommand[0].tok;
    if (tokCommand == Token::define) {
        i = 2;
    } else if ((tokCommand & Token::embeddedExpression) != 0) {
        // look for the open parenthesis
        while (i < atokenCommand.size() &&
         atokenCommand[i].tok != Token::leftparen)
        ++i;
    }

    if (i >= atokenCommand.size()) {
        return true;
    }
    return compileExpression(i);
  }

                  
/**
 * Appends a token to the postfix token list being built.
 *
 * @param token token to append
 * @return always true, so that callers can return the result directly.
 */
bool SelectionCompiler::addTokenToPostfix(const Token& token) {
    ltokenPostfix.insert(ltokenPostfix.end(), token);
    return true;
}

/**
 * Converts the expression part of atokenCommand to postfix order.
 *
 * The tokens before @p itoken are copied unchanged; the expression itself is
 * wrapped between Token::tokenExpressionBegin and Token::tokenExpressionEnd.
 * On success atokenCommand is replaced by the postfix list.
 *
 * @param itoken index in atokenCommand of the first token of the expression
 * @return false if parsing the expression failed; the result of
 *         endOfExpressionExpected() if tokens remain after the expression;
 *         true otherwise.
 */
bool SelectionCompiler::compileExpression(int itoken) {
    ltokenPostfix.clear();

    // Everything in front of the expression goes through as is.
    int copied = 0;
    while (copied < itoken) {
        addTokenToPostfix(atokenCommand[copied]);
        ++copied;
    }

    // Set up the infix cursor used by tokenNext()/tokPeek().
    atokenInfix = atokenCommand;
    itokenInfix = itoken;

    addTokenToPostfix(Token::tokenExpressionBegin);
    const bool parsed = clauseOr();
    if (!parsed) {
        return false;
    }
    addTokenToPostfix(Token::tokenExpressionEnd);

    if (itokenInfix == atokenInfix.size()) {
        atokenCommand = ltokenPostfix;
        return true;
    }
    return endOfExpressionExpected();
}

/**
 * Returns the current infix token and advances the cursor.
 *
 * @return a copy of the token at the cursor, or a default-constructed Token
 *         when the cursor is at the end of the infix list (the cursor is
 *         not moved in that case).
 */
Token SelectionCompiler::tokenNext() {
    if (itokenInfix != atokenInfix.size()) {
        const Token& current = atokenInfix[itokenInfix];
        ++itokenInfix;
        return current;
    }
    return Token();
}

/**
 * Returns the value of the current infix token without consuming it.
 *
 * @return the token's value, or an empty boost::any when the cursor is at
 *         the end of the infix list.
 */
boost::any SelectionCompiler::valuePeek() {
    if (itokenInfix != atokenInfix.size()) {
        return atokenInfix[itokenInfix].value;
    }
    return boost::any();
}

/**
 * Returns the tok code of the current infix token without consuming it.
 *
 * @return the token's tok, or 0 when the cursor is at the end of the infix
 *         list.
 */
int SelectionCompiler::tokPeek() {
    const bool exhausted = (itokenInfix == atokenInfix.size());
    return exhausted ? 0 : atokenInfix[itokenInfix].tok;
}

bool SelectionCompiler::clauseOr() {
    if (!clauseAnd()) {
        return false;
    }
    
    while (tokPeek() == Token::opOr) {
        Token tokenOr = tokenNext();
        if (!clauseAnd()) {
            return false;
        }
        addTokenToPostfix(tokenOr);
    }
    return true;
}

bool SelectionCompiler::clauseAnd() {
    if (!clauseNot()) {
        return false;
    }

    while (tokPeek() == Token::opAnd) {
        Token tokenAnd = tokenNext();
        if (!clauseNot()) {
            return false;
        }
        addTokenToPostfix(tokenAnd);
    }
    return true;
}

bool SelectionCompiler::clauseNot() {
    if (tokPeek() == Token::opNot) {
        Token tokenNot = tokenNext();
        if (!clauseNot()) {
            return false;
        }
        return addTokenToPostfix(tokenNot);
    }
    return clausePrimitive();
}

/**
 * Parses a primitive clause, chosen by the tok of the current token:
 *  - Token::within: a within(...) clause
 *  - Token::asterisk / Token::identifier: a chemical object name
 *  - Token::leftparen: a parenthesized expression
 *  - Token::all / Token::none, or any tok carrying all Token::predefinedset
 *    bits: the token itself is appended to the postfix list
 *  - any tok carrying all Token::atomproperty bits: a comparison
 *
 * @return the result of the selected sub-parser; the result of
 *         rightParenthesisExpected() for an unclosed parenthesis; the result
 *         of unrecognizedExpressionToken() for any other token.
 */
bool SelectionCompiler::clausePrimitive() {
    const int tok = tokPeek();

    if (tok == Token::within) {
        return clauseWithin();
    }

    if (tok == Token::asterisk || tok == Token::identifier) {
        return clauseChemObjName();
    }

    if (tok == Token::leftparen) {
        tokenNext();
        if (!clauseOr()) {
            return false;
        }
        if (tokenNext().tok != Token::rightparen) {
            return rightParenthesisExpected();
        }
        return true;
    }

    if (tok == Token::all || tok == Token::none) {
        return addTokenToPostfix(tokenNext());
    }

    // None of the explicitly listed tokens: decide by the flag bits.
    if ((tok & Token::atomproperty) == Token::atomproperty) {
        return clauseComparator();
    }
    if ((tok & Token::predefinedset) == Token::predefinedset) {
        // predefined sets are simply passed through
        return addTokenToPostfix(tokenNext());
    }

    return unrecognizedExpressionToken();
}

bool SelectionCompiler::clauseComparator() {
    Token tokenAtomProperty = tokenNext();
    Token tokenComparator = tokenNext();
    if ((tokenComparator.tok & Token::comparator) == 0) {
        return comparisonOperatorExpected();
    }

    Token tokenValue = tokenNext();
    if (tokenValue.tok != Token::integer) {
        return integerExpected();
    }
    int val = tokenValue.intValue;
    // note that a comparator instruction is a complicated instruction
    // int intValue is the tok of the property you are comparing
    // the value against which you are comparing is stored as an Integer
    // in the object value
    return addTokenToPostfix(Token(tokenComparator.tok,
                       tokenAtomProperty.tok, boost::any(val)));
}

bool SelectionCompiler::clauseWithin() {
    tokenNext();                             // WITHIN
    if (tokenNext().tok != Token::leftparen) {  // (
        return leftParenthesisExpected();
    }
    
    boost::any distance;
    Token tokenDistance = tokenNext();       // distance
    switch(tokenDistance.tok) {
        case Token::integer:
            distance = float(tokenDistance.intValue);
            break;
        case Token::decimal:
            distance = tokenDistance.value;
            break;
        default:
            return numberOrKeywordExpected();
    }

    if (tokenNext().tok != Token::opOr) {       // ,
        return commaExpected();
    }
    
    if (! clauseOr()) {                        // *expression*
        return false;
    }
    
    if (tokenNext().tok != Token::rightparen) { // )T
        return rightParenthesisExpected();
    }
    
    return addTokenToPostfix(Token(Token::within, distance));
}

bool SelectionCompiler::clauseChemObjName() {
    std::string chemObjName;
    int tok = tokPeek();
    if (!clauseName(chemObjName)){
        return false;
    }


    tok = tokPeek();
    //allow two dot at most
    if (tok == Token::dot) {
        if (!clauseName(chemObjName)) {
            return false;
        }
        tok = tokPeek();
        if (tok == Token::dot) {
            if (!clauseName(chemObjName)) {
                return false;
            }
        }        
    }

    return addTokenToPostfix(Token(Token::name, chemObjName));
}

/**
 * Parses one part of a name and appends its text to @p name.
 *
 * A part starts with '*' or an identifier and continues with any run of
 * '*', identifier and integer tokens. Parsing stops, without consuming it,
 * at the first other token (including a dot).
 *
 * @param name string the parsed text is appended to
 * @return true if a part was parsed; false, with nothing consumed, if the
 *         current token cannot start a name.
 */
bool SelectionCompiler:: clauseName(std::string& name) {

    int tok = tokPeek();
    if (tok != Token::asterisk && tok != Token::identifier) {
        return false;
    }

    // Handle the first token exactly like the following ones: a wildcard
    // contributes a literal "*", an identifier contributes its text.
    if (tok == Token::asterisk) {
        name += "*";
        tokenNext();
    } else {
        name += boost::any_cast<std::string>(tokenNext().value);
    }

    while (true) {
        tok = tokPeek();
        switch (tok) {
            case Token::asterisk :
                name += "*";
                tokenNext();
                break;
            case Token::identifier :
                name += boost::any_cast<std::string>(tokenNext().value);
                break;
            case Token::integer :
                // Integer tokens carry their number in intValue (their value
                // member is built from the source text), so read it from
                // there instead of casting value to int.
                name += toString(tokenNext().intValue);
                break;
            default :
                // Token::dot and everything else ends this part of the name.
                return true;
        }
    }
}


}
Revision:	283
Committed:	Thu Feb 3 23:14:05 2005 UTC (20 years, 9 months ago) by tim
File size:	20539 byte(s)
Log Message:	more work in selection library
#	User	Rev	Content
1	tim	279	/*
2			* Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3			*
4			* The University of Notre Dame grants you ("Licensee") a
5			* non-exclusive, royalty free, license to use, modify and
6			* redistribute this software in source and binary code form, provided
7			* that the following conditions are met:
8			*
9			* 1. Acknowledgement of the program authors must be made in any
10			* publication of scientific results based in part on use of the
11			* program. An acceptable form of acknowledgement is citation of
12			* the article in which the program was described (Matthew
13			* A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14			* J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15			* Parallel Simulation Engine for Molecular Dynamics,"
16			* J. Comput. Chem. 26, pp. 252-271 (2005))
17			*
18			* 2. Redistributions of source code must retain the above copyright
19			* notice, this list of conditions and the following disclaimer.
20			*
21			* 3. Redistributions in binary form must reproduce the above copyright
22			* notice, this list of conditions and the following disclaimer in the
23			* documentation and/or other materials provided with the
24			* distribution.
25			*
26			* This software is provided "AS IS," without a warranty of any
27			* kind. All express or implied conditions, representations and
28			* warranties, including any implied warranty of merchantability,
29			* fitness for a particular purpose or non-infringement, are hereby
30			* excluded. The University of Notre Dame and its licensors shall not
31			* be liable for any damages suffered by licensee as a result of
32			* using, modifying or distributing the software or its
33			* derivatives. In no event will the University of Notre Dame or its
34			* licensors be liable for any lost revenue, profit or data, or for
35			* direct, indirect, special, consequential, incidental or punitive
36			* damages, however caused and regardless of the theory of liability,
37			* arising out of the use of or inability to use software, even if the
38			* University of Notre Dame has been advised of the possibility of
39			* such damages.
40			*/
41
42			#include "selection/SelectionCompiler.hpp"
43	tim	281	#include "utils/StringUtils.hpp"
44	tim	279	namespace oopse {
45
46			bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {
47
48			this->filename = filename;
49			this->script = script;
50			lineNumbers.clear();
51			lineIndices.clear();
52			aatokenCompiled.clear();
53
54	tim	281	if (internalCompile()) {
55	tim	279	return true;
56			}
57
58			int icharEnd;
59	tim	281	if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
60			(icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
61	tim	279	icharEnd = script.size();
62			}
63			errorLine = script.substr(ichCurrentCommand, icharEnd);
64			return false;
65			}
66
67			bool SelectionCompiler::internalCompile(){
68
69			cchScript = script.size();
70			ichToken = 0;
71			lineCurrent = 1;
72
73			error = false;
74
75	tim	281	//std::vector<Token> lltoken;
76			aatokenCompiled.clear();
77	tim	279	std::vector<Token> ltoken;
78
79	tim	281	Token tokenCommand;
80			int tokCommand = Token::nada;
81	tim	279
82			for ( ; true; ichToken += cchToken) {
83			if (lookingAtLeadingWhitespace())
84			continue;
85			if (lookingAtComment())
86			continue;
87	tim	281	bool endOfLine = lookingAtEndOfLine();
88	tim	279	if (endOfLine \|\| lookingAtEndOfStatement()) {
89	tim	281	if (tokCommand != Token::nada) {
90	tim	279	if (! compileCommand(ltoken)) {
91			return false;
92			}
93	tim	281	aatokenCompiled.push_back(atokenCommand);
94			lineNumbers.push_back(lineCurrent);
95			lineIndices.push_back(ichCurrentCommand);
96			ltoken.clear();
97			tokCommand = Token::nada;
98	tim	279	}
99
100			if (ichToken < cchScript) {
101			if (endOfLine)
102			++lineCurrent;
103			continue;
104			}
105			break;
106			}
107
108	tim	281	if (tokCommand != Token::nada) {
109	tim	279	if (lookingAtString()) {
110			std::string str = getUnescapedStringLiteral();
111	tim	281	ltoken.push_back(Token(Token::string, str));
112	tim	279	continue;
113			}
114	tim	281	//if ((tokCommand & Token::specialstring) != 0 &&
115			// lookingAtSpecialString()) {
116			// std::string str = script.substr(ichToken, ichToken + cchToken);
117			// ltoken.push_back(Token(Token::string, str));
118			// continue;
119			//}
120			if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
121			float value = lexi_cast<float>(script.substr(ichToken, ichToken + cchToken));
122			ltoken.push_back(Token(Token::decimal, value));/*@todo/
123	tim	279	continue;
124			}
125	tim	281	if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
126	tim	279	std::string intString = script.substr(ichToken, ichToken + cchToken);
127			int val = lexi_cast<int>(intString);
128	tim	281	ltoken.push_back(Token(Token::integer, val, intString));/*@todo/
129	tim	279	continue;
130			}
131			}
132
133			if (lookingAtLookupToken()) {
134	tim	281	std::string ident = script.substr(ichToken, ichToken + cchToken);
135	tim	279
136	tim	281	Token token;
137			Token* pToken = TokenMap::getInstance()->getToken(ident);
138			if (pToken != NULL) {
139			token = *pToken;
140			} else {
141			token = Token(Token::identifier, ident);
142	tim	279	}
143
144			int tok = token.tok;
145
146			switch (tokCommand) {
147	tim	281	case Token::nada:
148	tim	279	ichCurrentCommand = ichToken;
149			//tokenCommand = token;
150			tokCommand = tok;
151	tim	281	if ((tokCommand & Token::command) == 0)
152	tim	279	return commandExpected();
153			break;
154
155	tim	281	case Token::define:
156	tim	279	if (ltoken.size() == 1) {
157			// we are looking at the variable name
158	tim	281	if (tok != Token::identifier &&
159			(tok & Token::predefinedset) != Token::predefinedset)
160	tim	279	return invalidExpressionToken(ident);
161			} else {
162			// we are looking at the expression
163	tim	281	if (tok != Token::identifier &&
164			(tok & (Token::expression \| Token::predefinedset)) == 0)
165	tim	279	return invalidExpressionToken(ident);
166			}
167
168			break;
169
170	tim	281	case Token::select:
171			if (tok != Token::identifier && (tok & Token::expression) == 0)
172	tim	279	return invalidExpressionToken(ident);
173			break;
174			}
175			ltoken.push_back(token);
176			continue;
177			}
178
179			if (ltoken.size() == 0) {
180			return commandExpected();
181			}
182
183			return unrecognizedToken();
184			}
185
186			return true;
187			}
188
189
190			bool SelectionCompiler::lookingAtLeadingWhitespace() {
191
192			int ichT = ichToken;
193			while (ichT < cchScript && std::isspace(script[ichT])) {
194			++ichT;
195			}
196			cchToken = ichT - ichToken;
197			return cchToken > 0;
198			}
199
200			bool SelectionCompiler::lookingAtEndOfLine() {
201			if (ichToken == cchScript)
202			return true;
203			int ichT = ichToken;
204			char ch = script[ichT];
205			if (ch == '\r') {
206			++ichT;
207			if (ichT < cchScript && script[ichT] == '\n')
208			++ichT;
209			} else if (ch == '\n') {
210			++ichT;
211			} else {
212			return false;
213			}
214			cchToken = ichT - ichToken;
215			return true;
216			}
217
218			bool SelectionCompiler::lookingAtEndOfStatement() {
219			if (ichToken == cchScript \|\| script[ichToken] != ';')
220			return false;
221			cchToken = 1;
222			return true;
223			}
224
225			bool SelectionCompiler::lookingAtString() {
226			if (ichToken == cchScript)
227			return false;
228			if (script[ichToken] != '"')
229			return false;
230			// remove support for single quote
231			// in order to use it in atom expressions
232			// char chFirst = script.charAt(ichToken);
233			// if (chFirst != '"' && chFirst != '\'')
234			// return false;
235			int ichT = ichToken + 1;
236			// while (ichT < cchScript && script.charAt(ichT++) != chFirst)
237			char ch;
238	tim	281	bool previousCharBackslash = false;
239	tim	279	while (ichT < cchScript) {
240	tim	281	ch = script[ichT++];
241	tim	279	if (ch == '"' && !previousCharBackslash)
242			break;
243			previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
244			}
245			cchToken = ichT - ichToken;
246			return true;
247			}
248
249
250			std::string SelectionCompiler::getUnescapedStringLiteral() {
251	tim	281	/** @todo */
252			std::string sb(cchToken - 2, ' ');
253
254	tim	279	int ichMax = ichToken + cchToken - 1;
255			int ich = ichToken + 1;
256
257			while (ich < ichMax) {
258			char ch = script[ich++];
259			if (ch == '\\' && ich < ichMax) {
260			ch = script[ich++];
261			switch (ch) {
262			case 'b':
263			ch = '\b';
264			break;
265			case 'n':
266			ch = '\n';
267			break;
268			case 't':
269			ch = '\t';
270			break;
271			case 'r':
272			ch = '\r';
273			// fall into
274			case '"':
275			case '\\':
276			case '\'':
277			break;
278			case 'x':
279			case 'u':
280			int digitCount = ch == 'x' ? 2 : 4;
281			if (ich < ichMax) {
282			int unicode = 0;
283			for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
284			char chT = script[ich];
285			int hexit = getHexitValue(chT);
286			if (hexit < 0)
287			break;
288			unicode <<= 4;
289			unicode += hexit;
290			++ich;
291			}
292			ch = (char)unicode;
293			}
294			}
295			}
296	tim	281	sb.append(1, ch);
297	tim	279	}
298
299	tim	281	return sb;
300	tim	279	}
301
302	tim	281	int SelectionCompiler::getHexitValue(char ch) {
303	tim	279	if (ch >= '0' && ch <= '9')
304			return ch - '0';
305			else if (ch >= 'a' && ch <= 'f')
306			return 10 + ch - 'a';
307			else if (ch >= 'A' && ch <= 'F')
308			return 10 + ch - 'A';
309			else
310			return -1;
311			}
312
313			bool SelectionCompiler::lookingAtSpecialString() {
314			int ichT = ichToken;
315			char ch = script[ichT];
316			while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
317			++ichT;
318			}
319			cchToken = ichT - ichToken;
320			return cchToken > 0;
321			}
322
323	tim	281	bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
324	tim	279	if (ichToken == cchScript) {
325			return false;
326			}
327
328			int ichT = ichToken;
329			if (script[ichT] == '-') {
330			++ichT;
331			}
332	tim	281	bool digitSeen = false;
333	tim	279	char ch = 'X';
334			while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
335			++ichT;
336			digitSeen = true;
337			}
338
339			if (ichT == cchScript \|\| ch != '.') {
340			return false;
341			}
342
343			// to support 1.ca, let's check the character after the dot
344			// to determine if it is an alpha
345			if (ch == '.' && (ichT + 1 < cchScript) && std::isalpha(script[ichT + 1])) {
346			return false;
347			}
348
349			++ichT;
350			while (ichT < cchScript && std::isdigit(script[ichT])) {
351			++ichT;
352			digitSeen = true;
353			}
354			cchToken = ichT - ichToken;
355			return digitSeen;
356			}
357
358	tim	281	bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
359	tim	279	if (ichToken == cchScript) {
360			return false;
361			}
362			int ichT = ichToken;
363			if (allowNegative && script[ichToken] == '-') {
364			++ichT;
365			}
366			int ichBeginDigits = ichT;
367			while (ichT < cchScript && std::isdigit(script[ichT])) {
368			++ichT;
369			}
370			if (ichBeginDigits == ichT) {
371			return false;
372			}
373			cchToken = ichT - ichToken;
374			return true;
375			}
376
377			bool SelectionCompiler::lookingAtLookupToken() {
378			if (ichToken == cchScript) {
379			return false;
380			}
381
382			int ichT = ichToken;
383			char ch;
384			switch (ch = script[ichT++]) {
385			case '(':
386			case ')':
387			case ',':
388			case '*':
389			case '-':
390			case '[':
391			case ']':
392			case '+':
393			case ':':
394			case '@':
395			case '.':
396			case '%':
397			break;
398			case '&':
399			case '\|':
400			if (ichT < cchScript && script[ichT] == ch) {
401			++ichT;
402			}
403			break;
404			case '<':
405			case '=':
406			case '>':
407			if (ichT < cchScript && ((ch = script[ichT]) == '<' \|\| ch == '=' \|\| ch == '>')) {
408			++ichT;
409			}
410			break;
411			case '/':
412			case '!':
413			if (ichT < cchScript && script[ichT] == '=') {
414			++ichT;
415			}
416			break;
417			default:
418			if ((ch < 'a' \|\| ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
419			return false;
420			}
421			case '?': // include question marks in identifier for atom expressions
422			while (ichT < cchScript && (std::isalpha(ch = script[ichT]) \|\|std::isdigit(ch) \|\|
423			ch == '_' \|\| ch == '?') \|\|(ch == '^' && ichT > ichToken && std::isdigit(script[ichT - 1]))) {
424			// hack for insertion codes embedded in an atom expression :-(
425			// select c3^a
426			++ichT;
427			}
428			break;
429			}
430			cchToken = ichT - ichToken;
431			return true;
432			}
433
434	tim	281	bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
435			const Token& tokenCommand = ltoken[0];
436	tim	279	int tokCommand = tokenCommand.tok;
437	tim	281
438			atokenCommand = ltoken;
439			if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
440	tim	279	return false;
441			}
442	tim	281
443	tim	279	return true;
444			}
445
446			bool SelectionCompiler::compileExpression() {
447			/** todo */
448			int i = 1;
449			int tokCommand = atokenCommand[0].tok;
450	tim	281	if (tokCommand == Token::define) {
451			i = 2;
452			} else if ((tokCommand & Token::embeddedExpression) != 0) {
453			// look for the open parenthesis
454			while (i < atokenCommand.size() &&
455			atokenCommand[i].tok != Token::leftparen)
456	tim	279	++i;
457			}
458	tim	281
459			if (i >= atokenCommand.size()) {
460			return true;
461			}
462	tim	279	return compileExpression(i);
463			}
464
465
466	tim	281	bool SelectionCompiler::addTokenToPostfix(const Token& token) {
467	tim	279	ltokenPostfix.push_back(token);
468			return true;
469			}
470
471			bool SelectionCompiler::compileExpression(int itoken) {
472	tim	281	ltokenPostfix.clear();
473			for (int i = 0; i < itoken; ++i) {
474	tim	279	addTokenToPostfix(atokenCommand[i]);
475	tim	281	}
476
477	tim	279	atokenInfix = atokenCommand;
478			itokenInfix = itoken;
479
480	tim	281	addTokenToPostfix(Token::tokenExpressionBegin);
481	tim	279	if (!clauseOr()) {
482			return false;
483			}
484
485	tim	281	addTokenToPostfix(Token::tokenExpressionEnd);
486			if (itokenInfix != atokenInfix.size()) {
487	tim	279	return endOfExpressionExpected();
488			}
489
490			atokenCommand = ltokenPostfix;
491			return true;
492			}
493
494			Token SelectionCompiler::tokenNext() {
495	tim	281	if (itokenInfix == atokenInfix.size()) {
496			return Token();
497			}
498			return atokenInfix[itokenInfix++];
499	tim	279	}
500
501	tim	281	boost::any SelectionCompiler::valuePeek() {
502			if (itokenInfix == atokenInfix.size()) {
503			return boost::any();
504	tim	279	} else {
505			return atokenInfix[itokenInfix].value;
506			}
507			}
508
509			int SelectionCompiler::tokPeek() {
510	tim	281	if (itokenInfix == atokenInfix.size()) {
511	tim	279	return 0;
512			}else {
513			return atokenInfix[itokenInfix].tok;
514			}
515			}
516
517			bool SelectionCompiler::clauseOr() {
518			if (!clauseAnd()) {
519			return false;
520			}
521
522	tim	281	while (tokPeek() == Token::opOr) {
523	tim	279	Token tokenOr = tokenNext();
524			if (!clauseAnd()) {
525			return false;
526			}
527			addTokenToPostfix(tokenOr);
528			}
529			return true;
530			}
531
532			bool SelectionCompiler::clauseAnd() {
533			if (!clauseNot()) {
534			return false;
535			}
536
537	tim	281	while (tokPeek() == Token::opAnd) {
538	tim	279	Token tokenAnd = tokenNext();
539			if (!clauseNot()) {
540			return false;
541			}
542			addTokenToPostfix(tokenAnd);
543			}
544			return true;
545			}
546
547			bool SelectionCompiler::clauseNot() {
548	tim	281	if (tokPeek() == Token::opNot) {
549	tim	279	Token tokenNot = tokenNext();
550			if (!clauseNot()) {
551			return false;
552			}
553			return addTokenToPostfix(tokenNot);
554			}
555			return clausePrimitive();
556			}
557
558			bool SelectionCompiler::clausePrimitive() {
559			int tok = tokPeek();
560			switch (tok) {
561	tim	281	case Token::within:
562	tim	279	return clauseWithin();
563	tim	283
564			case Token::asterisk:
565			case Token::identifier:
566			return clauseChemObjName();
567
568	tim	279	default:
569	tim	281	if ((tok & Token::atomproperty) == Token::atomproperty) {
570	tim	279	return clauseComparator();
571			}
572	tim	281	if ((tok & Token::predefinedset) != Token::predefinedset) {
573	tim	279	break;
574			}
575			// fall into the code and below and just add the token
576	tim	281	case Token::all:
577			case Token::none:
578	tim	279	return addTokenToPostfix(tokenNext());
579	tim	281	case Token::leftparen:
580	tim	279	tokenNext();
581			if (!clauseOr()) {
582			return false;
583			}
584	tim	281	if (tokenNext().tok != Token::rightparen) {
585	tim	279	return rightParenthesisExpected();
586			}
587			return true;
588			}
589			return unrecognizedExpressionToken();
590			}
591
592			bool SelectionCompiler::clauseComparator() {
593			Token tokenAtomProperty = tokenNext();
594			Token tokenComparator = tokenNext();
595	tim	281	if ((tokenComparator.tok & Token::comparator) == 0) {
596	tim	279	return comparisonOperatorExpected();
597			}
598
599			Token tokenValue = tokenNext();
600	tim	281	if (tokenValue.tok != Token::integer) {
601	tim	279	return integerExpected();
602			}
603			int val = tokenValue.intValue;
604			// note that a comparator instruction is a complicated instruction
605			// int intValue is the tok of the property you are comparing
606			// the value against which you are comparing is stored as an Integer
607			// in the object value
608	tim	281	return addTokenToPostfix(Token(tokenComparator.tok,
609			tokenAtomProperty.tok, boost::any(val)));
610	tim	279	}
611
612			bool SelectionCompiler::clauseWithin() {
613			tokenNext(); // WITHIN
614	tim	281	if (tokenNext().tok != Token::leftparen) { // (
615	tim	279	return leftParenthesisExpected();
616			}
617
618	tim	281	boost::any distance;
619	tim	279	Token tokenDistance = tokenNext(); // distance
620			switch(tokenDistance.tok) {
621	tim	281	case Token::integer:
622			distance = float(tokenDistance.intValue);
623	tim	279	break;
624	tim	281	case Token::decimal:
625	tim	279	distance = tokenDistance.value;
626			break;
627			default:
628			return numberOrKeywordExpected();
629			}
630
631	tim	281	if (tokenNext().tok != Token::opOr) { // ,
632	tim	279	return commaExpected();
633			}
634
635			if (! clauseOr()) { // expression
636			return false;
637			}
638
639	tim	281	if (tokenNext().tok != Token::rightparen) { // )T
640	tim	279	return rightParenthesisExpected();
641			}
642
643	tim	281	return addTokenToPostfix(Token(Token::within, distance));
644	tim	279	}
645
646	tim	283	bool SelectionCompiler::clauseChemObjName() {
647			std::string chemObjName;
648			int tok = tokPeek();
649			if (!clauseName(chemObjName)){
650			return false;
651			}
652	tim	279
653
654	tim	283	tok = tokPeek();
655			//allow two dot at most
656			if (tok == Token::dot) {
657			if (!clauseName(chemObjName)) {
658			return false;
659			}
660			tok = tokPeek();
661			if (tok == Token::dot) {
662			if (!clauseName(chemObjName)) {
663			return false;
664			}
665			}
666			}
667
668			return addTokenToPostfix(Token(Token::name, chemObjName));
669	tim	279	}
670
671	tim	283	bool SelectionCompiler:: clauseName(std::string& name) {
672
673			int tok = tokPeek();
674
675			if (tok == Token::asterisk \|\| tok == Token::identifier) {
676			name += boost::any_cast<std::string>(tokenNext().value);
677
678			while(true){
679			tok = tokPeek();
680			switch (tok) {
681			case Token::asterisk :
682			name += "*";
683			tokenNext();
684			break;
685			case Token::identifier :
686			name += boost::any_cast<std::string>(tokenNext().value);
687			break;
688			case Token::integer :
689			name += toString(boost::any_cast<int>(tokenNext().value));
690			break;
691			case Token::dot :
692			return true;
693			default :
694			return true;
695			}
696			}
697
698			}else {
699			return false;
700			}
701
702	tim	279	}
703
704	tim	283
705	tim	279	}