src/selection/SelectionCompiler.cpp

/*
 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
 *
 * The University of Notre Dame grants you ("Licensee") a
 * non-exclusive, royalty free, license to use, modify and
 * redistribute this software in source and binary code form, provided
 * that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 *
 * This software is provided "AS IS," without a warranty of any
 * kind. All express or implied conditions, representations and
 * warranties, including any implied warranty of merchantability,
 * fitness for a particular purpose or non-infringement, are hereby
 * excluded.  The University of Notre Dame and its licensors shall not
 * be liable for any damages suffered by licensee as a result of
 * using, modifying or distributing the software or its
 * derivatives. In no event will the University of Notre Dame or its
 * licensors be liable for any lost revenue, profit or data, or for
 * direct, indirect, special, consequential, incidental or punitive
 * damages, however caused and regardless of the theory of liability,
 * arising out of the use of or inability to use software, even if the
 * University of Notre Dame has been advised of the possibility of
 * such damages.
 *
 * SUPPORT OPEN SCIENCE!  If you use OpenMD or its source code in your
 * research, please cite the appropriate papers when you publish your
 * work.  Good starting points are:
 *                                                                      
 * [1]  Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).             
 * [2]  Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).          
 * [3]  Sun, Lin & Gezelter, J. Chem. Phys. 128, 24107 (2008).          
 * [4]  Kuang & Gezelter,  J. Chem. Phys. 133, 164101 (2010).
 * [5]  Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
 */

#include "selection/SelectionCompiler.hpp"
#include "utils/StringUtils.hpp"
namespace OpenMD {

  /**
   * Compiles a selection script and, on failure, captures the offending line.
   *
   * Stores @p filename and @p script on the compiler, discards the results
   * of any previous compilation and runs internalCompile(). When that fails,
   * errorLine is set to the text starting at the command being compiled
   * (ichCurrentCommand) and running up to, but not including, the next
   * line terminator ('\r' or '\n') or the end of the script.
   *
   * @param filename name stored alongside the script
   * @param script   the selection script text to compile
   * @return true if internalCompile() succeeded, false otherwise
   */
  bool SelectionCompiler::compile(const std::string& filename, 
                                  const std::string& script) {

    this->filename = filename;
    this->script = script;
    lineNumbers.clear();
    lineIndices.clear();
    aatokenCompiled.clear();

    if (internalCompile()) {
      return true;
    }

    // ichCurrentCommand is only assigned by internalCompile() once the first
    // token of a command has been read; clamp it so that substr() can never
    // throw std::out_of_range on a stale or out-of-range offset.
    std::string::size_type lineBegin =
      static_cast<std::string::size_type>(ichCurrentCommand);
    if (lineBegin > script.size()) {
      lineBegin = script.size();
    }

    // The first terminator of either kind ends the line.
    std::string::size_type lineEnd = script.find_first_of("\r\n", lineBegin);
    if (lineEnd == std::string::npos) {
      lineEnd = script.size();
    }

    // substr() takes a length, not an end index.
    errorLine = script.substr(lineBegin, lineEnd - lineBegin);
    return false;
  }

  /**
   * Tokenizes the stored script and compiles it one command at a time.
   *
   * The scanner walks the script from ichToken; every lookingAt*() helper
   * that matches sets cchToken to the length of what it matched, and the
   * loop increment then steps over it. Tokens are collected in a local list
   * until an end of line, a ';' or the end of the script is reached, at
   * which point the list is handed to compileCommand() and the compiled
   * command, its line number and its start offset are appended to
   * aatokenCompiled, lineNumbers and lineIndices.
   *
   * @return true when the whole script was consumed; otherwise false from a
   *         failed compileCommand(), or whatever the error helper that was
   *         invoked (commandExpected(), invalidExpressionToken(),
   *         unrecognizedToken()) returns
   */
  bool SelectionCompiler::internalCompile(){

    cchScript = script.size();
    ichToken = 0;
    // Give the command offset a valid value for every compilation: it is
    // otherwise only assigned once a command token has been read, and
    // compile() uses it to extract the error line.
    ichCurrentCommand = 0;
    lineCurrent = 1;

    error = false;

    aatokenCompiled.clear();
    // Tokens of the command currently being collected.
    std::vector<Token> ltoken;

    // Token::nada until the first token of a command has been seen.
    int tokCommand = Token::nada;

    for ( ; true; ichToken += cchToken) {
      if (lookingAtLeadingWhitespace())
        continue;
      // Comment handling is disabled:
      //if (lookingAtComment())
      //    continue;
      bool endOfLine = lookingAtEndOfLine();
      if (endOfLine || lookingAtEndOfStatement()) {
        // A terminator closes the pending command, if there is one.
        if (tokCommand != Token::nada) {
          if (! compileCommand(ltoken)) {
            return false;
          }
          aatokenCompiled.push_back(atokenCommand);
          lineNumbers.push_back(lineCurrent);
          lineIndices.push_back(ichCurrentCommand);
          ltoken.clear();
          tokCommand = Token::nada;
        }

        // More input follows: keep scanning. Otherwise the script is done.
        if (ichToken < cchScript) {
          if (endOfLine)
            ++lineCurrent;
          continue;
        }
        break;
      }

      // Literals are only recognized after the command token itself.
      if (tokCommand != Token::nada) {
        if (lookingAtString()) {
          std::string str = getUnescapedStringLiteral();
          ltoken.push_back(Token(Token::string, str));
          continue;
        }
        // Special strings are disabled:
        //if ((tokCommand & Token::specialstring) != 0 &&
        //    lookingAtSpecialString()) {
        //    std::string str = script.substr(ichToken, ichToken + cchToken);
        //    ltoken.push_back(Token(Token::string, str));
        //    continue;
        //}
        // Decimals are tried before integers so that "1.5" is not split.
        if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
          float value = lexi_cast<float>(script.substr(ichToken, cchToken));
          ltoken.push_back(Token(Token::decimal, boost::any(value)));
          continue;
        }
        if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
          int val = lexi_cast<int>(script.substr(ichToken, cchToken));
          ltoken.push_back(Token(Token::integer, boost::any(val)));
          continue;
        }
      }

      if (lookingAtLookupToken()) {
        std::string ident = script.substr(ichToken, cchToken);
        // Known words come from the token map; anything else is an identifier.
        Token token;
        Token* pToken = TokenMap::getInstance()->getToken(ident);
        if (pToken != NULL) {
          token = *pToken;
        } else {
          token = Token(Token::identifier, ident);
        }

        int tok = token.tok;

        switch (tokCommand) {
        case Token::nada:
          // First token of a command: it has to be a command word.
          ichCurrentCommand = ichToken;
          tokCommand = tok;
          if ((tokCommand & Token::command) == 0)
            return commandExpected();
          break;

        case Token::define:
          if (ltoken.size() == 1) {
            // we are looking at the variable name
            if (tok != Token::identifier &&
                (tok & Token::predefinedset) != Token::predefinedset)
              return invalidExpressionToken(ident);
          } else {
            // we are looking at the expression
            if (tok != Token::identifier &&
                (tok & (Token::expression | Token::predefinedset)) == 0)
              return invalidExpressionToken(ident);
          }

          break;

        case Token::select:
          if (tok != Token::identifier && (tok & Token::expression) == 0)
            return invalidExpressionToken(ident);
          break;
        }
        ltoken.push_back(token);
        continue;
      }

      // Nothing matched at this position.
      if (ltoken.empty()) {
        return commandExpected();
      }

      return unrecognizedToken();
    }

    return true;
  }


  /**
   * Measures the run of whitespace starting at ichToken.
   *
   * Sets cchToken to the length of that run (possibly 0).
   *
   * NOTE(review): std::isspace() also accepts '\r' and '\n', and
   * internalCompile() calls this before lookingAtEndOfLine(), so a line
   * terminator found at the scan position is consumed here as whitespace.
   * Confirm that this is intended before relying on per-line bookkeeping.
   *
   * @return true if at least one whitespace character was found
   */
  bool SelectionCompiler::lookingAtLeadingWhitespace() {

    int ichT = ichToken;
    // <cctype> classifiers are undefined for negative values, so plain char
    // has to go through unsigned char first (matters for non-ASCII bytes).
    while (ichT < cchScript &&
           std::isspace(static_cast<unsigned char>(script[ichT]))) {
      ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
  }

  /**
   * Tests whether the scan position is at a line terminator or at the end
   * of the script.
   *
   * "\r\n", a lone '\r' and a lone '\n' each count as one terminator, and
   * cchToken is set to its length. At the end of the script the function
   * returns true without touching cchToken.
   *
   * @return true at a line terminator or at the end of the script
   */
  bool SelectionCompiler::lookingAtEndOfLine() {
    if (ichToken == cchScript) {
      return true;
    }

    const char first = script[ichToken];
    if (first != '\r' && first != '\n') {
      return false;
    }

    // A carriage return may be followed by a line feed belonging to it.
    int length = 1;
    if (first == '\r' &&
        ichToken + 1 < cchScript && script[ichToken + 1] == '\n') {
      length = 2;
    }

    cchToken = length;
    return true;
  }

  /**
   * Tests whether the scan position is at a ';' statement separator.
   *
   * On a match cchToken is set to 1; otherwise it is left unchanged.
   *
   * @return true if the character at ichToken is ';'
   */
  bool SelectionCompiler::lookingAtEndOfStatement() {
    const bool atSeparator =
      (ichToken != cchScript) && (script[ichToken] == ';');
    if (atSeparator) {
      cchToken = 1;
    }
    return atSeparator;
  }

  /**
   * Tests whether the scan position is at a double-quoted string literal.
   *
   * Only '"' opens a string; single quotes are deliberately not supported
   * so that they remain usable in atom expressions. A quote preceded by an
   * unescaped backslash does not close the string. cchToken is set to the
   * length of the literal including its quotes; when no closing quote is
   * found the literal extends to the end of the script and the function
   * still returns true.
   *
   * @return true if the character at ichToken is '"'
   */
  bool SelectionCompiler::lookingAtString() {
    if (ichToken == cchScript || script[ichToken] != '"') {
      return false;
    }

    int pos = ichToken + 1;
    bool escaped = false;   // true while the previous char was an active '\'
    bool closed = false;
    while (!closed && pos < cchScript) {
      const char c = script[pos];
      ++pos;
      if (c == '"' && !escaped) {
        closed = true;
      } else if (c == '\\') {
        // a backslash escapes the next char unless it was escaped itself
        escaped = !escaped;
      } else {
        escaped = false;
      }
    }

    cchToken = pos - ichToken;
    return true;
  }

  
  /**
   * Returns the contents of the string literal at the scan position with
   * its escape sequences resolved.
   *
   * The literal is the cchToken characters starting at ichToken; the first
   * and the last of them are taken to be the delimiting quotes. Recognized
   * escapes are \b, \n, \t, \r, up to two hex digits after \x and up to four
   * after \u (the value is narrowed to a single char). Any other escaped
   * character, including '"', '\\' and '\'', stands for itself.
   *
   * @return the unescaped text between the delimiters
   */
  std::string SelectionCompiler::getUnescapedStringLiteral() {
    std::string sb;
    if (cchToken < 2) {
      // Not even room for both delimiters (e.g. a lone quote at the very
      // end of the script): there is no content.
      return sb;
    }
    // Reserve only; constructing the string with a size would prepend that
    // many blanks to the result.
    sb.reserve(cchToken - 2);

    int ichMax = ichToken + cchToken - 1;   // index of the closing delimiter
    int ich = ichToken + 1;

    while (ich < ichMax) {
      char ch = script[ich++];
      if (ch == '\\' && ich < ichMax) {
        ch = script[ich++];
        switch (ch) {
        case 'b':
          ch = '\b';
          break;
        case 'n':
          ch = '\n';
          break;
        case 't':
          ch = '\t';
          break;
        case 'r':
          ch = '\r';
          break;
        case 'x':
        case 'u': {
          const int digitCount = (ch == 'x') ? 2 : 4;
          // With nothing after the escape letter, the letter is kept as is.
          if (ich < ichMax) {
            int unicode = 0;
            for (int k = 0; k < digitCount && ich < ichMax; ++k) {
              const int hexit = getHexitValue(script[ich]);
              if (hexit < 0)
                break;
              unicode = (unicode << 4) + hexit;
              ++ich;
            }
            ch = static_cast<char>(unicode);
          }
          break;
        }
        default:
          // '"', '\\', '\'' and unknown escapes stand for themselves
          break;
        }
      }
      sb.append(1, ch);
    }

    return sb;
  }

  /**
   * Converts one hexadecimal digit to its numeric value.
   *
   * @param ch character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are digits
   * @return the value 0..15, or -1 if @p ch is not a hexadecimal digit
   */
  int SelectionCompiler::getHexitValue(char ch) {
    if ('0' <= ch && ch <= '9') {
      return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
      return ch - 'a' + 10;
    }
    if ('A' <= ch && ch <= 'F') {
      return ch - 'A' + 10;
    }
    return -1;
  }

  /**
   * Measures the text from the scan position up to the end of the
   * statement, i.e. up to the next ';', '\r' or '\n' or the end of the
   * script.
   *
   * Sets cchToken to the length of that text (possibly 0).
   *
   * @return true if at least one character precedes the terminator
   */
  bool SelectionCompiler::lookingAtSpecialString() {
    int ichT = ichToken;
    while (ichT < cchScript) {
      // The character has to be re-read on every step; testing only the
      // first one would run straight to the end of the script.
      const char ch = script[ichT];
      if (ch == ';' || ch == '\r' || ch == '\n') {
        break;
      }
      ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
  }

  /**
   * Tests whether the scan position is at a decimal number: an optional
   * '-', optional digits, a '.', and optional digits, with at least one
   * digit overall.
   *
   * On a match cchToken is set to the length of the number. Text that has
   * no '.' right after the leading digits (a plain integer) is not a
   * decimal.
   *
   * @param allowNegative not consulted by this function: a leading '-' is
   *        always accepted.
   *        NOTE(review): lookingAtInteger() does honor its flag; confirm
   *        whether the difference is intended before changing it, since
   *        negative decimals are currently accepted for every command.
   * @return true if a decimal number starts at ichToken
   */
  bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
    if (ichToken == cchScript) {
      return false;
    }

    int ichT = ichToken;
    if (script[ichT] == '-') {
      ++ichT;
    }

    // <cctype> classifiers are undefined for negative values, hence the
    // casts to unsigned char below.
    bool digitSeen = false;
    char ch = 'X';
    while (ichT < cchScript &&
           std::isdigit(static_cast<unsigned char>(ch = script[ichT]))) {
      ++ichT;
      digitSeen = true;
    }

    // ch now holds the first non-digit; it has to be the decimal point.
    if (ichT == cchScript || ch != '.') {
      return false;
    }

    // to support DMPC.1, let's check the character before the dot
    if (ichT > 0 &&
        std::isalpha(static_cast<unsigned char>(script[ichT - 1]))) {
      return false;
    }

    ++ichT;   // step over the '.'
    while (ichT < cchScript &&
           std::isdigit(static_cast<unsigned char>(script[ichT]))) {
      ++ichT;
      digitSeen = true;
    }
    cchToken = ichT - ichToken;
    return digitSeen;
  }

  /**
   * Tests whether the scan position is at an integer: one or more digits,
   * optionally preceded by '-'.
   *
   * On a match cchToken is set to the length of the integer, sign included.
   *
   * @param allowNegative whether a leading '-' is accepted as part of the
   *        number
   * @return true if an integer starts at ichToken
   */
  bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
    if (ichToken == cchScript) {
      return false;
    }
    int ichT = ichToken;
    if (allowNegative && script[ichToken] == '-') {
      ++ichT;
    }
    int ichBeginDigits = ichT;
    // <cctype> classifiers are undefined for negative values, so plain char
    // has to go through unsigned char first.
    while (ichT < cchScript &&
           std::isdigit(static_cast<unsigned char>(script[ichT]))) {
      ++ichT;
    }
    // A sign on its own is not a number.
    if (ichBeginDigits == ichT) {
      return false;
    }
    cchToken = ichT - ichToken;
    return true;
  }

  /**
   * Tests whether the scan position is at a punctuation token, an operator
   * or an identifier, i.e. at text that is to be looked up in the token map.
   *
   * Recognized forms:
   *  - a single '(', ')', ',', '[' or ']'
   *  - '&' or '|', optionally doubled
   *  - '<', '=' or '>', optionally followed by one more of '<', '=', '>'
   *  - '/' or '!', optionally followed by '='
   *  - an identifier: it starts with a letter, '_', '*' or '?' and continues
   *    with letters, digits and any of "_.*?+-[]"
   *
   * On a match cchToken is set to the length of the token.
   *
   * @return true if such a token starts at ichToken, false at the end of the
   *         script or when the first character cannot start a token
   */
  bool SelectionCompiler::lookingAtLookupToken() {
    if (ichToken == cchScript) {
      return false;
    }

    int ichT = ichToken;
    char ch;
    switch (ch = script[ichT++]) {
    case '(':
    case ')':
    case ',':
    case '[':
    case ']':
      break;
    case '&':
    case '|':
      // "&&" and "||" are single tokens
      if (ichT < cchScript && script[ichT] == ch) {
        ++ichT;
      }
      break;
    case '<':
    case '=':
    case '>':
      if (ichT < cchScript) {
        const char second = script[ichT];
        if (second == '<' || second == '=' || second == '>') {
          ++ichT;
        }
      }
      break;
    case '/':
    case '!':
      if (ichT < cchScript && script[ichT] == '=') {
        ++ichT;
      }
      break;
    default:
      // Anything that is not a letter or an underscore cannot start an
      // identifier. The upper-case range has to be tested with '||': with
      // '&&' the test can never be true and nothing is ever rejected.
      if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z') && ch != '_') {
        return false;
      }
      // fall through: scan the rest of the identifier
    case '*':
    case '?': // include question marks in identifier for atom expressions
      while (ichT < cchScript) {
        ch = script[ichT];
        // <cctype> classifiers are undefined for negative values
        const unsigned char uch = static_cast<unsigned char>(ch);
        const bool identifierChar =
          std::isalpha(uch) || std::isdigit(uch) ||
          ch == '_' || ch == '.' || ch == '*' || ch == '?' ||
          ch == '+' || ch == '-' || ch == '[' || ch == ']';
        if (!identifierChar) {
          break;
        }
        ++ichT;
      }
      break;
    }

    cchToken = ichT - ichToken;

    return true;
  }

  bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
    const Token& tokenCommand = ltoken[0];
    int tokCommand = tokenCommand.tok;

    atokenCommand = ltoken;
    if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
      return false;
    }
    
    return true;
  }

  bool SelectionCompiler::compileExpression() {
    /** todo */
    unsigned int i = 1;
    int tokCommand = atokenCommand[0].tok;
    if (tokCommand == Token::define) {
      i = 2;
    } else if ((tokCommand & Token::embeddedExpression) != 0) {
      // look for the open parenthesis
      while (i < atokenCommand.size() &&
             atokenCommand[i].tok != Token::leftparen)
        ++i;
    }

    if (i >= atokenCommand.size()) {
      return true;
    }
    return compileExpression(i);
  }

                  
  /**
   * Appends a token to the postfix token list.
   *
   * @param token token to append
   * @return always true, so that callers can return the call directly
   */
  bool SelectionCompiler::addTokenToPostfix(const Token& token) {
    ltokenPostfix.insert(ltokenPostfix.end(), token);
    return true;
  }

  /**
   * Rewrites the expression of the current command into postfix order.
   *
   * The tokens in front of the expression are carried over unchanged; the
   * expression itself is parsed by clauseOr() and wrapped in
   * Token::tokenExpressionBegin / Token::tokenExpressionEnd. On success
   * atokenCommand is replaced by the postfix form; on failure it is left
   * as it was.
   *
   * @param itoken index in atokenCommand of the first expression token
   * @return true on success; false if a clause failed, or the result of
   *         endOfExpressionExpected() if tokens are left over
   */
  bool SelectionCompiler::compileExpression(int itoken) {
    ltokenPostfix.clear();
    int copied = 0;
    while (copied < itoken) {
      addTokenToPostfix(atokenCommand[copied]);
      ++copied;
    }

    // The clause parsers read from this copy through itokenInfix.
    atokenInfix = atokenCommand;
    itokenInfix = itoken;

    addTokenToPostfix(Token::tokenExpressionBegin);
    if (!clauseOr()) {
      return false;
    }
    addTokenToPostfix(Token::tokenExpressionEnd);

    // Every token has to have been consumed by the expression.
    const bool allConsumed = (itokenInfix == atokenInfix.size());
    if (!allConsumed) {
      return endOfExpressionExpected();
    }

    atokenCommand = ltokenPostfix;
    return true;
  }

  /**
   * Consumes the next infix token.
   *
   * @return the token at itokenInfix, which is then advanced, or a
   *         default-constructed Token once the list is exhausted
   */
  Token SelectionCompiler::tokenNext() {
    if (itokenInfix != atokenInfix.size()) {
      return atokenInfix[itokenInfix++];
    }
    return Token();
  }

  /**
   * Looks at the value of the next infix token without consuming it.
   *
   * @return the value of the token at itokenInfix, or an empty boost::any
   *         once the list is exhausted
   */
  boost::any SelectionCompiler::valuePeek() {
    if (itokenInfix != atokenInfix.size()) {
      return atokenInfix[itokenInfix].value;
    }
    return boost::any();
  }

  /**
   * Looks at the type of the next infix token without consuming it.
   *
   * @return the tok field of the token at itokenInfix, or 0 once the list
   *         is exhausted
   */
  int SelectionCompiler::tokPeek() {
    const bool exhausted = (itokenInfix == atokenInfix.size());
    return exhausted ? 0 : atokenInfix[itokenInfix].tok;
  }

  bool SelectionCompiler::clauseOr() {
    if (!clauseAnd()) {
      return false;
    }
    
    while (tokPeek() == Token::opOr) {
      Token tokenOr = tokenNext();
      if (!clauseAnd()) {
        return false;
      }
      addTokenToPostfix(tokenOr);
    }
    return true;
  }

  bool SelectionCompiler::clauseAnd() {
    if (!clauseNot()) {
      return false;
    }

    while (tokPeek() == Token::opAnd) {
      Token tokenAnd = tokenNext();
      if (!clauseNot()) {
        return false;
      }
      addTokenToPostfix(tokenAnd);
    }
    return true;
  }

  bool SelectionCompiler::clauseNot() {
    if (tokPeek() == Token::opNot) {
      Token tokenNot = tokenNext();
      if (!clauseNot()) {
        return false;
      }
      return addTokenToPostfix(tokenNot);
    }
    return clausePrimitive();
  }

  /**
   * Parses one primitive clause, chosen by the type of the next token:
   *  - within: a within(...) clause
   *  - asterisk or identifier: an object name
   *  - integer: an index or index range
   *  - left parenthesis: a parenthesized sub-expression
   *  - all, none, hull, or any token with all bits of Token::predefinedset
   *    set: the token itself is appended to the postfix list
   *  - any other token with all bits of Token::atomproperty set: a
   *    comparison
   *
   * @return the result of the selected clause parser, or the result of
   *         unrecognizedExpressionToken() if nothing applies
   */
  bool SelectionCompiler::clausePrimitive() {
    const int tok = tokPeek();

    if (tok == Token::within) {
      return clauseWithin();
    }
    if (tok == Token::asterisk || tok == Token::identifier) {
      return clauseChemObjName();
    }
    if (tok == Token::integer) {
      return clauseIndex();
    }
    if (tok == Token::leftparen) {
      tokenNext();   // the '('
      if (!clauseOr()) {
        return false;
      }
      if (tokenNext().tok != Token::rightparen) {
        return rightParenthesisExpected();
      }
      return true;
    }

    // The explicitly named sets are taken as they are, ahead of the flag
    // tests that apply to every remaining token type.
    const bool namedSet =
      (tok == Token::all || tok == Token::none || tok == Token::hull);
    if (!namedSet && (tok & Token::atomproperty) == Token::atomproperty) {
      return clauseComparator();
    }
    if (namedSet || (tok & Token::predefinedset) == Token::predefinedset) {
      return addTokenToPostfix(tokenNext());
    }

    return unrecognizedExpressionToken();
  }

  bool SelectionCompiler::clauseComparator() {
    Token tokenAtomProperty = tokenNext();
    Token tokenComparator = tokenNext();
    if ((tokenComparator.tok & Token::comparator) == 0) {
      return comparisonOperatorExpected();
    }

    Token tokenValue = tokenNext();
    if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
      return numberExpected();
    }
    
    float val;
    if (tokenValue.value.type() == typeid(int)) {
      val = boost::any_cast<int>(tokenValue.value);
    } else if (tokenValue.value.type() == typeid(float)) {
      val = boost::any_cast<float>(tokenValue.value);
    } else {
      return false;
    }

    boost::any floatVal;
    floatVal = val;
    return addTokenToPostfix(Token(tokenComparator.tok,
                                   tokenAtomProperty.tok, floatVal));
  }

  bool SelectionCompiler::clauseWithin() {
    tokenNext();                             // WITHIN
    if (tokenNext().tok != Token::leftparen) {  // (
      return leftParenthesisExpected();
    }
    
    boost::any distance;
    Token tokenDistance = tokenNext();       // distance
    switch(tokenDistance.tok) {
    case Token::integer:
    case Token::decimal:
      distance = tokenDistance.value;
      break;
    default:
      return numberOrKeywordExpected();
    }

    if (tokenNext().tok != Token::opOr) {       // ,
      return commaExpected();
    }
    
    if (! clauseOr()) {                        // *expression*
      return false;
    }
    
    if (tokenNext().tok != Token::rightparen) { // )T
      return rightParenthesisExpected();
    }
    
    return addTokenToPostfix(Token(Token::within, distance));
  }

  bool SelectionCompiler::clauseChemObjName() {
    Token token = tokenNext();
    if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) {

      std::string name = boost::any_cast<std::string>(token.value);
      if (isNameValid(name)) {
        return addTokenToPostfix(Token(Token::name, name));
      } else {
        return compileError("invalid name: " + name);
      }
    } 

    return false;
        
  }

  /**
   * Checks the punctuation of an object name.
   *
   * A name is valid if it contains at most three dots and its square
   * brackets are balanced, with every ']' closing an earlier '['.
   *
   * @param name name to check
   * @return true if the name is valid
   */
  bool SelectionCompiler::isNameValid(const std::string& name) {
    int depth = 0;   // number of currently open '['
    int ndot = 0;
    for (std::string::size_type i = 0; i < name.size(); ++i) {
      switch (name[i]) {
      case '[':
        ++depth;
        break;
      case ']':
        // Equal counts alone are not enough: "][" must not pass.
        if (--depth < 0) {
          return false;
        }
        break;
      case '.':
        ++ndot;
        break;
      }
    }

    // only allow 3 dots at most
    return ndot <= 3 && depth == 0;
  }

  /**
   * Parses an index clause: either a single integer or a range written as
   * "<integer> to <integer>".
   *
   * Appends a Token::index token to the postfix list whose value is an int
   * for a single index, or a std::pair<int, int> (first, second) for a
   * range.
   *
   * @return the result of numberExpected() if an integer is missing, false
   *         if the second bound does not hold an int, otherwise true
   */
  bool SelectionCompiler::clauseIndex(){
    Token token = tokenNext();
    if (token.tok != Token::integer) {
      return numberExpected();
    }

    int first = boost::any_cast<int>(token.value);

    // A lone integer selects a single index. (No diagnostics are written to
    // stdout here: this runs for every index clause that is compiled.)
    if (tokPeek() != Token::to) {
      return addTokenToPostfix(Token(Token::index, boost::any(first)));
    }

    tokenNext();   // the "to" keyword
    if (tokPeek() != Token::integer) {
      return numberExpected();
    }

    boost::any intVal = tokenNext().value;
    if (intVal.type() != typeid(int)) {
      return false;
    }
    int second = boost::any_cast<int>(intVal);

    return addTokenToPostfix(Token(Token::index,
                                   boost::any(std::make_pair(first, second))));
  }

}
Revision:	1782
Committed:	Wed Aug 22 02:28:28 2012 UTC (13 years, 2 months ago) by gezelter
File size:	18456 byte(s)
Log Message:	MERGE OpenMD development branch 1465:1781 into trunk
#	Content
1	/*
2	* Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3	*
4	* The University of Notre Dame grants you ("Licensee") a
5	* non-exclusive, royalty free, license to use, modify and
6	* redistribute this software in source and binary code form, provided
7	* that the following conditions are met:
8	*
9	* 1. Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	*
12	* 2. Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the
15	* distribution.
16	*
17	* This software is provided "AS IS," without a warranty of any
18	* kind. All express or implied conditions, representations and
19	* warranties, including any implied warranty of merchantability,
20	* fitness for a particular purpose or non-infringement, are hereby
21	* excluded. The University of Notre Dame and its licensors shall not
22	* be liable for any damages suffered by licensee as a result of
23	* using, modifying or distributing the software or its
24	* derivatives. In no event will the University of Notre Dame or its
25	* licensors be liable for any lost revenue, profit or data, or for
26	* direct, indirect, special, consequential, incidental or punitive
27	* damages, however caused and regardless of the theory of liability,
28	* arising out of the use of or inability to use software, even if the
29	* University of Notre Dame has been advised of the possibility of
30	* such damages.
31	*
32	* SUPPORT OPEN SCIENCE! If you use OpenMD or its source code in your
33	* research, please cite the appropriate papers when you publish your
34	* work. Good starting points are:
35	*
36	* [1] Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).
37	* [2] Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).
38	* [3] Sun, Lin & Gezelter, J. Chem. Phys. 128, 24107 (2008).
39	* [4] Kuang & Gezelter, J. Chem. Phys. 133, 164101 (2010).
40	* [5] Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
41	*/
42
43	#include "selection/SelectionCompiler.hpp"
44	#include "utils/StringUtils.hpp"
45	namespace OpenMD {
46
47	bool SelectionCompiler::compile(const std::string& filename,
48	const std::string& script) {
49
50	this->filename = filename;
51	this->script = script;
52	lineNumbers.clear();
53	lineIndices.clear();
54	aatokenCompiled.clear();
55
56	if (internalCompile()) {
57	return true;
58	}
59
60	int icharEnd;
61	if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
62	(icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
63	icharEnd = script.size();
64	}
65	errorLine = script.substr(ichCurrentCommand, icharEnd);
66	return false;
67	}
68
69	bool SelectionCompiler::internalCompile(){
70
71	cchScript = script.size();
72	ichToken = 0;
73	lineCurrent = 1;
74
75	error = false;
76
77	//std::vector<Token> lltoken;
78	aatokenCompiled.clear();
79	std::vector<Token> ltoken;
80
81	Token tokenCommand;
82	int tokCommand = Token::nada;
83
84	for ( ; true; ichToken += cchToken) {
85	if (lookingAtLeadingWhitespace())
86	continue;
87	//if (lookingAtComment())
88	// continue;
89	bool endOfLine = lookingAtEndOfLine();
90	if (endOfLine \|\| lookingAtEndOfStatement()) {
91	if (tokCommand != Token::nada) {
92	if (! compileCommand(ltoken)) {
93	return false;
94	}
95	aatokenCompiled.push_back(atokenCommand);
96	lineNumbers.push_back(lineCurrent);
97	lineIndices.push_back(ichCurrentCommand);
98	ltoken.clear();
99	tokCommand = Token::nada;
100	}
101
102	if (ichToken < cchScript) {
103	if (endOfLine)
104	++lineCurrent;
105	continue;
106	}
107	break;
108	}
109
110	if (tokCommand != Token::nada) {
111	if (lookingAtString()) {
112	std::string str = getUnescapedStringLiteral();
113	ltoken.push_back(Token(Token::string, str));
114	continue;
115	}
116	//if ((tokCommand & Token::specialstring) != 0 &&
117	// lookingAtSpecialString()) {
118	// std::string str = script.substr(ichToken, ichToken + cchToken);
119	// ltoken.push_back(Token(Token::string, str));
120	// continue;
121	//}
122	if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
123	float value = lexi_cast<float>(script.substr(ichToken, cchToken));
124	ltoken.push_back(Token(Token::decimal, boost::any(value)));
125	continue;
126	}
127	if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
128
129	int val = lexi_cast<int>(script.substr(ichToken, cchToken));
130	ltoken.push_back(Token(Token::integer, boost::any(val)));
131	continue;
132	}
133	}
134
135	if (lookingAtLookupToken()) {
136	std::string ident = script.substr(ichToken, cchToken);
137	Token token;
138	Token* pToken = TokenMap::getInstance()->getToken(ident);
139	if (pToken != NULL) {
140	token = *pToken;
141	} else {
142	token = Token(Token::identifier, ident);
143	}
144
145	int tok = token.tok;
146
147	switch (tokCommand) {
148	case Token::nada:
149	ichCurrentCommand = ichToken;
150	//tokenCommand = token;
151	tokCommand = tok;
152	if ((tokCommand & Token::command) == 0)
153	return commandExpected();
154	break;
155
156	case Token::define:
157	if (ltoken.size() == 1) {
158	// we are looking at the variable name
159	if (tok != Token::identifier &&
160	(tok & Token::predefinedset) != Token::predefinedset)
161	return invalidExpressionToken(ident);
162	} else {
163	// we are looking at the expression
164	if (tok != Token::identifier &&
165	(tok & (Token::expression \| Token::predefinedset)) == 0)
166	return invalidExpressionToken(ident);
167	}
168
169	break;
170
171	case Token::select:
172	if (tok != Token::identifier && (tok & Token::expression) == 0)
173	return invalidExpressionToken(ident);
174	break;
175	}
176	ltoken.push_back(token);
177	continue;
178	}
179
180	if (ltoken.size() == 0) {
181	return commandExpected();
182	}
183
184	return unrecognizedToken();
185	}
186
187	return true;
188	}
189
190
191	bool SelectionCompiler::lookingAtLeadingWhitespace() {
192
193	int ichT = ichToken;
194	while (ichT < cchScript && std::isspace(script[ichT])) {
195	++ichT;
196	}
197	cchToken = ichT - ichToken;
198	return cchToken > 0;
199	}
200
201	bool SelectionCompiler::lookingAtEndOfLine() {
202	if (ichToken == cchScript)
203	return true;
204	int ichT = ichToken;
205	char ch = script[ichT];
206	if (ch == '\r') {
207	++ichT;
208	if (ichT < cchScript && script[ichT] == '\n')
209	++ichT;
210	} else if (ch == '\n') {
211	++ichT;
212	} else {
213	return false;
214	}
215	cchToken = ichT - ichToken;
216	return true;
217	}
218
219	bool SelectionCompiler::lookingAtEndOfStatement() {
220	if (ichToken == cchScript \|\| script[ichToken] != ';')
221	return false;
222	cchToken = 1;
223	return true;
224	}
225
226	bool SelectionCompiler::lookingAtString() {
227	if (ichToken == cchScript)
228	return false;
229	if (script[ichToken] != '"')
230	return false;
231	// remove support for single quote
232	// in order to use it in atom expressions
233	// char chFirst = script.charAt(ichToken);
234	// if (chFirst != '"' && chFirst != '\'')
235	// return false;
236	int ichT = ichToken + 1;
237	// while (ichT < cchScript && script.charAt(ichT++) != chFirst)
238	char ch;
239	bool previousCharBackslash = false;
240	while (ichT < cchScript) {
241	ch = script[ichT++];
242	if (ch == '"' && !previousCharBackslash)
243	break;
244	previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
245	}
246	cchToken = ichT - ichToken;
247
248	return true;
249	}
250
251
252	std::string SelectionCompiler::getUnescapedStringLiteral() {
253	/** @todo */
254	std::string sb(cchToken - 2, ' ');
255
256	int ichMax = ichToken + cchToken - 1;
257	int ich = ichToken + 1;
258
259	while (ich < ichMax) {
260	char ch = script[ich++];
261	if (ch == '\\' && ich < ichMax) {
262	ch = script[ich++];
263	switch (ch) {
264	case 'b':
265	ch = '\b';
266	break;
267	case 'n':
268	ch = '\n';
269	break;
270	case 't':
271	ch = '\t';
272	break;
273	case 'r':
274	ch = '\r';
275	// fall into
276	case '"':
277	case '\\':
278	case '\'':
279	break;
280	case 'x':
281	case 'u':
282	int digitCount = ch == 'x' ? 2 : 4;
283	if (ich < ichMax) {
284	int unicode = 0;
285	for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
286	char chT = script[ich];
287	int hexit = getHexitValue(chT);
288	if (hexit < 0)
289	break;
290	unicode <<= 4;
291	unicode += hexit;
292	++ich;
293	}
294	ch = (char)unicode;
295	}
296	}
297	}
298	sb.append(1, ch);
299	}
300
301	return sb;
302	}
303
304	int SelectionCompiler::getHexitValue(char ch) {
305	if (ch >= '0' && ch <= '9')
306	return ch - '0';
307	else if (ch >= 'a' && ch <= 'f')
308	return 10 + ch - 'a';
309	else if (ch >= 'A' && ch <= 'F')
310	return 10 + ch - 'A';
311	else
312	return -1;
313	}
314
315	bool SelectionCompiler::lookingAtSpecialString() {
316	int ichT = ichToken;
317	char ch = script[ichT];
318	while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
319	++ichT;
320	}
321	cchToken = ichT - ichToken;
322	return cchToken > 0;
323	}
324
325	bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
326	if (ichToken == cchScript) {
327	return false;
328	}
329
330	int ichT = ichToken;
331	if (script[ichT] == '-') {
332	++ichT;
333	}
334	bool digitSeen = false;
335	char ch = 'X';
336	while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
337	++ichT;
338	digitSeen = true;
339	}
340
341	if (ichT == cchScript \|\| ch != '.') {
342	return false;
343	}
344
345	// to support DMPC.1, let's check the character before the dot
346	if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
347	return false;
348	}
349
350	++ichT;
351	while (ichT < cchScript && std::isdigit(script[ichT])) {
352	++ichT;
353	digitSeen = true;
354	}
355	cchToken = ichT - ichToken;
356	return digitSeen;
357	}
358
359	bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
360	if (ichToken == cchScript) {
361	return false;
362	}
363	int ichT = ichToken;
364	if (allowNegative && script[ichToken] == '-') {
365	++ichT;
366	}
367	int ichBeginDigits = ichT;
368	while (ichT < cchScript && std::isdigit(script[ichT])) {
369	++ichT;
370	}
371	if (ichBeginDigits == ichT) {
372	return false;
373	}
374	cchToken = ichT - ichToken;
375	return true;
376	}
377
378	bool SelectionCompiler::lookingAtLookupToken() {
379	if (ichToken == cchScript) {
380	return false;
381	}
382
383	int ichT = ichToken;
384	char ch;
385	switch (ch = script[ichT++]) {
386	case '(':
387	case ')':
388	case ',':
389	case '[':
390	case ']':
391	break;
392	case '&':
393	case '\|':
394	if (ichT < cchScript && script[ichT] == ch) {
395	++ichT;
396	}
397	break;
398	case '<':
399	case '=':
400	case '>':
401	if (ichT < cchScript && ((ch = script[ichT]) == '<' \|\| ch == '=' \|\| ch == '>')) {
402	++ichT;
403	}
404	break;
405	case '/':
406	case '!':
407	if (ichT < cchScript && script[ichT] == '=') {
408	++ichT;
409	}
410	break;
411	default:
412	if ((ch < 'a' \|\| ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
413	return false;
414	}
415	case '*':
416	case '?': // include question marks in identifier for atom expressions
417	while (ichT < cchScript && !std::isspace(ch = script[ichT]) &&
418	(std::isalpha(ch) \|\|std::isdigit(ch) \|\| ch == '_' \|\| ch == '.' \|\| ch == '*' \|\| ch == '?' \|\| ch == '+' \|\| ch == '-' \|\| ch == '[' \|\| ch == ']') ){
419
420	++ichT;
421	}
422	break;
423	}
424
425	cchToken = ichT - ichToken;
426
427	return true;
428	}
429
430	bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
431	const Token& tokenCommand = ltoken[0];
432	int tokCommand = tokenCommand.tok;
433
434	atokenCommand = ltoken;
435	if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
436	return false;
437	}
438
439	return true;
440	}
441
442	bool SelectionCompiler::compileExpression() {
443	/** todo */
444	unsigned int i = 1;
445	int tokCommand = atokenCommand[0].tok;
446	if (tokCommand == Token::define) {
447	i = 2;
448	} else if ((tokCommand & Token::embeddedExpression) != 0) {
449	// look for the open parenthesis
450	while (i < atokenCommand.size() &&
451	atokenCommand[i].tok != Token::leftparen)
452	++i;
453	}
454
455	if (i >= atokenCommand.size()) {
456	return true;
457	}
458	return compileExpression(i);
459	}
460
461
462	bool SelectionCompiler::addTokenToPostfix(const Token& token) {
463	ltokenPostfix.push_back(token);
464	return true;
465	}
466
467	bool SelectionCompiler::compileExpression(int itoken) {
468	ltokenPostfix.clear();
469	for (int i = 0; i < itoken; ++i) {
470	addTokenToPostfix(atokenCommand[i]);
471	}
472
473	atokenInfix = atokenCommand;
474	itokenInfix = itoken;
475
476	addTokenToPostfix(Token::tokenExpressionBegin);
477	if (!clauseOr()) {
478	return false;
479	}
480
481	addTokenToPostfix(Token::tokenExpressionEnd);
482	if (itokenInfix != atokenInfix.size()) {
483	return endOfExpressionExpected();
484	}
485
486	atokenCommand = ltokenPostfix;
487	return true;
488	}
489
490	Token SelectionCompiler::tokenNext() {
491	if (itokenInfix == atokenInfix.size()) {
492	return Token();
493	}
494	return atokenInfix[itokenInfix++];
495	}
496
497	boost::any SelectionCompiler::valuePeek() {
498	if (itokenInfix == atokenInfix.size()) {
499	return boost::any();
500	} else {
501	return atokenInfix[itokenInfix].value;
502	}
503	}
504
505	int SelectionCompiler::tokPeek() {
506	if (itokenInfix == atokenInfix.size()) {
507	return 0;
508	}else {
509	return atokenInfix[itokenInfix].tok;
510	}
511	}
512
513	bool SelectionCompiler::clauseOr() {
514	if (!clauseAnd()) {
515	return false;
516	}
517
518	while (tokPeek() == Token::opOr) {
519	Token tokenOr = tokenNext();
520	if (!clauseAnd()) {
521	return false;
522	}
523	addTokenToPostfix(tokenOr);
524	}
525	return true;
526	}
527
528	bool SelectionCompiler::clauseAnd() {
529	if (!clauseNot()) {
530	return false;
531	}
532
533	while (tokPeek() == Token::opAnd) {
534	Token tokenAnd = tokenNext();
535	if (!clauseNot()) {
536	return false;
537	}
538	addTokenToPostfix(tokenAnd);
539	}
540	return true;
541	}
542
543	bool SelectionCompiler::clauseNot() {
544	if (tokPeek() == Token::opNot) {
545	Token tokenNot = tokenNext();
546	if (!clauseNot()) {
547	return false;
548	}
549	return addTokenToPostfix(tokenNot);
550	}
551	return clausePrimitive();
552	}
553
554	bool SelectionCompiler::clausePrimitive() {
555	int tok = tokPeek();
556	switch (tok) {
557	case Token::within:
558	return clauseWithin();
559
560	case Token::asterisk:
561	case Token::identifier:
562	return clauseChemObjName();
563
564	case Token::integer :
565	return clauseIndex();
566	default:
567	if ((tok & Token::atomproperty) == Token::atomproperty) {
568	return clauseComparator();
569	}
570	if ((tok & Token::predefinedset) != Token::predefinedset) {
571	break;
572	}
573	// fall into the code and below and just add the token
574	case Token::all:
575	case Token::none:
576	case Token::hull:
577	return addTokenToPostfix(tokenNext());
578	case Token::leftparen:
579	tokenNext();
580	if (!clauseOr()) {
581	return false;
582	}
583	if (tokenNext().tok != Token::rightparen) {
584	return rightParenthesisExpected();
585	}
586	return true;
587	}
588	return unrecognizedExpressionToken();
589	}
590
591	bool SelectionCompiler::clauseComparator() {
592	Token tokenAtomProperty = tokenNext();
593	Token tokenComparator = tokenNext();
594	if ((tokenComparator.tok & Token::comparator) == 0) {
595	return comparisonOperatorExpected();
596	}
597
598	Token tokenValue = tokenNext();
599	if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
600	return numberExpected();
601	}
602
603	float val;
604	if (tokenValue.value.type() == typeid(int)) {
605	val = boost::any_cast<int>(tokenValue.value);
606	} else if (tokenValue.value.type() == typeid(float)) {
607	val = boost::any_cast<float>(tokenValue.value);
608	} else {
609	return false;
610	}
611
612	boost::any floatVal;
613	floatVal = val;
614	return addTokenToPostfix(Token(tokenComparator.tok,
615	tokenAtomProperty.tok, floatVal));
616	}
617
618	bool SelectionCompiler::clauseWithin() {
619	tokenNext(); // WITHIN
620	if (tokenNext().tok != Token::leftparen) { // (
621	return leftParenthesisExpected();
622	}
623
624	boost::any distance;
625	Token tokenDistance = tokenNext(); // distance
626	switch(tokenDistance.tok) {
627	case Token::integer:
628	case Token::decimal:
629	distance = tokenDistance.value;
630	break;
631	default:
632	return numberOrKeywordExpected();
633	}
634
635	if (tokenNext().tok != Token::opOr) { // ,
636	return commaExpected();
637	}
638
639	if (! clauseOr()) { // expression
640	return false;
641	}
642
643	if (tokenNext().tok != Token::rightparen) { // )T
644	return rightParenthesisExpected();
645	}
646
647	return addTokenToPostfix(Token(Token::within, distance));
648	}
649
650	bool SelectionCompiler::clauseChemObjName() {
651	Token token = tokenNext();
652	if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) {
653
654	std::string name = boost::any_cast<std::string>(token.value);
655	if (isNameValid(name)) {
656	return addTokenToPostfix(Token(Token::name, name));
657	} else {
658	return compileError("invalid name: " + name);
659	}
660	}
661
662	return false;
663
664	}
665
666	bool SelectionCompiler::isNameValid(const std::string& name) {
667	int nbracket = 0;
668	int ndot = 0;
669	for (unsigned int i = 0 ; i < name.size(); ++i) {
670	switch(name[i]) {
671
672	case '[' :
673	++nbracket;
674	break;
675	case ']' :
676	--nbracket;
677	break;
678	case '.' :
679	++ndot;
680	break;
681	}
682	}
683
684	//only allow 3 dots at most
685	return (ndot <=3 && nbracket == 0) ? true : false;
686	}
687
688	bool SelectionCompiler::clauseIndex(){
689	Token token = tokenNext();
690	if (token.tok == Token::integer) {
691	int index = boost::any_cast<int>(token.value);
692	int tok = tokPeek();
693	std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
694	if (tok == Token::to) {
695	tokenNext();
696	tok = tokPeek();
697	if (tok != Token::integer) {
698	return numberExpected();
699	}
700
701	boost::any intVal = tokenNext().value;
702	int first = index;
703	if (intVal.type() != typeid(int)){
704	return false;
705	}
706	int second = boost::any_cast<int>(intVal);
707
708	return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
709
710	}else {
711	return addTokenToPostfix(Token(Token::index, boost::any(index)));
712	}
713	} else {
714	return numberExpected();
715	}
716	}
717
718	}