| 1 | tim | 279 | /* | 
| 2 |  |  | * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved. | 
| 3 |  |  | * | 
| 4 |  |  | * The University of Notre Dame grants you ("Licensee") a | 
| 5 |  |  | * non-exclusive, royalty free, license to use, modify and | 
| 6 |  |  | * redistribute this software in source and binary code form, provided | 
| 7 |  |  | * that the following conditions are met: | 
| 8 |  |  | * | 
| 9 | gezelter | 1390 | * 1. Redistributions of source code must retain the above copyright | 
| 10 | tim | 279 | *    notice, this list of conditions and the following disclaimer. | 
| 11 |  |  | * | 
| 12 | gezelter | 1390 | * 2. Redistributions in binary form must reproduce the above copyright | 
| 13 | tim | 279 | *    notice, this list of conditions and the following disclaimer in the | 
| 14 |  |  | *    documentation and/or other materials provided with the | 
| 15 |  |  | *    distribution. | 
| 16 |  |  | * | 
| 17 |  |  | * This software is provided "AS IS," without a warranty of any | 
| 18 |  |  | * kind. All express or implied conditions, representations and | 
| 19 |  |  | * warranties, including any implied warranty of merchantability, | 
| 20 |  |  | * fitness for a particular purpose or non-infringement, are hereby | 
| 21 |  |  | * excluded.  The University of Notre Dame and its licensors shall not | 
| 22 |  |  | * be liable for any damages suffered by licensee as a result of | 
| 23 |  |  | * using, modifying or distributing the software or its | 
| 24 |  |  | * derivatives. In no event will the University of Notre Dame or its | 
| 25 |  |  | * licensors be liable for any lost revenue, profit or data, or for | 
| 26 |  |  | * direct, indirect, special, consequential, incidental or punitive | 
| 27 |  |  | * damages, however caused and regardless of the theory of liability, | 
| 28 |  |  | * arising out of the use of or inability to use software, even if the | 
| 29 |  |  | * University of Notre Dame has been advised of the possibility of | 
| 30 |  |  | * such damages. | 
| 31 | gezelter | 1390 | * | 
| 32 |  |  | * SUPPORT OPEN SCIENCE!  If you use OpenMD or its source code in your | 
| 33 |  |  | * research, please cite the appropriate papers when you publish your | 
| 34 |  |  | * work.  Good starting points are: | 
| 35 |  |  | * | 
| 36 |  |  | * [1]  Meineke, et al., J. Comp. Chem. 26, 252-271 (2005). | 
| 37 |  |  | * [2]  Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006). | 
| 38 | gezelter | 1879 | * [3]  Sun, Lin & Gezelter, J. Chem. Phys. 128, 234107 (2008). | 
| 39 | gezelter | 1782 | * [4]  Kuang & Gezelter,  J. Chem. Phys. 133, 164101 (2010). | 
| 40 |  |  | * [5]  Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011). | 
| 41 | tim | 279 | */ | 
| 42 |  |  |  | 
| 43 |  |  | #include "selection/SelectionCompiler.hpp" | 
| 44 | tim | 281 | #include "utils/StringUtils.hpp" | 
| 45 | gezelter | 1390 | namespace OpenMD { | 
| 46 | tim | 279 |  | 
| 47 | cli2 | 1364 | bool SelectionCompiler::compile(const std::string& filename, | 
| 48 |  |  | const std::string& script) { | 
| 49 | tim | 279 |  | 
| 50 |  |  | this->filename = filename; | 
| 51 |  |  | this->script = script; | 
| 52 |  |  | lineNumbers.clear(); | 
| 53 |  |  | lineIndices.clear(); | 
| 54 |  |  | aatokenCompiled.clear(); | 
| 55 |  |  |  | 
| 56 | tim | 281 | if (internalCompile()) { | 
| 57 | gezelter | 507 | return true; | 
| 58 | tim | 279 | } | 
| 59 |  |  |  | 
| 60 |  |  | int icharEnd; | 
| 61 | tim | 281 | if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos && | 
| 62 |  |  | (icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) { | 
| 63 | gezelter | 507 | icharEnd = script.size(); | 
| 64 | tim | 279 | } | 
| 65 |  |  | errorLine = script.substr(ichCurrentCommand, icharEnd); | 
| 66 |  |  | return false; | 
| 67 | gezelter | 507 | } | 
| 68 | tim | 279 |  | 
| 69 | gezelter | 507 | bool SelectionCompiler::internalCompile(){ | 
| 70 | tim | 279 |  | 
| 71 |  |  | cchScript = script.size(); | 
| 72 |  |  | ichToken = 0; | 
| 73 |  |  | lineCurrent = 1; | 
| 74 |  |  |  | 
| 75 |  |  | error = false; | 
| 76 |  |  |  | 
| 77 | tim | 281 | //std::vector<Token> lltoken; | 
| 78 |  |  | aatokenCompiled.clear(); | 
| 79 | tim | 279 | std::vector<Token> ltoken; | 
| 80 |  |  |  | 
| 81 | tim | 281 | Token tokenCommand; | 
| 82 |  |  | int tokCommand = Token::nada; | 
| 83 | tim | 279 |  | 
| 84 |  |  | for ( ; true; ichToken += cchToken) { | 
| 85 | gezelter | 507 | if (lookingAtLeadingWhitespace()) | 
| 86 |  |  | continue; | 
| 87 |  |  | //if (lookingAtComment()) | 
| 88 |  |  | //    continue; | 
| 89 |  |  | bool endOfLine = lookingAtEndOfLine(); | 
| 90 |  |  | if (endOfLine || lookingAtEndOfStatement()) { | 
| 91 |  |  | if (tokCommand != Token::nada) { | 
| 92 |  |  | if (! compileCommand(ltoken)) { | 
| 93 |  |  | return false; | 
| 94 |  |  | } | 
| 95 |  |  | aatokenCompiled.push_back(atokenCommand); | 
| 96 |  |  | lineNumbers.push_back(lineCurrent); | 
| 97 |  |  | lineIndices.push_back(ichCurrentCommand); | 
| 98 |  |  | ltoken.clear(); | 
| 99 |  |  | tokCommand = Token::nada; | 
| 100 |  |  | } | 
| 101 | tim | 279 |  | 
| 102 | gezelter | 507 | if (ichToken < cchScript) { | 
| 103 |  |  | if (endOfLine) | 
| 104 |  |  | ++lineCurrent; | 
| 105 |  |  | continue; | 
| 106 |  |  | } | 
| 107 |  |  | break; | 
| 108 |  |  | } | 
| 109 | tim | 279 |  | 
| 110 | gezelter | 507 | if (tokCommand != Token::nada) { | 
| 111 |  |  | if (lookingAtString()) { | 
| 112 |  |  | std::string str = getUnescapedStringLiteral(); | 
| 113 |  |  | ltoken.push_back(Token(Token::string, str)); | 
| 114 |  |  | continue; | 
| 115 |  |  | } | 
| 116 |  |  | //if ((tokCommand & Token::specialstring) != 0 && | 
| 117 |  |  | //    lookingAtSpecialString()) { | 
| 118 |  |  | //    std::string str = script.substr(ichToken, ichToken + cchToken); | 
| 119 |  |  | //    ltoken.push_back(Token(Token::string, str)); | 
| 120 |  |  | //    continue; | 
| 121 |  |  | //} | 
| 122 |  |  | if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) { | 
| 123 |  |  | float value = lexi_cast<float>(script.substr(ichToken, cchToken)); | 
| 124 |  |  | ltoken.push_back(Token(Token::decimal, boost::any(value))); | 
| 125 |  |  | continue; | 
| 126 |  |  | } | 
| 127 |  |  | if (lookingAtInteger((tokCommand & Token::negnums) != 0)) { | 
| 128 | tim | 295 |  | 
| 129 | gezelter | 507 | int val = lexi_cast<int>(script.substr(ichToken, cchToken)); | 
| 130 |  |  | ltoken.push_back(Token(Token::integer,   boost::any(val))); | 
| 131 |  |  | continue; | 
| 132 |  |  | } | 
| 133 |  |  | } | 
| 134 | tim | 279 |  | 
| 135 | gezelter | 507 | if (lookingAtLookupToken()) { | 
| 136 |  |  | std::string ident = script.substr(ichToken, cchToken); | 
| 137 |  |  | Token token; | 
| 138 |  |  | Token* pToken = TokenMap::getInstance()->getToken(ident); | 
| 139 |  |  | if (pToken != NULL) { | 
| 140 |  |  | token = *pToken; | 
| 141 |  |  | } else { | 
| 142 |  |  | token = Token(Token::identifier, ident); | 
| 143 |  |  | } | 
| 144 | tim | 279 |  | 
| 145 | gezelter | 507 | int tok = token.tok; | 
| 146 | tim | 279 |  | 
| 147 | gezelter | 507 | switch (tokCommand) { | 
| 148 |  |  | case Token::nada: | 
| 149 |  |  | ichCurrentCommand = ichToken; | 
| 150 |  |  | //tokenCommand = token; | 
| 151 |  |  | tokCommand = tok; | 
| 152 |  |  | if ((tokCommand & Token::command) == 0) | 
| 153 |  |  | return commandExpected(); | 
| 154 |  |  | break; | 
| 155 | tim | 279 |  | 
| 156 | gezelter | 507 | case Token::define: | 
| 157 |  |  | if (ltoken.size() == 1) { | 
| 158 |  |  | // we are looking at the variable name | 
| 159 |  |  | if (tok != Token::identifier && | 
| 160 |  |  | (tok & Token::predefinedset) != Token::predefinedset) | 
| 161 |  |  | return invalidExpressionToken(ident); | 
| 162 |  |  | } else { | 
| 163 |  |  | // we are looking at the expression | 
| 164 |  |  | if (tok != Token::identifier && | 
| 165 |  |  | (tok & (Token::expression | Token::predefinedset)) == 0) | 
| 166 |  |  | return invalidExpressionToken(ident); | 
| 167 |  |  | } | 
| 168 | tim | 279 |  | 
| 169 | gezelter | 507 | break; | 
| 170 | tim | 279 |  | 
| 171 | gezelter | 507 | case Token::select: | 
| 172 |  |  | if (tok != Token::identifier && (tok & Token::expression) == 0) | 
| 173 |  |  | return invalidExpressionToken(ident); | 
| 174 |  |  | break; | 
| 175 |  |  | } | 
| 176 |  |  | ltoken.push_back(token); | 
| 177 |  |  | continue; | 
| 178 |  |  | } | 
| 179 | tim | 279 |  | 
| 180 | gezelter | 1879 | if (ltoken.empty()) { | 
| 181 | gezelter | 507 | return commandExpected(); | 
| 182 |  |  | } | 
| 183 | tim | 279 |  | 
| 184 | gezelter | 507 | return unrecognizedToken(); | 
| 185 | tim | 279 | } | 
| 186 |  |  |  | 
| 187 |  |  | return true; | 
| 188 |  |  | } | 
| 189 |  |  |  | 
| 190 |  |  |  | 
| 191 |  |  | bool SelectionCompiler::lookingAtLeadingWhitespace() { | 
| 192 |  |  |  | 
| 193 |  |  | int ichT = ichToken; | 
| 194 |  |  | while (ichT < cchScript && std::isspace(script[ichT])) { | 
| 195 |  |  | ++ichT; | 
| 196 |  |  | } | 
| 197 |  |  | cchToken = ichT - ichToken; | 
| 198 |  |  | return cchToken > 0; | 
| 199 |  |  | } | 
| 200 |  |  |  | 
| 201 |  |  | bool SelectionCompiler::lookingAtEndOfLine() { | 
| 202 |  |  | if (ichToken == cchScript) | 
| 203 |  |  | return true; | 
| 204 |  |  | int ichT = ichToken; | 
| 205 |  |  | char ch = script[ichT]; | 
| 206 |  |  | if (ch == '\r') { | 
| 207 |  |  | ++ichT; | 
| 208 |  |  | if (ichT < cchScript && script[ichT] == '\n') | 
| 209 | gezelter | 507 | ++ichT; | 
| 210 | tim | 279 | } else if (ch == '\n') { | 
| 211 |  |  | ++ichT; | 
| 212 |  |  | } else { | 
| 213 |  |  | return false; | 
| 214 |  |  | } | 
| 215 |  |  | cchToken = ichT - ichToken; | 
| 216 |  |  | return true; | 
| 217 |  |  | } | 
| 218 |  |  |  | 
| 219 |  |  | bool SelectionCompiler::lookingAtEndOfStatement() { | 
| 220 |  |  | if (ichToken == cchScript || script[ichToken] != ';') | 
| 221 |  |  | return false; | 
| 222 |  |  | cchToken = 1; | 
| 223 |  |  | return true; | 
| 224 |  |  | } | 
| 225 |  |  |  | 
| 226 |  |  | bool SelectionCompiler::lookingAtString() { | 
| 227 |  |  | if (ichToken == cchScript) | 
| 228 |  |  | return false; | 
| 229 |  |  | if (script[ichToken] != '"') | 
| 230 |  |  | return false; | 
| 231 |  |  | // remove support for single quote | 
| 232 |  |  | // in order to use it in atom expressions | 
| 233 |  |  | //    char chFirst = script.charAt(ichToken); | 
| 234 |  |  | //    if (chFirst != '"' && chFirst != '\'') | 
| 235 |  |  | //      return false; | 
| 236 |  |  | int ichT = ichToken + 1; | 
| 237 |  |  | //    while (ichT < cchScript && script.charAt(ichT++) != chFirst) | 
| 238 |  |  | char ch; | 
| 239 | tim | 281 | bool previousCharBackslash = false; | 
| 240 | tim | 279 | while (ichT < cchScript) { | 
| 241 | tim | 281 | ch = script[ichT++]; | 
| 242 | tim | 279 | if (ch == '"' && !previousCharBackslash) | 
| 243 |  |  | break; | 
| 244 |  |  | previousCharBackslash = ch == '\\' ? !previousCharBackslash : false; | 
| 245 |  |  | } | 
| 246 |  |  | cchToken = ichT - ichToken; | 
| 247 | tim | 295 |  | 
| 248 | tim | 279 | return true; | 
| 249 |  |  | } | 
| 250 |  |  |  | 
| 251 |  |  |  | 
| 252 | gezelter | 507 | std::string SelectionCompiler::getUnescapedStringLiteral() { | 
| 253 | tim | 281 | /** @todo */ | 
| 254 |  |  | std::string sb(cchToken - 2, ' '); | 
| 255 |  |  |  | 
| 256 | tim | 279 | int ichMax = ichToken + cchToken - 1; | 
| 257 |  |  | int ich = ichToken + 1; | 
| 258 |  |  |  | 
| 259 |  |  | while (ich < ichMax) { | 
| 260 | gezelter | 507 | char ch = script[ich++]; | 
| 261 |  |  | if (ch == '\\' && ich < ichMax) { | 
| 262 |  |  | ch = script[ich++]; | 
| 263 |  |  | switch (ch) { | 
| 264 |  |  | case 'b': | 
| 265 |  |  | ch = '\b'; | 
| 266 |  |  | break; | 
| 267 |  |  | case 'n': | 
| 268 |  |  | ch = '\n'; | 
| 269 |  |  | break; | 
| 270 |  |  | case 't': | 
| 271 |  |  | ch = '\t'; | 
| 272 |  |  | break; | 
| 273 |  |  | case 'r': | 
| 274 |  |  | ch = '\r'; | 
| 275 |  |  | // fall into | 
| 276 |  |  | case '"': | 
| 277 |  |  | case '\\': | 
| 278 |  |  | case '\'': | 
| 279 |  |  | break; | 
| 280 |  |  | case 'x': | 
| 281 |  |  | case 'u': | 
| 282 |  |  | int digitCount = ch == 'x' ? 2 : 4; | 
| 283 |  |  | if (ich < ichMax) { | 
| 284 |  |  | int unicode = 0; | 
| 285 |  |  | for (int k = digitCount; --k >= 0 && ich < ichMax; ) { | 
| 286 |  |  | char chT = script[ich]; | 
| 287 |  |  | int hexit = getHexitValue(chT); | 
| 288 |  |  | if (hexit < 0) | 
| 289 |  |  | break; | 
| 290 |  |  | unicode <<= 4; | 
| 291 |  |  | unicode += hexit; | 
| 292 |  |  | ++ich; | 
| 293 |  |  | } | 
| 294 |  |  | ch = (char)unicode; | 
| 295 |  |  | } | 
| 296 |  |  | } | 
| 297 |  |  | } | 
| 298 |  |  | sb.append(1, ch); | 
| 299 | tim | 279 | } | 
| 300 |  |  |  | 
| 301 | tim | 281 | return sb; | 
| 302 | gezelter | 507 | } | 
| 303 | tim | 279 |  | 
| 304 | gezelter | 507 | int SelectionCompiler::getHexitValue(char ch) { | 
| 305 | tim | 279 | if (ch >= '0' && ch <= '9') | 
| 306 | gezelter | 507 | return ch - '0'; | 
| 307 | tim | 279 | else if (ch >= 'a' && ch <= 'f') | 
| 308 | gezelter | 507 | return 10 + ch - 'a'; | 
| 309 | tim | 279 | else if (ch >= 'A' && ch <= 'F') | 
| 310 | gezelter | 507 | return 10 + ch - 'A'; | 
| 311 | tim | 279 | else | 
| 312 | gezelter | 507 | return -1; | 
| 313 |  |  | } | 
| 314 | tim | 279 |  | 
| 315 | gezelter | 507 | bool SelectionCompiler::lookingAtSpecialString() { | 
| 316 | tim | 279 | int ichT = ichToken; | 
| 317 |  |  | char ch = script[ichT]; | 
| 318 |  |  | while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') { | 
| 319 | gezelter | 507 | ++ichT; | 
| 320 | tim | 279 | } | 
| 321 |  |  | cchToken = ichT - ichToken; | 
| 322 |  |  | return cchToken > 0; | 
| 323 | gezelter | 507 | } | 
| 324 | tim | 279 |  | 
| 325 | gezelter | 507 | bool SelectionCompiler::lookingAtDecimal(bool allowNegative) { | 
| 326 | tim | 279 | if (ichToken == cchScript) { | 
| 327 | gezelter | 507 | return false; | 
| 328 | tim | 279 | } | 
| 329 |  |  |  | 
| 330 |  |  | int ichT = ichToken; | 
| 331 |  |  | if (script[ichT] == '-') { | 
| 332 | gezelter | 507 | ++ichT; | 
| 333 | tim | 279 | } | 
| 334 | tim | 281 | bool digitSeen = false; | 
| 335 | tim | 279 | char ch = 'X'; | 
| 336 |  |  | while (ichT < cchScript && std::isdigit(ch = script[ichT])) { | 
| 337 | gezelter | 507 | ++ichT; | 
| 338 |  |  | digitSeen = true; | 
| 339 | tim | 279 | } | 
| 340 |  |  |  | 
| 341 |  |  | if (ichT == cchScript || ch != '.') { | 
| 342 | gezelter | 507 | return false; | 
| 343 | tim | 279 | } | 
| 344 |  |  |  | 
| 345 | tim | 303 | // to support DMPC.1, let's check the character before the dot | 
| 346 |  |  | if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) { | 
| 347 | gezelter | 507 | return false; | 
| 348 | tim | 279 | } | 
| 349 |  |  |  | 
| 350 |  |  | ++ichT; | 
| 351 |  |  | while (ichT < cchScript && std::isdigit(script[ichT])) { | 
| 352 | gezelter | 507 | ++ichT; | 
| 353 |  |  | digitSeen = true; | 
| 354 | tim | 279 | } | 
| 355 |  |  | cchToken = ichT - ichToken; | 
| 356 |  |  | return digitSeen; | 
| 357 | gezelter | 507 | } | 
| 358 | tim | 279 |  | 
| 359 | gezelter | 507 | bool SelectionCompiler::lookingAtInteger(bool allowNegative) { | 
| 360 | tim | 279 | if (ichToken == cchScript) { | 
| 361 | gezelter | 507 | return false; | 
| 362 | tim | 279 | } | 
| 363 |  |  | int ichT = ichToken; | 
| 364 |  |  | if (allowNegative && script[ichToken] == '-') { | 
| 365 | gezelter | 507 | ++ichT; | 
| 366 | tim | 279 | } | 
| 367 |  |  | int ichBeginDigits = ichT; | 
| 368 |  |  | while (ichT < cchScript && std::isdigit(script[ichT])) { | 
| 369 | gezelter | 507 | ++ichT; | 
| 370 | tim | 279 | } | 
| 371 |  |  | if (ichBeginDigits == ichT) { | 
| 372 | gezelter | 507 | return false; | 
| 373 | tim | 279 | } | 
| 374 |  |  | cchToken = ichT - ichToken; | 
| 375 |  |  | return true; | 
| 376 | gezelter | 507 | } | 
| 377 | tim | 279 |  | 
| 378 | gezelter | 507 | bool SelectionCompiler::lookingAtLookupToken() { | 
| 379 | tim | 279 | if (ichToken == cchScript) { | 
| 380 | gezelter | 507 | return false; | 
| 381 | tim | 279 | } | 
| 382 |  |  |  | 
| 383 |  |  | int ichT = ichToken; | 
| 384 |  |  | char ch; | 
| 385 |  |  | switch (ch = script[ichT++]) { | 
| 386 | gezelter | 507 | case '(': | 
| 387 |  |  | case ')': | 
| 388 |  |  | case ',': | 
| 389 |  |  | case '[': | 
| 390 |  |  | case ']': | 
| 391 |  |  | break; | 
| 392 |  |  | case '&': | 
| 393 |  |  | case '|': | 
| 394 |  |  | if (ichT < cchScript && script[ichT] == ch) { | 
| 395 |  |  | ++ichT; | 
| 396 |  |  | } | 
| 397 |  |  | break; | 
| 398 |  |  | case '<': | 
| 399 |  |  | case '=': | 
| 400 |  |  | case '>': | 
| 401 |  |  | if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) { | 
| 402 |  |  | ++ichT; | 
| 403 |  |  | } | 
| 404 |  |  | break; | 
| 405 |  |  | case '/': | 
| 406 |  |  | case '!': | 
| 407 |  |  | if (ichT < cchScript && script[ichT] == '=') { | 
| 408 |  |  | ++ichT; | 
| 409 |  |  | } | 
| 410 |  |  | break; | 
| 411 |  |  | default: | 
| 412 |  |  | if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') { | 
| 413 |  |  | return false; | 
| 414 |  |  | } | 
| 415 |  |  | case '*': | 
| 416 |  |  | case '?': // include question marks in identifier for atom expressions | 
| 417 |  |  | while (ichT < cchScript && !std::isspace(ch = script[ichT]) && | 
| 418 |  |  | (std::isalpha(ch) ||std::isdigit(ch) || ch == '_' || ch == '.' || ch == '*' || ch == '?' || ch == '+' || ch == '-' || ch == '[' || ch == ']') ){ | 
| 419 | tim | 288 |  | 
| 420 | gezelter | 507 | ++ichT; | 
| 421 |  |  | } | 
| 422 |  |  | break; | 
| 423 | tim | 279 | } | 
| 424 | tim | 295 |  | 
| 425 | tim | 279 | cchToken = ichT - ichToken; | 
| 426 | tim | 295 |  | 
| 427 | tim | 279 | return true; | 
| 428 | gezelter | 507 | } | 
| 429 | tim | 279 |  | 
| 430 | gezelter | 507 | bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) { | 
| 431 | tim | 281 | const Token& tokenCommand = ltoken[0]; | 
| 432 | tim | 279 | int tokCommand = tokenCommand.tok; | 
| 433 | tim | 281 |  | 
| 434 |  |  | atokenCommand = ltoken; | 
| 435 |  |  | if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) { | 
| 436 | gezelter | 507 | return false; | 
| 437 | tim | 279 | } | 
| 438 | tim | 281 |  | 
| 439 | tim | 279 | return true; | 
| 440 | gezelter | 507 | } | 
| 441 | tim | 279 |  | 
| 442 | gezelter | 507 | bool SelectionCompiler::compileExpression() { | 
| 443 | tim | 279 | /** todo */ | 
| 444 | gezelter | 1782 | unsigned int i = 1; | 
| 445 | tim | 279 | int tokCommand = atokenCommand[0].tok; | 
| 446 | tim | 281 | if (tokCommand == Token::define) { | 
| 447 | gezelter | 507 | i = 2; | 
| 448 | tim | 281 | } else if ((tokCommand & Token::embeddedExpression) != 0) { | 
| 449 | gezelter | 507 | // look for the open parenthesis | 
| 450 |  |  | while (i < atokenCommand.size() && | 
| 451 |  |  | atokenCommand[i].tok != Token::leftparen) | 
| 452 | tim | 279 | ++i; | 
| 453 |  |  | } | 
| 454 | tim | 281 |  | 
| 455 |  |  | if (i >= atokenCommand.size()) { | 
| 456 | gezelter | 507 | return true; | 
| 457 | tim | 281 | } | 
| 458 | tim | 279 | return compileExpression(i); | 
| 459 |  |  | } | 
| 460 |  |  |  | 
| 461 |  |  |  | 
| 462 | gezelter | 507 | bool SelectionCompiler::addTokenToPostfix(const Token& token) { | 
| 463 | tim | 279 | ltokenPostfix.push_back(token); | 
| 464 |  |  | return true; | 
| 465 | gezelter | 507 | } | 
| 466 | tim | 279 |  | 
| 467 | gezelter | 507 | bool SelectionCompiler::compileExpression(int itoken) { | 
| 468 | tim | 281 | ltokenPostfix.clear(); | 
| 469 |  |  | for (int i = 0; i < itoken; ++i) { | 
| 470 | gezelter | 507 | addTokenToPostfix(atokenCommand[i]); | 
| 471 | tim | 281 | } | 
| 472 |  |  |  | 
| 473 | tim | 279 | atokenInfix = atokenCommand; | 
| 474 |  |  | itokenInfix = itoken; | 
| 475 |  |  |  | 
| 476 | tim | 281 | addTokenToPostfix(Token::tokenExpressionBegin); | 
| 477 | tim | 279 | if (!clauseOr()) { | 
| 478 | gezelter | 507 | return false; | 
| 479 | tim | 279 | } | 
| 480 |  |  |  | 
| 481 | tim | 281 | addTokenToPostfix(Token::tokenExpressionEnd); | 
| 482 |  |  | if (itokenInfix != atokenInfix.size()) { | 
| 483 | gezelter | 507 | return endOfExpressionExpected(); | 
| 484 | tim | 279 | } | 
| 485 |  |  |  | 
| 486 |  |  | atokenCommand = ltokenPostfix; | 
| 487 |  |  | return true; | 
| 488 | gezelter | 507 | } | 
| 489 | tim | 279 |  | 
| 490 | gezelter | 507 | Token SelectionCompiler::tokenNext() { | 
| 491 | tim | 281 | if (itokenInfix == atokenInfix.size()) { | 
| 492 | gezelter | 507 | return Token(); | 
| 493 | tim | 281 | } | 
| 494 |  |  | return atokenInfix[itokenInfix++]; | 
| 495 | gezelter | 507 | } | 
| 496 | tim | 279 |  | 
| 497 | gezelter | 507 | boost::any SelectionCompiler::valuePeek() { | 
| 498 | tim | 281 | if (itokenInfix == atokenInfix.size()) { | 
| 499 | gezelter | 507 | return boost::any(); | 
| 500 | tim | 279 | } else { | 
| 501 | gezelter | 507 | return atokenInfix[itokenInfix].value; | 
| 502 | tim | 279 | } | 
| 503 | gezelter | 507 | } | 
| 504 | tim | 279 |  | 
| 505 | gezelter | 507 | int SelectionCompiler::tokPeek() { | 
| 506 | tim | 281 | if (itokenInfix == atokenInfix.size()) { | 
| 507 | gezelter | 507 | return 0; | 
| 508 | tim | 279 | }else { | 
| 509 | gezelter | 507 | return atokenInfix[itokenInfix].tok; | 
| 510 | tim | 279 | } | 
| 511 | gezelter | 507 | } | 
| 512 | tim | 279 |  | 
| 513 | gezelter | 507 | bool SelectionCompiler::clauseOr() { | 
| 514 | tim | 279 | if (!clauseAnd()) { | 
| 515 | gezelter | 507 | return false; | 
| 516 | tim | 279 | } | 
| 517 |  |  |  | 
| 518 | tim | 281 | while (tokPeek() == Token::opOr) { | 
| 519 | gezelter | 507 | Token tokenOr = tokenNext(); | 
| 520 |  |  | if (!clauseAnd()) { | 
| 521 |  |  | return false; | 
| 522 |  |  | } | 
| 523 |  |  | addTokenToPostfix(tokenOr); | 
| 524 | tim | 279 | } | 
| 525 |  |  | return true; | 
| 526 | gezelter | 507 | } | 
| 527 | tim | 279 |  | 
| 528 | gezelter | 507 | bool SelectionCompiler::clauseAnd() { | 
| 529 | tim | 279 | if (!clauseNot()) { | 
| 530 | gezelter | 507 | return false; | 
| 531 | tim | 279 | } | 
| 532 |  |  |  | 
| 533 | tim | 281 | while (tokPeek() == Token::opAnd) { | 
| 534 | gezelter | 507 | Token tokenAnd = tokenNext(); | 
| 535 |  |  | if (!clauseNot()) { | 
| 536 |  |  | return false; | 
| 537 |  |  | } | 
| 538 |  |  | addTokenToPostfix(tokenAnd); | 
| 539 | tim | 279 | } | 
| 540 |  |  | return true; | 
| 541 | gezelter | 507 | } | 
| 542 | tim | 279 |  | 
| 543 | gezelter | 507 | bool SelectionCompiler::clauseNot() { | 
| 544 | tim | 281 | if (tokPeek() == Token::opNot) { | 
| 545 | gezelter | 507 | Token tokenNot = tokenNext(); | 
| 546 |  |  | if (!clauseNot()) { | 
| 547 |  |  | return false; | 
| 548 |  |  | } | 
| 549 |  |  | return addTokenToPostfix(tokenNot); | 
| 550 | tim | 279 | } | 
| 551 |  |  | return clausePrimitive(); | 
| 552 | gezelter | 507 | } | 
| 553 | tim | 279 |  | 
| 554 | gezelter | 507 | bool SelectionCompiler::clausePrimitive() { | 
| 555 | tim | 279 | int tok = tokPeek(); | 
| 556 |  |  | switch (tok) { | 
| 557 | gezelter | 507 | case Token::within: | 
| 558 |  |  | return clauseWithin(); | 
| 559 | tim | 283 |  | 
| 560 | gezelter | 507 | case Token::asterisk: | 
| 561 |  |  | case Token::identifier: | 
| 562 |  |  | return clauseChemObjName(); | 
| 563 | tim | 295 |  | 
| 564 | gezelter | 507 | case Token::integer : | 
| 565 |  |  | return clauseIndex(); | 
| 566 |  |  | default: | 
| 567 |  |  | if ((tok & Token::atomproperty) == Token::atomproperty) { | 
| 568 |  |  | return clauseComparator(); | 
| 569 |  |  | } | 
| 570 |  |  | if ((tok & Token::predefinedset) != Token::predefinedset) { | 
| 571 |  |  | break; | 
| 572 |  |  | } | 
| 573 |  |  | // fall into the code and below and just add the token | 
| 574 |  |  | case Token::all: | 
| 575 |  |  | case Token::none: | 
| 576 | kstocke1 | 1523 | case Token::hull: | 
| 577 | gezelter | 507 | return addTokenToPostfix(tokenNext()); | 
| 578 |  |  | case Token::leftparen: | 
| 579 |  |  | tokenNext(); | 
| 580 |  |  | if (!clauseOr()) { | 
| 581 |  |  | return false; | 
| 582 |  |  | } | 
| 583 |  |  | if (tokenNext().tok != Token::rightparen) { | 
| 584 |  |  | return rightParenthesisExpected(); | 
| 585 |  |  | } | 
| 586 |  |  | return true; | 
| 587 | tim | 279 | } | 
| 588 |  |  | return unrecognizedExpressionToken(); | 
| 589 | gezelter | 507 | } | 
| 590 | tim | 279 |  | 
| 591 | gezelter | 507 | bool SelectionCompiler::clauseComparator() { | 
| 592 | tim | 279 | Token tokenAtomProperty = tokenNext(); | 
| 593 |  |  | Token tokenComparator = tokenNext(); | 
| 594 | tim | 281 | if ((tokenComparator.tok & Token::comparator) == 0) { | 
| 595 | gezelter | 507 | return comparisonOperatorExpected(); | 
| 596 | tim | 279 | } | 
| 597 |  |  |  | 
| 598 |  |  | Token tokenValue = tokenNext(); | 
| 599 | tim | 288 | if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) { | 
| 600 | gezelter | 507 | return numberExpected(); | 
| 601 | tim | 279 | } | 
| 602 | tim | 288 |  | 
| 603 |  |  | float val; | 
| 604 |  |  | if (tokenValue.value.type() == typeid(int)) { | 
| 605 | gezelter | 507 | val = boost::any_cast<int>(tokenValue.value); | 
| 606 | tim | 288 | } else if (tokenValue.value.type() == typeid(float)) { | 
| 607 | gezelter | 507 | val = boost::any_cast<float>(tokenValue.value); | 
| 608 | tim | 288 | } else { | 
| 609 | gezelter | 507 | return false; | 
| 610 | tim | 288 | } | 
| 611 |  |  |  | 
| 612 | tim | 295 | boost::any floatVal; | 
| 613 |  |  | floatVal = val; | 
| 614 | tim | 281 | return addTokenToPostfix(Token(tokenComparator.tok, | 
| 615 | gezelter | 507 | tokenAtomProperty.tok, floatVal)); | 
| 616 |  |  | } | 
| 617 | tim | 279 |  | 
| 618 | gezelter | 507 | bool SelectionCompiler::clauseWithin() { | 
| 619 | tim | 279 | tokenNext();                             // WITHIN | 
| 620 | tim | 281 | if (tokenNext().tok != Token::leftparen) {  // ( | 
| 621 | gezelter | 507 | return leftParenthesisExpected(); | 
| 622 | tim | 279 | } | 
| 623 |  |  |  | 
| 624 | tim | 281 | boost::any distance; | 
| 625 | tim | 279 | Token tokenDistance = tokenNext();       // distance | 
| 626 |  |  | switch(tokenDistance.tok) { | 
| 627 | gezelter | 507 | case Token::integer: | 
| 628 |  |  | case Token::decimal: | 
| 629 |  |  | distance = tokenDistance.value; | 
| 630 |  |  | break; | 
| 631 |  |  | default: | 
| 632 |  |  | return numberOrKeywordExpected(); | 
| 633 | tim | 279 | } | 
| 634 |  |  |  | 
| 635 | tim | 281 | if (tokenNext().tok != Token::opOr) {       // , | 
| 636 | gezelter | 507 | return commaExpected(); | 
| 637 | tim | 279 | } | 
| 638 |  |  |  | 
| 639 |  |  | if (! clauseOr()) {                        // *expression* | 
| 640 | gezelter | 507 | return false; | 
| 641 | tim | 279 | } | 
| 642 |  |  |  | 
| 643 | tim | 281 | if (tokenNext().tok != Token::rightparen) { // )T | 
| 644 | gezelter | 507 | return rightParenthesisExpected(); | 
| 645 | tim | 279 | } | 
| 646 |  |  |  | 
| 647 | tim | 281 | return addTokenToPostfix(Token(Token::within, distance)); | 
| 648 | gezelter | 507 | } | 
| 649 | tim | 279 |  | 
| 650 | gezelter | 507 | bool SelectionCompiler::clauseChemObjName() { | 
| 651 | tim | 452 | Token token = tokenNext(); | 
| 652 |  |  | if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) { | 
| 653 | tim | 279 |  | 
| 654 | gezelter | 507 | std::string name = boost::any_cast<std::string>(token.value); | 
| 655 |  |  | if (isNameValid(name)) { | 
| 656 |  |  | return addTokenToPostfix(Token(Token::name, name)); | 
| 657 |  |  | } else { | 
| 658 |  |  | return compileError("invalid name: " + name); | 
| 659 |  |  | } | 
| 660 | tim | 452 | } | 
| 661 | tim | 288 |  | 
| 662 | tim | 452 | return false; | 
| 663 |  |  |  | 
| 664 | gezelter | 507 | } | 
| 665 | tim | 279 |  | 
| 666 | gezelter | 507 | bool SelectionCompiler::isNameValid(const std::string& name) { | 
| 667 |  |  | int nbracket = 0; | 
| 668 | tim | 452 | int ndot = 0; | 
| 669 | gezelter | 1782 | for (unsigned int i = 0 ; i < name.size(); ++i) { | 
| 670 | gezelter | 507 | switch(name[i]) { | 
| 671 | tim | 283 |  | 
| 672 | gezelter | 507 | case '[' : | 
| 673 |  |  | ++nbracket; | 
| 674 |  |  | break; | 
| 675 |  |  | case ']' : | 
| 676 |  |  | --nbracket; | 
| 677 |  |  | break; | 
| 678 |  |  | case '.' : | 
| 679 |  |  | ++ndot; | 
| 680 |  |  | break; | 
| 681 |  |  | } | 
| 682 | tim | 283 | } | 
| 683 |  |  |  | 
| 684 | tim | 452 | //only allow 3 dots at most | 
| 685 |  |  | return (ndot <=3 && nbracket == 0) ? true : false; | 
| 686 | gezelter | 507 | } | 
| 687 | tim | 279 |  | 
| 688 | gezelter | 507 | bool SelectionCompiler::clauseIndex(){ | 
| 689 | tim | 295 | Token token = tokenNext(); | 
| 690 |  |  | if (token.tok == Token::integer) { | 
| 691 | gezelter | 507 | int index = boost::any_cast<int>(token.value); | 
| 692 |  |  | int tok = tokPeek(); | 
| 693 |  |  | std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl; | 
| 694 |  |  | if (tok == Token::to) { | 
| 695 |  |  | tokenNext(); | 
| 696 |  |  | tok = tokPeek(); | 
| 697 |  |  | if (tok != Token::integer) { | 
| 698 |  |  | return numberExpected(); | 
| 699 |  |  | } | 
| 700 | tim | 295 |  | 
| 701 | gezelter | 507 | boost::any intVal = tokenNext().value; | 
| 702 |  |  | int first = index; | 
| 703 |  |  | if (intVal.type() != typeid(int)){ | 
| 704 |  |  | return false; | 
| 705 |  |  | } | 
| 706 |  |  | int second = boost::any_cast<int>(intVal); | 
| 707 | tim | 283 |  | 
| 708 | gezelter | 507 | return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second)))); | 
| 709 | tim | 295 |  | 
| 710 | gezelter | 507 | }else { | 
| 711 |  |  | return addTokenToPostfix(Token(Token::index, boost::any(index))); | 
| 712 |  |  | } | 
| 713 | tim | 295 | } else { | 
| 714 | gezelter | 507 | return numberExpected(); | 
| 715 | tim | 295 | } | 
| 716 | gezelter | 507 | } | 
| 717 | tim | 295 |  | 
| 718 |  |  | } |