45#include "selection/SelectionCompiler.hpp"
// Entry point: stores the filename and script text on the compiler, clears
// aatokenCompiled, and returns true when internalCompile() succeeds.
54 bool SelectionCompiler::compile(
const std::string& filename,
55 const std::string& script) {
56 this->filename = filename;
57 this->script = script;
60 aatokenCompiled.clear();
62 if (internalCompile()) {
return true; }
// Failure path: look for a '\r' or a '\n' starting at ichCurrentCommand and
// fall back to script.size() to find where the offending text ends.
65 if ((icharEnd = script.find(
'\r', ichCurrentCommand)) ==
67 (icharEnd = script.find(
'\n', ichCurrentCommand)) ==
69 icharEnd = script.size();
// NOTE(review): std::string::substr takes (pos, count); icharEnd was computed
// as a position but is passed here as the count -- confirm this is intended.
71 errorLine = script.substr(ichCurrentCommand, icharEnd);
// Scans the script token by token and compiles it one command at a time.
// Returns false as soon as compileCommand() fails; the error helpers called
// below (commandExpected, invalidExpressionToken, unrecognizedToken) supply
// the return value on the other failure paths.
75 bool SelectionCompiler::internalCompile() {
76 cchScript = script.size();
83 aatokenCompiled.clear();
// Tokens gathered for the command currently being read.
84 std::vector<Token> ltoken;
// tokCommand starts as Token::nada and is reset to it after each compiled
// command.
87 int tokCommand = Token::nada;
// Each iteration advances the scan position by the length of the last match.
89 for (;
true; ichToken += cchToken) {
// Whitespace is consumed without producing a token.
90 if (lookingAtLeadingWhitespace())
continue;
93 bool endOfLine = lookingAtEndOfLine();
// End of line or end of statement: compile the pending command, if any.
94 if (endOfLine || lookingAtEndOfStatement()) {
95 if (tokCommand != Token::nada) {
96 if (!compileCommand(ltoken)) {
return false; }
// Record the compiled tokens together with the current line number and the
// script offset stored in ichCurrentCommand.
97 aatokenCompiled.push_back(atokenCommand);
98 lineNumbers.push_back(lineCurrent);
99 lineIndices.push_back(ichCurrentCommand);
101 tokCommand = Token::nada;
// The line counter is bumped only for a real end of line that is not at the
// end of the script.
104 if (ichToken < cchScript) {
105 if (endOfLine) ++lineCurrent;
// With a command in progress, a quoted string becomes a Token::string that
// holds the unescaped text.
111 if (tokCommand != Token::nada) {
112 if (lookingAtString()) {
113 std::string str = getUnescapedStringLiteral();
114 ltoken.push_back(Token(Token::string, str));
// The matched text is converted to float and stored as a Token::decimal.
124 if (lookingAtDecimal((tokCommand) != 0)) {
125 float value = lexi_cast<float>(script.substr(ichToken, cchToken));
126 ltoken.push_back(Token(Token::decimal, std::any(value)));
// The matched text is converted to int and stored as a Token::integer.
130 if (lookingAtInteger((tokCommand) != 0)) {
131 int val = lexi_cast<int>(script.substr(ichToken, cchToken));
132 ltoken.push_back(Token(Token::integer, std::any(val)));
// Any other token text is looked up in the TokenMap singleton.
137 if (lookingAtLookupToken()) {
138 std::string ident = script.substr(ichToken, cchToken);
140 Token* pToken = TokenMap::getInstance().getToken(ident);
141 if (pToken != NULL) {
// A Token::identifier is built from the raw text.
144 token = Token(Token::identifier, ident);
149 switch (tokCommand) {
// Remember where the current command starts in the script.
151 ichCurrentCommand = ichToken;
// A token without the Token::command bit is rejected.
154 if ((tokCommand & Token::command) == 0)
return commandExpected();
// Each check below rejects, via invalidExpressionToken(ident), a token that is
// neither an identifier nor carries the flag bits tested on that line.
158 if (ltoken.size() == 1) {
160 if (tok != Token::identifier &&
161 (tok & Token::predefinedset) != Token::predefinedset)
162 return invalidExpressionToken(ident);
165 if (tok != Token::identifier &&
166 (tok & (Token::expression | Token::predefinedset)) == 0)
167 return invalidExpressionToken(ident);
173 if (tok != Token::identifier && (tok & Token::expression) == 0)
174 return invalidExpressionToken(ident);
177 ltoken.push_back(token);
// Nothing matched: report a missing command when no token has been collected
// yet, otherwise an unrecognized token.
181 if (ltoken.empty()) {
return commandExpected(); }
183 return unrecognizedToken();
// Advances ichT over a run of whitespace characters (bounded by cchScript)
// and stores the distance from ichToken in cchToken.
189 bool SelectionCompiler::lookingAtLeadingWhitespace() {
191 while (ichT < cchScript && std::isspace(script[ichT])) {
194 cchToken = ichT - ichToken;
// Tests for a line terminator at the scan position. Reaching the end of the
// script counts as an end of line.
198 bool SelectionCompiler::lookingAtEndOfLine() {
199 if (ichToken == cchScript)
return true;
201 char ch = script[ichT];
// An immediately following '\n' is consumed as well, when one is present.
204 if (ichT < cchScript && script[ichT] ==
'\n') ++ichT;
205 }
else if (ch ==
'\n') {
// Length of the matched terminator.
210 cchToken = ichT - ichToken;
// Tests for a ';' at the scan position. Returns false at the end of the script
// or when the current character is not ';'.
214 bool SelectionCompiler::lookingAtEndOfStatement() {
215 if (ichToken == cchScript || script[ichToken] !=
';')
return false;
// Tests for a double-quoted string at the scan position. Returns false at the
// end of the script or when the current character is not '"'.
220 bool SelectionCompiler::lookingAtString() {
221 if (ichToken == cchScript)
return false;
222 if (script[ichToken] !=
'"')
return false;
// Scanning starts just past the opening quote.
228 int ichT = ichToken + 1;
// Tracks whether the previous character was an unpaired backslash, so that an
// escaped quote does not end the scan.
231 bool previousCharBackslash =
false;
232 while (ichT < cchScript) {
234 if (ch ==
'"' && !previousCharBackslash)
break;
// A backslash toggles the flag (two in a row cancel out); any other character
// clears it.
235 previousCharBackslash = ch ==
'\\' ? !previousCharBackslash :
false;
237 cchToken = ichT - ichToken;
// Builds a std::string from the current token's text, processing backslash
// escapes found inside it.
242 std::string SelectionCompiler::getUnescapedStringLiteral() {
// Buffer pre-filled with cchToken - 2 spaces.
244 std::string sb(cchToken - 2,
' ');
// Scan range: from one past the token start up to, but excluding, the token's
// last character.
246 int ichMax = ichToken + cchToken - 1;
247 int ich = ichToken + 1;
249 while (ich < ichMax) {
250 char ch = script[ich++];
// A backslash is treated as an escape only when another character follows it
// inside the scan range.
251 if (ch ==
'\\' && ich < ichMax) {
// Numeric escapes: 'x' takes up to 2 hex digits, the other form up to 4.
272 int digitCount = ch ==
'x' ? 2 : 4;
// Reads hex digits until the count runs out, the range ends, or a non-hex
// character is met.
275 for (
int k = digitCount; --k >= 0 && ich < ichMax;) {
276 char chT = script[ich];
277 int hexit = getHexitValue(chT);
278 if (hexit < 0)
break;
// Maps a hexadecimal digit character to its numeric value. The ranges
// '0'-'9', 'a'-'f' and 'A'-'F' are handled; letters map to 10..15.
293 int SelectionCompiler::getHexitValue(
char ch) {
294 if (ch >=
'0' && ch <=
'9')
296 else if (ch >=
'a' && ch <=
'f')
297 return 10 + ch -
'a';
298 else if (ch >=
'A' && ch <=
'F')
299 return 10 + ch -
'A';
// Scans a run of characters that ends at ';', '\r', '\n' or the end of the
// script, and stores its length in cchToken.
304 bool SelectionCompiler::lookingAtSpecialString() {
// NOTE(review): script[ichT] is read here before the loop's ichT < cchScript
// check -- confirm ichT is always in range at this point.
306 char ch = script[ichT];
307 while (ichT < cchScript && ch !=
';' && ch !=
'\r' && ch !=
'\n') {
310 cchToken = ichT - ichToken;
// Tests for a decimal literal at the scan position: an optional '-', digits,
// a '.', then more digits. The bool parameter is unnamed and therefore unused.
314 bool SelectionCompiler::lookingAtDecimal(
bool) {
315 if (ichToken == cchScript) {
return false; }
// Optional leading minus sign.
318 if (script[ichT] ==
'-') { ++ichT; }
319 bool digitSeen =
false;
// Integer-part digits.
321 while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
// Without a '.' after the digits this is not a decimal.
326 if (ichT == cchScript || ch !=
'.') {
return false; }
// Special case: a '.' directly preceded by an alphabetic character.
329 if (ch ==
'.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
// Fractional-part digits.
334 while (ichT < cchScript && std::isdigit(script[ichT])) {
338 cchToken = ichT - ichToken;
// Tests for an integer literal at the scan position. A leading '-' is accepted
// only when allowNegative is true, and at least one digit is required.
342 bool SelectionCompiler::lookingAtInteger(
bool allowNegative) {
343 if (ichToken == cchScript) {
return false; }
345 if (allowNegative && script[ichToken] ==
'-') { ++ichT; }
// Remember where the digits start so an empty digit run can be detected.
346 int ichBeginDigits = ichT;
347 while (ichT < cchScript && std::isdigit(script[ichT])) {
350 if (ichBeginDigits == ichT) {
return false; }
351 cchToken = ichT - ichToken;
// The final verdict is delegated to isInteger() on the matched text.
352 return isInteger(script.substr(ichToken, cchToken).c_str());
// Tests for an operator or identifier-like token at the scan position and
// stores its length in cchToken. Returns false at the end of the script.
355 bool SelectionCompiler::lookingAtLookupToken() {
356 if (ichToken == cchScript) {
return false; }
// Dispatch on the first character, which is consumed by the post-increment.
360 switch (ch = script[ichT++]) {
// A doubled character (the same one repeated) is consumed as a single token.
369 if (ichT < cchScript && script[ichT] == ch) { ++ichT; }
// A following '<', '=' or '>' is part of the same token.
374 if (ichT < cchScript &&
375 ((ch = script[ichT]) ==
'<' || ch ==
'=' || ch ==
'>')) {
// A following '=' is part of the same token.
381 if (ichT < cchScript && script[ichT] ==
'=') { ++ichT; }
// NOTE(review): `(ch < 'A' && ch > 'Z')` can never be true, so this whole
// condition is always false and the check never fires. `||` was probably
// intended, mirroring the lowercase test -- confirm before changing.
384 if ((ch <
'a' || ch >
'z') && (ch <
'A' && ch >
'Z') && ch !=
'_') {
// The token continues over letters, digits and the listed punctuation, and
// stops at whitespace.
390 while (ichT < cchScript && !std::isspace(ch = script[ichT]) &&
391 (std::isalpha(ch) || std::isdigit(ch) || ch ==
'_' || ch ==
'.' ||
392 ch ==
'*' || ch ==
'?' || ch ==
'+' || ch ==
'-' || ch ==
'[' ||
399 cchToken = ichT - ichToken;
// Compiles one command from its token list. ltoken[0] is taken as the command
// token, so the list must not be empty.
404 bool SelectionCompiler::compileCommand(
const std::vector<Token>& ltoken) {
405 const Token& tokenCommand = ltoken[0];
406 int tokCommand = tokenCommand.tok;
// The token list is copied into atokenCommand.
408 atokenCommand = ltoken;
// Commands carrying the expressionCommand bit must also compile their
// expression successfully.
409 if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
// Picks the token index at which the expression inside atokenCommand starts,
// based on the command token, and compiles the expression from there.
416 bool SelectionCompiler::compileExpression() {
419 int tokCommand = atokenCommand[0].tok;
420 if (tokCommand == Token::define) {
422 }
else if ((tokCommand & Token::embeddedExpression) != 0) {
// Skip forward to the first left parenthesis.
424 while (i < atokenCommand.size() &&
425 atokenCommand[i].tok != Token::leftparen)
// Running off the end of the tokens without a match is treated as success.
429 if (i >= atokenCommand.size()) {
return true; }
430 return compileExpression(i);
// Appends the given token to ltokenPostfix.
433 bool SelectionCompiler::addTokenToPostfix(
const Token& token) {
434 ltokenPostfix.push_back(token);
// Rewrites the tokens of atokenCommand from index itoken onward through the
// clause parser into ltokenPostfix, then replaces atokenCommand with the
// result.
438 bool SelectionCompiler::compileExpression(
int itoken) {
439 ltokenPostfix.clear();
// Tokens before the expression are copied through unchanged.
440 for (
int i = 0; i < itoken; ++i) {
441 addTokenToPostfix(atokenCommand[i]);
// The clause parser reads from atokenInfix, starting at itokenInfix.
444 atokenInfix = atokenCommand;
445 itokenInfix = itoken;
// The parsed expression is bracketed by begin/end marker tokens.
447 addTokenToPostfix(Token::tokenExpressionBegin);
448 if (!clauseOr()) {
return false; }
450 addTokenToPostfix(Token::tokenExpressionEnd);
// Any tokens left over after the expression are an error.
451 if (itokenInfix != atokenInfix.size()) {
return endOfExpressionExpected(); }
453 atokenCommand = ltokenPostfix;
// Returns the current infix token and advances itokenInfix. When the tokens
// are exhausted, a default-constructed Token is returned instead.
457 Token SelectionCompiler::tokenNext() {
458 if (itokenInfix == atokenInfix.size()) {
return Token(); }
459 return atokenInfix[itokenInfix++];
// Returns the value of the current infix token without advancing itokenInfix.
// The exhausted-input case is tested first.
462 std::any SelectionCompiler::valuePeek() {
463 if (itokenInfix == atokenInfix.size()) {
466 return atokenInfix[itokenInfix].value;
// Returns the tok code of the current infix token without advancing
// itokenInfix. The exhausted-input case is tested first.
470 int SelectionCompiler::tokPeek() {
471 if (itokenInfix == atokenInfix.size()) {
474 return atokenInfix[itokenInfix].tok;
// Parses clauseAnd operands joined by Token::opOr. Fails as soon as an operand
// fails to parse.
478 bool SelectionCompiler::clauseOr() {
479 if (!clauseAnd()) {
return false; }
481 while (tokPeek() == Token::opOr) {
482 Token tokenOr = tokenNext();
483 if (!clauseAnd()) {
return false; }
// The operator is emitted after its right-hand operand (postfix order).
484 addTokenToPostfix(tokenOr);
// Parses clauseNot operands joined by Token::opAnd. Fails as soon as an
// operand fails to parse.
489 bool SelectionCompiler::clauseAnd() {
490 if (!clauseNot()) {
return false; }
492 while (tokPeek() == Token::opAnd) {
493 Token tokenAnd = tokenNext();
494 if (!clauseNot()) {
return false; }
// The operator is emitted after its right-hand operand (postfix order).
495 addTokenToPostfix(tokenAnd);
// Parses an optional Token::opNot prefix. The recursion allows repeated
// negation; without the prefix, parsing falls through to clausePrimitive().
500 bool SelectionCompiler::clauseNot() {
501 if (tokPeek() == Token::opNot) {
502 Token tokenNot = tokenNext();
503 if (!clauseNot()) {
return false; }
// The operator is emitted after its operand (postfix order).
504 return addTokenToPostfix(tokenNot);
506 return clausePrimitive();
// Parses a single primary clause by dispatching to a dedicated clause parser.
// Falls back to unrecognizedExpressionToken() when nothing matches.
509 bool SelectionCompiler::clausePrimitive() {
513 return clauseWithin();
515 case Token::alphahull:
516 return clauseAlphaHull();
// An asterisk and an identifier are both parsed as an object name.
518 case Token::asterisk:
519 case Token::identifier:
520 return clauseChemObjName();
523 return clauseIndex();
// Tokens carrying all atomproperty bits start a comparison.
525 if ((tok & Token::atomproperty) == Token::atomproperty) {
526 return clauseComparator();
// Tokens without the full predefinedset bits leave the switch.
528 if ((tok & Token::predefinedset) != Token::predefinedset) {
break; }
// The token is consumed and emitted unchanged.
534 return addTokenToPostfix(tokenNext());
// Parenthesised sub-expression: recurse through clauseOr(), then require ')'.
535 case Token::leftparen:
537 if (!clauseOr()) {
return false; }
538 if (tokenNext().tok != Token::rightparen) {
539 return rightParenthesisExpected();
543 return unrecognizedExpressionToken();
// Parses "<property> <comparator> <number>" and emits one Token built from the
// comparator's tok, the property's tok and the numeric value.
546 bool SelectionCompiler::clauseComparator() {
547 Token tokenAtomProperty = tokenNext();
548 Token tokenComparator = tokenNext();
// The second token must carry a comparator bit.
549 if ((tokenComparator.tok & Token::comparator) == 0) {
550 return comparisonOperatorExpected();
// The third token must be an integer or a decimal.
553 Token tokenValue = tokenNext();
554 if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
555 return numberExpected();
// The stored std::any is unpacked according to the type it holds (int or
// float).
559 if (tokenValue.value.type() ==
typeid(
int)) {
560 val = std::any_cast<int>(tokenValue.value);
561 }
else if (tokenValue.value.type() ==
typeid(
float)) {
562 val = std::any_cast<float>(tokenValue.value);
569 return addTokenToPostfix(
570 Token(tokenComparator.tok, tokenAtomProperty.tok, floatVal));
// Parses the parenthesised argument list of a "within" clause and emits a
// Token::within that carries the distance.
573 bool SelectionCompiler::clauseWithin() {
// An opening parenthesis is required.
575 if (tokenNext().tok != Token::leftparen) {
576 return leftParenthesisExpected();
// First argument: the distance. Unsupported token kinds are rejected.
580 Token tokenDistance = tokenNext();
581 switch (tokenDistance.tok) {
587 return numberOrKeywordExpected();
// The separator is matched against Token::opOr; a mismatch is reported as a
// missing comma.
590 if (tokenNext().tok != Token::opOr) {
591 return commaExpected();
// A closing parenthesis is required.
598 if (tokenNext().tok != Token::rightparen) {
599 return rightParenthesisExpected();
602 return addTokenToPostfix(Token(Token::within,
distance));
// Parses the parenthesised argument of an "alphahull" clause and emits a
// Token::alphahull that carries the alpha value.
605 bool SelectionCompiler::clauseAlphaHull() {
// An opening parenthesis is required.
607 if (tokenNext().tok != Token::leftparen) {
608 return leftParenthesisExpected();
// The argument's value is taken from the token; unsupported token kinds are
// rejected.
612 Token tokenAlpha = tokenNext();
613 switch (tokenAlpha.tok) {
616 alpha = tokenAlpha.value;
619 return numberOrKeywordExpected();
// A closing parenthesis is required.
622 if (tokenNext().tok != Token::rightparen) {
623 return rightParenthesisExpected();
626 return addTokenToPostfix(Token(Token::alphahull, alpha));
// Consumes one token. An identifier that holds a std::string is validated with
// isNameValid() and, if valid, emitted as a Token::name.
629 bool SelectionCompiler::clauseChemObjName() {
630 Token token = tokenNext();
// The type check guards the any_cast on the next line.
631 if (token.tok == Token::identifier &&
632 token.value.type() ==
typeid(std::string)) {
633 std::string name = std::any_cast<std::string>(token.value);
634 if (isNameValid(name)) {
635 return addTokenToPostfix(Token(Token::name, name));
// An invalid name is reported through compileError().
637 return compileError(
"invalid name: " + name);
// Validates a name by iterating over its characters. The name is accepted when
// ndot is at most 3 and nbracket is 0 after the loop.
644 bool SelectionCompiler::isNameValid(
const std::string& name) {
647 for (
unsigned int i = 0; i < name.size(); ++i) {
662 return (ndot <= 3 && nbracket == 0) ? true :
false;
// Parses an index clause: either a single integer or a pair of integers joined
// by Token::to. Both forms are emitted as a Token::index. Anything that does
// not start with an integer yields numberExpected().
665 bool SelectionCompiler::clauseIndex() {
666 Token token = tokenNext();
667 if (token.tok == Token::integer) {
668 int index = std::any_cast<int>(token.value);
// Range form: Token::to must be followed by a second integer.
670 if (tok == Token::to) {
673 if (tok != Token::integer) {
return numberExpected(); }
675 std::any intVal = tokenNext().value;
// The type check guards the any_cast on the next line.
677 if (intVal.type() !=
typeid(
int)) {
return false; }
678 int second = std::any_cast<int>(intVal);
// NOTE(review): the declaration of `first` is not visible here; presumably it
// holds the same value as `index` -- confirm.
680 return addTokenToPostfix(
681 Token(Token::index, std::any(std::make_pair(first, second))));
// Single-index form.
684 return addTokenToPostfix(Token(Token::index, std::any(index)));
687 return numberExpected();
This basic Periodic Table class was originally taken from the data.cpp file in OpenBabel.
Real distance(const DynamicVector< Real > &v1, const DynamicVector< Real > &v2)
Returns the distance between two DynamicVectors.