OpenMD 3.1
Molecular Dynamics in the Open
Loading...
Searching...
No Matches
SelectionCompiler.cpp
1/*
2 * Copyright (c) 2004-present, The University of Notre Dame. All rights
3 * reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 * SUPPORT OPEN SCIENCE! If you use OpenMD or its source code in your
32 * research, please cite the appropriate papers when you publish your
33 * work. Good starting points are:
34 *
35 * [1] Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).
36 * [2] Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).
37 * [3] Sun, Lin & Gezelter, J. Chem. Phys. 128, 234107 (2008).
38 * [4] Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
39 * [5] Kuang & Gezelter, Mol. Phys., 110, 691-701 (2012).
40 * [6] Lamichhane, Gezelter & Newman, J. Chem. Phys. 141, 134109 (2014).
41 * [7] Lamichhane, Newman & Gezelter, J. Chem. Phys. 141, 134110 (2014).
42 * [8] Bhattarai, Newman & Gezelter, Phys. Rev. B 99, 094106 (2019).
43 */
44
45#include "selection/SelectionCompiler.hpp"
46
47#include <any>
48#include <string>
49
50#include "utils/StringUtils.hpp"
51
52namespace OpenMD {
53
54 bool SelectionCompiler::compile(const std::string& filename,
55 const std::string& script) {
56 this->filename = filename;
57 this->script = script;
58 lineNumbers.clear();
59 lineIndices.clear();
60 aatokenCompiled.clear();
61
62 if (internalCompile()) { return true; }
63
64 std::size_t icharEnd;
65 if ((icharEnd = script.find('\r', ichCurrentCommand)) ==
66 std::string::npos &&
67 (icharEnd = script.find('\n', ichCurrentCommand)) ==
68 std::string::npos) {
69 icharEnd = script.size();
70 }
71 errorLine = script.substr(ichCurrentCommand, icharEnd);
72 return false;
73 }
74
75 bool SelectionCompiler::internalCompile() {
76 cchScript = script.size();
77 ichToken = 0;
78 lineCurrent = 1;
79
80 error = false;
81
82 // std::vector<Token> lltoken;
83 aatokenCompiled.clear();
84 std::vector<Token> ltoken;
85
86 Token tokenCommand;
87 int tokCommand = Token::nada;
88
89 for (; true; ichToken += cchToken) {
90 if (lookingAtLeadingWhitespace()) continue;
91 // if (lookingAtComment())
92 // continue;
93 bool endOfLine = lookingAtEndOfLine();
94 if (endOfLine || lookingAtEndOfStatement()) {
95 if (tokCommand != Token::nada) {
96 if (!compileCommand(ltoken)) { return false; }
97 aatokenCompiled.push_back(atokenCommand);
98 lineNumbers.push_back(lineCurrent);
99 lineIndices.push_back(ichCurrentCommand);
100 ltoken.clear();
101 tokCommand = Token::nada;
102 }
103
104 if (ichToken < cchScript) {
105 if (endOfLine) ++lineCurrent;
106 continue;
107 }
108 break;
109 }
110
111 if (tokCommand != Token::nada) {
112 if (lookingAtString()) {
113 std::string str = getUnescapedStringLiteral();
114 ltoken.push_back(Token(Token::string, str));
115 continue;
116 }
117 // if ((tokCommand & Token::specialstring) != 0 &&
118 // lookingAtSpecialString()) {
119 // std::string str = script.substr(ichToken, ichToken + cchToken);
120 // ltoken.push_back(Token(Token::string, str));
121 // continue;
122 //}
123 // if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
124 if (lookingAtDecimal((tokCommand) != 0)) {
125 float value = lexi_cast<float>(script.substr(ichToken, cchToken));
126 ltoken.push_back(Token(Token::decimal, std::any(value)));
127 continue;
128 }
129 // if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
130 if (lookingAtInteger((tokCommand) != 0)) {
131 int val = lexi_cast<int>(script.substr(ichToken, cchToken));
132 ltoken.push_back(Token(Token::integer, std::any(val)));
133 continue;
134 }
135 }
136
137 if (lookingAtLookupToken()) {
138 std::string ident = script.substr(ichToken, cchToken);
139 Token token;
140 Token* pToken = TokenMap::getInstance().getToken(ident);
141 if (pToken != NULL) {
142 token = *pToken;
143 } else {
144 token = Token(Token::identifier, ident);
145 }
146
147 int tok = token.tok;
148
149 switch (tokCommand) {
150 case Token::nada:
151 ichCurrentCommand = ichToken;
152 // tokenCommand = token;
153 tokCommand = tok;
154 if ((tokCommand & Token::command) == 0) return commandExpected();
155 break;
156
157 case Token::define:
158 if (ltoken.size() == 1) {
159 // we are looking at the variable name
160 if (tok != Token::identifier &&
161 (tok & Token::predefinedset) != Token::predefinedset)
162 return invalidExpressionToken(ident);
163 } else {
164 // we are looking at the expression
165 if (tok != Token::identifier &&
166 (tok & (Token::expression | Token::predefinedset)) == 0)
167 return invalidExpressionToken(ident);
168 }
169
170 break;
171
172 case Token::select:
173 if (tok != Token::identifier && (tok & Token::expression) == 0)
174 return invalidExpressionToken(ident);
175 break;
176 }
177 ltoken.push_back(token);
178 continue;
179 }
180
181 if (ltoken.empty()) { return commandExpected(); }
182
183 return unrecognizedToken();
184 }
185
186 return true;
187 }
188
189 bool SelectionCompiler::lookingAtLeadingWhitespace() {
190 int ichT = ichToken;
191 while (ichT < cchScript && std::isspace(script[ichT])) {
192 ++ichT;
193 }
194 cchToken = ichT - ichToken;
195 return cchToken > 0;
196 }
197
198 bool SelectionCompiler::lookingAtEndOfLine() {
199 if (ichToken == cchScript) return true;
200 int ichT = ichToken;
201 char ch = script[ichT];
202 if (ch == '\r') {
203 ++ichT;
204 if (ichT < cchScript && script[ichT] == '\n') ++ichT;
205 } else if (ch == '\n') {
206 ++ichT;
207 } else {
208 return false;
209 }
210 cchToken = ichT - ichToken;
211 return true;
212 }
213
214 bool SelectionCompiler::lookingAtEndOfStatement() {
215 if (ichToken == cchScript || script[ichToken] != ';') return false;
216 cchToken = 1;
217 return true;
218 }
219
220 bool SelectionCompiler::lookingAtString() {
221 if (ichToken == cchScript) return false;
222 if (script[ichToken] != '"') return false;
223 // remove support for single quote
224 // in order to use it in atom expressions
225 // char chFirst = script.charAt(ichToken);
226 // if (chFirst != '"' && chFirst != '\'')
227 // return false;
228 int ichT = ichToken + 1;
229 // while (ichT < cchScript && script.charAt(ichT++) != chFirst)
230 char ch;
231 bool previousCharBackslash = false;
232 while (ichT < cchScript) {
233 ch = script[ichT++];
234 if (ch == '"' && !previousCharBackslash) break;
235 previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
236 }
237 cchToken = ichT - ichToken;
238
239 return true;
240 }
241
242 std::string SelectionCompiler::getUnescapedStringLiteral() {
243 /** @todo */
244 std::string sb(cchToken - 2, ' ');
245
246 int ichMax = ichToken + cchToken - 1;
247 int ich = ichToken + 1;
248
249 while (ich < ichMax) {
250 char ch = script[ich++];
251 if (ch == '\\' && ich < ichMax) {
252 ch = script[ich++];
253 switch (ch) {
254 case 'b':
255 ch = '\b';
256 break;
257 case 'n':
258 ch = '\n';
259 break;
260 case 't':
261 ch = '\t';
262 break;
263 case 'r':
264 ch = '\r';
265 // fall into
266 case '"':
267 case '\\':
268 case '\'':
269 break;
270 case 'x':
271 case 'u':
272 int digitCount = ch == 'x' ? 2 : 4;
273 if (ich < ichMax) {
274 int unicode = 0;
275 for (int k = digitCount; --k >= 0 && ich < ichMax;) {
276 char chT = script[ich];
277 int hexit = getHexitValue(chT);
278 if (hexit < 0) break;
279 unicode <<= 4;
280 unicode += hexit;
281 ++ich;
282 }
283 ch = (char)unicode;
284 }
285 }
286 }
287 sb.append(1, ch);
288 }
289
290 return sb;
291 }
292
293 int SelectionCompiler::getHexitValue(char ch) {
294 if (ch >= '0' && ch <= '9')
295 return ch - '0';
296 else if (ch >= 'a' && ch <= 'f')
297 return 10 + ch - 'a';
298 else if (ch >= 'A' && ch <= 'F')
299 return 10 + ch - 'A';
300 else
301 return -1;
302 }
303
304 bool SelectionCompiler::lookingAtSpecialString() {
305 int ichT = ichToken;
306 char ch = script[ichT];
307 while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
308 ++ichT;
309 }
310 cchToken = ichT - ichToken;
311 return cchToken > 0;
312 }
313
314 bool SelectionCompiler::lookingAtDecimal(bool) {
315 if (ichToken == cchScript) { return false; }
316
317 int ichT = ichToken;
318 if (script[ichT] == '-') { ++ichT; }
319 bool digitSeen = false;
320 char ch = 'X';
321 while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
322 ++ichT;
323 digitSeen = true;
324 }
325
326 if (ichT == cchScript || ch != '.') { return false; }
327
328 // to support DMPC.1, let's check the character before the dot
329 if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
330 return false;
331 }
332
333 ++ichT;
334 while (ichT < cchScript && std::isdigit(script[ichT])) {
335 ++ichT;
336 digitSeen = true;
337 }
338 cchToken = ichT - ichToken;
339 return digitSeen;
340 }
341
342 bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
343 if (ichToken == cchScript) { return false; }
344 int ichT = ichToken;
345 if (allowNegative && script[ichToken] == '-') { ++ichT; }
346 int ichBeginDigits = ichT;
347 while (ichT < cchScript && std::isdigit(script[ichT])) {
348 ++ichT;
349 }
350 if (ichBeginDigits == ichT) { return false; }
351 cchToken = ichT - ichToken;
352 return isInteger(script.substr(ichToken, cchToken).c_str());
353 }
354
355 bool SelectionCompiler::lookingAtLookupToken() {
356 if (ichToken == cchScript) { return false; }
357
358 int ichT = ichToken;
359 char ch;
360 switch (ch = script[ichT++]) {
361 case '(':
362 case ')':
363 case ',':
364 case '[':
365 case ']':
366 break;
367 case '&':
368 case '|':
369 if (ichT < cchScript && script[ichT] == ch) { ++ichT; }
370 break;
371 case '<':
372 case '=':
373 case '>':
374 if (ichT < cchScript &&
375 ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
376 ++ichT;
377 }
378 break;
379 case '/':
380 case '!':
381 if (ichT < cchScript && script[ichT] == '=') { ++ichT; }
382 break;
383 default:
384 if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
385 return false;
386 }
387 [[fallthrough]];
388 case '*':
389 case '?': // include question marks in identifier for atom expressions
390 while (ichT < cchScript && !std::isspace(ch = script[ichT]) &&
391 (std::isalpha(ch) || std::isdigit(ch) || ch == '_' || ch == '.' ||
392 ch == '*' || ch == '?' || ch == '+' || ch == '-' || ch == '[' ||
393 ch == ']')) {
394 ++ichT;
395 }
396 break;
397 }
398
399 cchToken = ichT - ichToken;
400
401 return true;
402 }
403
404 bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
405 const Token& tokenCommand = ltoken[0];
406 int tokCommand = tokenCommand.tok;
407
408 atokenCommand = ltoken;
409 if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
410 return false;
411 }
412
413 return true;
414 }
415
416 bool SelectionCompiler::compileExpression() {
417 /** todo */
418 unsigned int i = 1;
419 int tokCommand = atokenCommand[0].tok;
420 if (tokCommand == Token::define) {
421 i = 2;
422 } else if ((tokCommand & Token::embeddedExpression) != 0) {
423 // look for the open parenthesis
424 while (i < atokenCommand.size() &&
425 atokenCommand[i].tok != Token::leftparen)
426 ++i;
427 }
428
429 if (i >= atokenCommand.size()) { return true; }
430 return compileExpression(i);
431 }
432
433 bool SelectionCompiler::addTokenToPostfix(const Token& token) {
434 ltokenPostfix.push_back(token);
435 return true;
436 }
437
438 bool SelectionCompiler::compileExpression(int itoken) {
439 ltokenPostfix.clear();
440 for (int i = 0; i < itoken; ++i) {
441 addTokenToPostfix(atokenCommand[i]);
442 }
443
444 atokenInfix = atokenCommand;
445 itokenInfix = itoken;
446
447 addTokenToPostfix(Token::tokenExpressionBegin);
448 if (!clauseOr()) { return false; }
449
450 addTokenToPostfix(Token::tokenExpressionEnd);
451 if (itokenInfix != atokenInfix.size()) { return endOfExpressionExpected(); }
452
453 atokenCommand = ltokenPostfix;
454 return true;
455 }
456
457 Token SelectionCompiler::tokenNext() {
458 if (itokenInfix == atokenInfix.size()) { return Token(); }
459 return atokenInfix[itokenInfix++];
460 }
461
462 std::any SelectionCompiler::valuePeek() {
463 if (itokenInfix == atokenInfix.size()) {
464 return std::any();
465 } else {
466 return atokenInfix[itokenInfix].value;
467 }
468 }
469
470 int SelectionCompiler::tokPeek() {
471 if (itokenInfix == atokenInfix.size()) {
472 return 0;
473 } else {
474 return atokenInfix[itokenInfix].tok;
475 }
476 }
477
478 bool SelectionCompiler::clauseOr() {
479 if (!clauseAnd()) { return false; }
480
481 while (tokPeek() == Token::opOr) {
482 Token tokenOr = tokenNext();
483 if (!clauseAnd()) { return false; }
484 addTokenToPostfix(tokenOr);
485 }
486 return true;
487 }
488
489 bool SelectionCompiler::clauseAnd() {
490 if (!clauseNot()) { return false; }
491
492 while (tokPeek() == Token::opAnd) {
493 Token tokenAnd = tokenNext();
494 if (!clauseNot()) { return false; }
495 addTokenToPostfix(tokenAnd);
496 }
497 return true;
498 }
499
500 bool SelectionCompiler::clauseNot() {
501 if (tokPeek() == Token::opNot) {
502 Token tokenNot = tokenNext();
503 if (!clauseNot()) { return false; }
504 return addTokenToPostfix(tokenNot);
505 }
506 return clausePrimitive();
507 }
508
509 bool SelectionCompiler::clausePrimitive() {
510 int tok = tokPeek();
511 switch (tok) {
512 case Token::within:
513 return clauseWithin();
514
515 case Token::alphahull:
516 return clauseAlphaHull();
517
518 case Token::asterisk:
519 case Token::identifier:
520 return clauseChemObjName();
521
522 case Token::integer:
523 return clauseIndex();
524 default:
525 if ((tok & Token::atomproperty) == Token::atomproperty) {
526 return clauseComparator();
527 }
528 if ((tok & Token::predefinedset) != Token::predefinedset) { break; }
529 // fall into the code and below and just add the token
530 [[fallthrough]];
531 case Token::all:
532 case Token::none:
533 case Token::hull:
534 return addTokenToPostfix(tokenNext());
535 case Token::leftparen:
536 tokenNext();
537 if (!clauseOr()) { return false; }
538 if (tokenNext().tok != Token::rightparen) {
539 return rightParenthesisExpected();
540 }
541 return true;
542 }
543 return unrecognizedExpressionToken();
544 }
545
546 bool SelectionCompiler::clauseComparator() {
547 Token tokenAtomProperty = tokenNext();
548 Token tokenComparator = tokenNext();
549 if ((tokenComparator.tok & Token::comparator) == 0) {
550 return comparisonOperatorExpected();
551 }
552
553 Token tokenValue = tokenNext();
554 if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
555 return numberExpected();
556 }
557
558 float val;
559 if (tokenValue.value.type() == typeid(int)) {
560 val = std::any_cast<int>(tokenValue.value);
561 } else if (tokenValue.value.type() == typeid(float)) {
562 val = std::any_cast<float>(tokenValue.value);
563 } else {
564 return false;
565 }
566
567 std::any floatVal;
568 floatVal = val;
569 return addTokenToPostfix(
570 Token(tokenComparator.tok, tokenAtomProperty.tok, floatVal));
571 }
572
573 bool SelectionCompiler::clauseWithin() {
574 tokenNext(); // WITHIN
575 if (tokenNext().tok != Token::leftparen) { // (
576 return leftParenthesisExpected();
577 }
578
579 std::any distance;
580 Token tokenDistance = tokenNext(); // distance
581 switch (tokenDistance.tok) {
582 case Token::integer:
583 case Token::decimal:
584 distance = tokenDistance.value;
585 break;
586 default:
587 return numberOrKeywordExpected();
588 }
589
590 if (tokenNext().tok != Token::opOr) { // ,
591 return commaExpected();
592 }
593
594 if (!clauseOr()) { // *expression*
595 return false;
596 }
597
598 if (tokenNext().tok != Token::rightparen) { // )T
599 return rightParenthesisExpected();
600 }
601
602 return addTokenToPostfix(Token(Token::within, distance));
603 }
604
605 bool SelectionCompiler::clauseAlphaHull() {
606 tokenNext(); // alphaHull
607 if (tokenNext().tok != Token::leftparen) { // (
608 return leftParenthesisExpected();
609 }
610
611 std::any alpha;
612 Token tokenAlpha = tokenNext(); // alpha
613 switch (tokenAlpha.tok) {
614 case Token::integer:
615 case Token::decimal:
616 alpha = tokenAlpha.value;
617 break;
618 default:
619 return numberOrKeywordExpected();
620 }
621
622 if (tokenNext().tok != Token::rightparen) { // )T
623 return rightParenthesisExpected();
624 }
625
626 return addTokenToPostfix(Token(Token::alphahull, alpha));
627 }
628
629 bool SelectionCompiler::clauseChemObjName() {
630 Token token = tokenNext();
631 if (token.tok == Token::identifier &&
632 token.value.type() == typeid(std::string)) {
633 std::string name = std::any_cast<std::string>(token.value);
634 if (isNameValid(name)) {
635 return addTokenToPostfix(Token(Token::name, name));
636 } else {
637 return compileError("invalid name: " + name);
638 }
639 }
640
641 return false;
642 }
643
644 bool SelectionCompiler::isNameValid(const std::string& name) {
645 int nbracket = 0;
646 int ndot = 0;
647 for (unsigned int i = 0; i < name.size(); ++i) {
648 switch (name[i]) {
649 case '[':
650 ++nbracket;
651 break;
652 case ']':
653 --nbracket;
654 break;
655 case '.':
656 ++ndot;
657 break;
658 }
659 }
660
661 // only allow 3 dots at most
662 return (ndot <= 3 && nbracket == 0) ? true : false;
663 }
664
665 bool SelectionCompiler::clauseIndex() {
666 Token token = tokenNext();
667 if (token.tok == Token::integer) {
668 int index = std::any_cast<int>(token.value);
669 int tok = tokPeek();
670 if (tok == Token::to) {
671 tokenNext();
672 tok = tokPeek();
673 if (tok != Token::integer) { return numberExpected(); }
674
675 std::any intVal = tokenNext().value;
676 int first = index;
677 if (intVal.type() != typeid(int)) { return false; }
678 int second = std::any_cast<int>(intVal);
679
680 return addTokenToPostfix(
681 Token(Token::index, std::any(std::make_pair(first, second))));
682
683 } else {
684 return addTokenToPostfix(Token(Token::index, std::any(index)));
685 }
686 } else {
687 return numberExpected();
688 }
689 }
690} // namespace OpenMD
This basic Periodic Table class was originally taken from the data.cpp file in OpenBabel.
Real distance(const DynamicVector< Real > &v1, const DynamicVector< Real > &v2)
Returns the distance between two DynamicVectors.