OpenMD 3.1
Molecular Dynamics in the Open
Loading...
Searching...
No Matches
CharScanner.hpp
1#ifndef INC_CharScanner_hpp__
2#define INC_CharScanner_hpp__
3
4/* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
7 *
8 * $Id$
9 */
10
11#include <antlr/config.hpp>
12#include <cstdio>
13#include <map>
14#include <cstring>
15
16#ifdef HAS_NOT_CCTYPE_H
17#include <ctype.h>
18#else
19#include <cctype>
20#endif
21
22#if ( _MSC_VER == 1200 )
23// VC6 seems to need this
24// note that this is not a standard C++ include file.
25# include <stdio.h>
26#endif
27
28#include <antlr/TokenStream.hpp>
29#include <antlr/RecognitionException.hpp>
30#include <antlr/SemanticException.hpp>
31#include <antlr/MismatchedCharException.hpp>
32#include <antlr/InputBuffer.hpp>
33#include <antlr/BitSet.hpp>
34#include <antlr/LexerSharedInputState.hpp>
35
36#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
37namespace antlr {
38#endif
39
40class ANTLR_API CharScanner;
41
42ANTLR_C_USING(tolower)
43
44#ifdef ANTLR_REALLY_NO_STRCASECMP
45// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
46// on the mac has neither...
/** Case-insensitive comparison of two NUL-terminated strings, used as a
 *  fallback when the platform provides neither strcasecmp nor stricmp.
 *
 *  Characters are folded with tolower() and compared as unsigned char
 *  values, so bytes >= 0x80 order after ASCII.
 *
 *  @param s1 first NUL-terminated string
 *  @param s2 second NUL-terminated string
 *  @return -1 if s1 orders before s2, 1 if it orders after, 0 if both are
 *          equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		// Convert through unsigned char first: handing tolower() a negative
		// value (a plain char >= 0x80 where char is signed) is undefined,
		// and comparing the results as signed char would mis-order them.
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));

		if (c1 != c2)
			return (c1 < c2) ? -1 : 1;
		if (c1 == 0)	// both strings ended at the same position
			return 0;
	}
}
58#else
59#ifdef NO_STRCASECMP
60ANTLR_C_USING(stricmp)
61#else
62ANTLR_C_USING(strcasecmp)
63#endif
64#endif
65
66/** Functor for the literals map
67 */
68class ANTLR_API CharScannerLiteralsLess {
69private:
70 const CharScanner* scanner;
71public:
72#ifdef NO_TEMPLATE_PARTS
73 CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
74#endif
75 CharScannerLiteralsLess(const CharScanner* theScanner)
76 : scanner(theScanner)
77 {
78 }
79
80 using result_type = bool;
81 using first_argument_type = ANTLR_USE_NAMESPACE(std)string;
82 using second_argument_type = ANTLR_USE_NAMESPACE(std)string;
83
84 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
85
86 // defaults are good enough..
87 // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
88 // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
89};
90
91/** Superclass of generated lexers
92 */
93class ANTLR_API CharScanner : public TokenStream {
94protected:
95 typedef RefToken (*factory_type)();
96public:
97 CharScanner(InputBuffer& cb, bool case_sensitive );
98 CharScanner(InputBuffer* cb, bool case_sensitive );
99 CharScanner(const LexerSharedInputState& state, bool case_sensitive );
100
101 virtual ~CharScanner()
102 {
103 }
104
105 virtual int LA(unsigned int i);
106
107 virtual void append(char c)
108 {
109 if (saveConsumedInput)
110 {
111 size_t l = text.length();
112
113 if ((l%256) == 0)
114 text.reserve(l+256);
115
116 text.replace(l,0,&c,1);
117 }
118 }
119
120 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
121 {
122 if( saveConsumedInput )
123 text += s;
124 }
125
126 virtual void commit()
127 {
128 inputState->getInput().commit();
129 }
130
131 /** called by the generated lexer to do error recovery, override to
132 * customize the behaviour.
133 */
134 virtual void recover(const RecognitionException&, const BitSet& tokenSet)
135 {
136 consume();
137 consumeUntil(tokenSet);
138 }
139
140 virtual void consume()
141 {
142 if (inputState->guessing == 0)
143 {
144 int c = LA(1);
145 if (caseSensitive)
146 {
147 append(c);
148 }
149 else
150 {
151 // use input.LA(), not LA(), to get original case
152 // CharScanner.LA() would toLower it.
153 append(inputState->getInput().LA(1));
154 }
155
156 // RK: in a sense I don't like this automatic handling.
157 if (c == '\t')
158 tab();
159 else
160 inputState->column++;
161 }
162 inputState->getInput().consume();
163 }
164
165 /** Consume chars until one matches the given char */
166 virtual void consumeUntil(int c)
167 {
168 for(;;)
169 {
170 int la_1 = LA(1);
171 if( la_1 == EOF_CHAR || la_1 == c )
172 break;
173 consume();
174 }
175 }
176
177 /** Consume chars until one matches the given set */
178 virtual void consumeUntil(const BitSet& set)
179 {
180 for(;;)
181 {
182 int la_1 = LA(1);
183 if( la_1 == EOF_CHAR || set.member(la_1) )
184 break;
185 consume();
186 }
187 }
188
189 /// Mark the current position and return a id for it
190 virtual unsigned int mark()
191 {
192 return inputState->getInput().mark();
193 }
194 /// Rewind the scanner to a previously marked position
195 virtual void rewind(unsigned int pos)
196 {
197 inputState->getInput().rewind(pos);
198 }
199
200 /// See if input contains character 'c' throw MismatchedCharException if not
201 virtual void match(int c)
202 {
203 int la_1 = LA(1);
204 if ( la_1 != c )
205 throw MismatchedCharException(la_1, c, false, this);
206 consume();
207 }
208
209 /** See if input contains element from bitset b
210 * throw MismatchedCharException if not
211 */
212 virtual void match(const BitSet& b)
213 {
214 int la_1 = LA(1);
215
216 if ( !b.member(la_1) )
217 throw MismatchedCharException( la_1, b, false, this );
218 consume();
219 }
220
221 /** See if input contains string 's' throw MismatchedCharException if not
222 * @note the string cannot match EOF
223 */
224 virtual void match( const char* s )
225 {
226 while( *s != '\0' )
227 {
228 // the & 0xFF is here to prevent sign extension lateron
229 int la_1 = LA(1), c = (*s++ & 0xFF);
230
231 if ( la_1 != c )
232 throw MismatchedCharException(la_1, c, false, this);
233
234 consume();
235 }
236 }
237 /** See if input contains string 's' throw MismatchedCharException if not
238 * @note the string cannot match EOF
239 */
240 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
241 {
242 size_t len = s.length();
243
244 for (size_t i = 0; i < len; i++)
245 {
246 // the & 0xFF is here to prevent sign extension lateron
247 int la_1 = LA(1), c = (s[i] & 0xFF);
248
249 if ( la_1 != c )
250 throw MismatchedCharException(la_1, c, false, this);
251
252 consume();
253 }
254 }
255 /** See if input does not contain character 'c'
256 * throw MismatchedCharException if not
257 */
258 virtual void matchNot(int c)
259 {
260 int la_1 = LA(1);
261
262 if ( la_1 == c )
263 throw MismatchedCharException(la_1, c, true, this);
264
265 consume();
266 }
267 /** See if input contains character in range c1-c2
268 * throw MismatchedCharException if not
269 */
270 virtual void matchRange(int c1, int c2)
271 {
272 int la_1 = LA(1);
273
274 if ( la_1 < c1 || la_1 > c2 )
275 throw MismatchedCharException(la_1, c1, c2, false, this);
276
277 consume();
278 }
279
280 virtual bool getCaseSensitive() const
281 {
282 return caseSensitive;
283 }
284
285 virtual void setCaseSensitive(bool t)
286 {
287 caseSensitive = t;
288 }
289
290 virtual bool getCaseSensitiveLiterals() const=0;
291
292 /// Get the line the scanner currently is in (starts at 1)
293 virtual int getLine() const
294 {
295 return inputState->line;
296 }
297
298 /// set the line number
299 virtual void setLine(int l)
300 {
301 inputState->line = l;
302 }
303
304 /// Get the column the scanner currently is in (starts at 1)
305 virtual int getColumn() const
306 {
307 return inputState->column;
308 }
309 /// set the column number
310 virtual void setColumn(int c)
311 {
312 inputState->column = c;
313 }
314
315 /// get the filename for the file currently used
316 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
317 {
318 return inputState->filename;
319 }
320 /// Set the filename the scanner is using (used in error messages)
321 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
322 {
323 inputState->filename = f;
324 }
325
326 virtual bool getCommitToPath() const
327 {
328 return commitToPath;
329 }
330
331 virtual void setCommitToPath(bool commit)
332 {
333 commitToPath = commit;
334 }
335
336 /** return a copy of the current text buffer */
337 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
338 {
339 return text;
340 }
341
342 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
343 {
344 text = s;
345 }
346
347 virtual void resetText()
348 {
349 text = "";
350 inputState->tokenStartColumn = inputState->column;
351 inputState->tokenStartLine = inputState->line;
352 }
353
354 virtual RefToken getTokenObject() const
355 {
356 return _returnToken;
357 }
358
359 /** Used to keep track of line breaks, needs to be called from
360 * within generated lexers when a \n \r is encountered.
361 */
362 virtual void newline()
363 {
364 ++inputState->line;
365 inputState->column = 1;
366 }
367
368 /** Advance the current column number by an appropriate amount according
369 * to the tabsize. This method needs to be explicitly called from the
370 * lexer rules encountering tabs.
371 */
372 virtual void tab()
373 {
374 int c = getColumn();
375 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
376 setColumn( nc );
377 }
378 /// set the tabsize. Returns the old tabsize
379 int setTabsize( int size )
380 {
381 int oldsize = tabsize;
382 tabsize = size;
383 return oldsize;
384 }
385 /// Return the tabsize used by the scanner
386 int getTabSize() const
387 {
388 return tabsize;
389 }
390
391 /** Report exception errors caught in nextToken() */
392 virtual void reportError(const RecognitionException& e);
393
394 /** Parser error-reporting function can be overridden in subclass */
395 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
396
397 /** Parser warning-reporting function can be overridden in subclass */
398 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
399
400 virtual InputBuffer& getInputBuffer()
401 {
402 return inputState->getInput();
403 }
404
405 virtual LexerSharedInputState getInputState()
406 {
407 return inputState;
408 }
409
410 /** set the input state for the lexer.
411 * @note state is a reference counted object, hence no reference */
413 {
414 inputState = state;
415 }
416
417 /// Set the factory for created tokens
418 virtual void setTokenObjectFactory(factory_type factory)
419 {
420 tokenFactory = factory;
421 }
422
423 /** Test the token text against the literals table
424 * Override this method to perform a different literals test
425 */
426 virtual int testLiteralsTable(int ttype) const
427 {
428 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
429 if (i != literals.end())
430 ttype = (*i).second;
431 return ttype;
432 }
433
434 /** Test the text passed in against the literals table
435 * Override this method to perform a different literals test
436 * This is used primarily when you want to test a portion of
437 * a token
438 */
439 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
440 {
441 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
442 if (i != literals.end())
443 ttype = (*i).second;
444 return ttype;
445 }
446
447 /// Override this method to get more specific case handling
448 virtual int toLower(int c) const
449 {
450 // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
451 // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
452 // this one is more structural. Maybe make this configurable.
453 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
454 }
455
456 /** This method is called by YourLexer::nextToken() when the lexer has
457 * hit EOF condition. EOF is NOT a character.
458 * This method is not called if EOF is reached during
459 * syntactic predicate evaluation or during evaluation
460 * of normal lexical rules, which presumably would be
461 * an IOException. This traps the "normal" EOF condition.
462 *
463 * uponEOF() is called after the complete evaluation of
464 * the previous token and only if your parser asks
465 * for another token beyond that last non-EOF token.
466 *
467 * You might want to throw token or char stream exceptions
468 * like: "Heh, premature eof" or a retry stream exception
469 * ("I found the end of this file, go back to referencing file").
470 */
471 virtual void uponEOF()
472 {
473 }
474
475 /// Methods used to change tracing behavior
476 virtual void traceIndent();
477 virtual void traceIn(const char* rname);
478 virtual void traceOut(const char* rname);
479
480#ifndef NO_STATIC_CONSTS
481 static const int EOF_CHAR = EOF;
482#else
483 enum {
484 EOF_CHAR = EOF
485 };
486#endif
487protected:
488 ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
489 /// flag indicating wether consume saves characters
490 bool saveConsumedInput;
491 factory_type tokenFactory; ///< Factory for tokens
492 bool caseSensitive; ///< Is this lexer case sensitive
493 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
494
495 RefToken _returnToken; ///< used to return tokens w/o using return val
496
497 /// Input state, gives access to input stream, shared among different lexers
499
500 /** Used during filter mode to indicate that path is desired.
501 * A subsequent scan error will report an error as usual
502 * if acceptPath=true;
503 */
505
506 int tabsize; ///< tab size the scanner uses.
507
508 /// Create a new RefToken of type t
509 virtual RefToken makeToken(int t)
510 {
511 RefToken tok = tokenFactory();
512 tok->setType(t);
513 tok->setColumn(inputState->tokenStartColumn);
514 tok->setLine(inputState->tokenStartLine);
515 return tok;
516 }
517
518 /** Tracer class, used when -traceLexer is passed to antlr
519 */
520 class Tracer {
521 private:
522 CharScanner* parser;
523 const char* text;
524
525 Tracer(const Tracer& other); // undefined
526 Tracer& operator=(const Tracer& other); // undefined
527 public:
528 Tracer( CharScanner* p,const char* t )
529 : parser(p), text(t)
530 {
531 parser->traceIn(text);
532 }
533 ~Tracer()
534 {
535 parser->traceOut(text);
536 }
537 };
538
539 int traceDepth;
540private:
541 CharScanner( const CharScanner& other ); // undefined
542 CharScanner& operator=( const CharScanner& other ); // undefined
543
544#ifndef NO_STATIC_CONSTS
545 static const int NO_CHAR = 0;
546#else
547 enum {
548 NO_CHAR = 0
549 };
550#endif
551};
552
553inline int CharScanner::LA(unsigned int i)
554{
555 int c = inputState->getInput().LA(i);
556
557 if ( caseSensitive )
558 return c;
559 else
560 return toLower(c); // VC 6 tolower bug caught in toLower.
561}
562
563inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
564{
565 if (scanner->getCaseSensitiveLiterals())
566 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
567 else
568 {
569#ifdef NO_STRCASECMP
570 return (_stricmp(x.c_str(),y.c_str())<0);
571#else
572 return (strcasecmp(x.c_str(),y.c_str())<0);
573#endif
574 }
575}
576
577#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
578}
579#endif
580
581#endif //INC_CharScanner_hpp__
A BitSet to replace java.util.BitSet.
Definition BitSet.hpp:40
Tracer class, used when -traceLexer is passed to antlr.
Superclass of generated lexers.
virtual int getColumn() const
Get the column the scanner currently is in (starts at 1)
virtual void matchRange(int c1, int c2)
See if input contains character in range c1-c2 throw MismatchedCharException if not.
virtual void setTokenObjectFactory(factory_type factory)
Set the factory for created tokens.
virtual void consumeUntil(int c)
Consume chars until one matches the given char.
virtual void consumeUntil(const BitSet &set)
Consume chars until one matches the given set.
bool caseSensitive
Is this lexer case sensitive.
virtual int getLine() const
Get the line the scanner currently is in (starts at 1)
virtual unsigned int mark()
Mark the current position and return an id for it.
virtual void uponEOF()
This method is called by YourLexer::nextToken() when the lexer has hit EOF condition.
virtual void setInputState(LexerSharedInputState state)
set the input state for the lexer.
virtual int testLiteralsTable(const std ::string &txt, int ttype) const
Test the text passed in against the literals table Override this method to perform a different litera...
LexerSharedInputState inputState
Input state, gives access to input stream, shared among different lexers.
std::string text
Text of current token. Flag indicating whether consume saves characters.
virtual void setFilename(const std ::string &f)
Set the filename the scanner is using (used in error messages)
virtual RefToken makeToken(int t)
Create a new RefToken of type t.
virtual void setLine(int l)
set the line number
virtual int toLower(int c) const
Override this method to get more specific case handling.
virtual const std::string & getText() const
return a const reference to the current text buffer
virtual const std::string & getFilename() const
get the filename for the file currently used
int setTabsize(int size)
set the tabsize. Returns the old tabsize
virtual int testLiteralsTable(int ttype) const
Test the token text against the literals table Override this method to perform a different literals t...
int getTabSize() const
Return the tabsize used by the scanner.
virtual void matchNot(int c)
See if input does not contain character 'c' throw MismatchedCharException if not.
virtual void newline()
Used to keep track of line breaks, needs to be called from within generated lexers when a \r is enc...
virtual void rewind(unsigned int pos)
Rewind the scanner to a previously marked position.
virtual void match(const BitSet &b)
See if input contains element from bitset b throw MismatchedCharException if not.
factory_type tokenFactory
Factory for tokens.
virtual void setColumn(int c)
set the column number
bool commitToPath
Used during filter mode to indicate that path is desired.
virtual void tab()
Advance the current column number by an appropriate amount according to the tabsize.
virtual void match(const std ::string &s)
See if input contains string 's' throw MismatchedCharException if not.
virtual void recover(const RecognitionException &, const BitSet &tokenSet)
called by the generated lexer to do error recovery, override to customize the behaviour.
RefToken _returnToken
used to return tokens w/o using return val
int tabsize
tab size the scanner uses.
virtual void match(int c)
See if input contains character 'c' throw MismatchedCharException if not.
virtual void match(const char *s)
See if input contains string 's' throw MismatchedCharException if not.
Functor for the literals map.
A Stream of characters fed to the lexer from a InputStream that can be rewound via mark()/rewind() me...
virtual int LA(unsigned int i)
Get a lookahead character.
This interface allows any object to pretend it is a stream of tokens.