src/antlr/CharScanner.hpp

#ifndef INC_CharScanner_hpp__
#define INC_CharScanner_hpp__

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 * $Id$
 */

#include <antlr/config.hpp>

#include <map>

#ifdef HAS_NOT_CCTYPE_H
#include <ctype.h>
#else
#include <cctype>
#endif

#if ( _MSC_VER == 1200 )
// VC6 seems to need this
// note that this is not a standard C++ include file.
# include <stdio.h>
#endif

#include <antlr/TokenStream.hpp>
#include <antlr/RecognitionException.hpp>
#include <antlr/SemanticException.hpp>
#include <antlr/MismatchedCharException.hpp>
#include <antlr/InputBuffer.hpp>
#include <antlr/BitSet.hpp>
#include <antlr/LexerSharedInputState.hpp>

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
namespace antlr {
#endif

class ANTLR_API CharScanner;

ANTLR_C_USING(tolower)

#ifdef ANTLR_REALLY_NO_STRCASECMP
// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
// on the mac has neither...
/** Fallback case-insensitive comparison of two NUL-terminated strings.
 * @param s1 first string (must not be null)
 * @param s2 second string (must not be null)
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if both are
 *         equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
        while (true)
        {
                // Convert through unsigned char: tolower() is only defined for
                // unsigned char values and EOF, and comparing the results as int
                // orders bytes >= 0x80 after ASCII instead of before it.
                const int c1 = tolower(static_cast<unsigned char>(*s1++));
                const int c2 = tolower(static_cast<unsigned char>(*s2++));
                if (c1 < c2) return -1;
                if (c1 > c2) return 1;
                // c1 == c2 here, so a terminator means both strings ended together
                if (c1 == 0) return 0;
        }
}
#else
#ifdef NO_STRCASECMP
ANTLR_C_USING(stricmp)
#else
ANTLR_C_USING(strcasecmp)
#endif
#endif

/** Functor for the literals map
 */
class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
private:
        const CharScanner* scanner;
public:
#ifdef NO_TEMPLATE_PARTS
        CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
#endif
        CharScannerLiteralsLess(const CharScanner* theScanner)
        : scanner(theScanner)
        {
        }
        bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
// defaults are good enough..
        //      CharScannerLiteralsLess(const CharScannerLiteralsLess&);
        //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
};

/** Superclass of generated lexers.
 * Keeps the text of the token being scanned, forwards lookahead, consume,
 * mark and rewind requests to the shared input state, and maintains the
 * line and column counters stored in that state.
 */
class ANTLR_API CharScanner : public TokenStream {
protected:
        /// Signature of the function makeToken() calls to create a token
        typedef RefToken (*factory_type)();
public:
        CharScanner(InputBuffer& cb, bool case_sensitive );
        CharScanner(InputBuffer* cb, bool case_sensitive );
        CharScanner(const LexerSharedInputState& state, bool case_sensitive );

        virtual ~CharScanner()
        {
        }

        /** Return lookahead character i of the input; the value is passed
         * through toLower() when the scanner is not case sensitive.
         */
        virtual int LA(unsigned int i);

        /** Append one character to the token text.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(char c)
        {
                if (saveConsumedInput)
                {
                        size_t l = text.length();

                        // grow the buffer in steps of 256 characters
                        if ((l%256) == 0)
                                text.reserve(l+256);

                        text.append(1,c);
                }
        }

        /** Append a string to the token text.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                if( saveConsumedInput )
                        text += s;
        }

        /// Forward a commit request to the input buffer
        virtual void commit()
        {
                inputState->getInput().commit();
        }

        /** called by the generated lexer to do error recovery, override to
         * customize the behaviour.
         * The default skips one character and then everything up to the first
         * character that is a member of tokenSet (or EOF). The exception
         * argument is not used by this default implementation.
         */
        virtual void recover(const RecognitionException& /*ex*/, const BitSet& tokenSet)
        {
                consume();
                consumeUntil(tokenSet);
        }

        /** Consume one character from the input.
         * When not guessing, the character is also appended to the token text
         * and the column counter is updated (tabs go through tab()).
         */
        virtual void consume()
        {
                if (inputState->guessing == 0)
                {
                        int c = LA(1);
                        if (caseSensitive)
                        {
                                // explicit narrowing: the text buffer stores chars
                                append(static_cast<char>(c));
                        }
                        else
                        {
                                // use input.LA(), not LA(), to get original case
                                // CharScanner.LA() would toLower it.
                                append(static_cast<char>(inputState->getInput().LA(1)));
                        }

                        // RK: in a sense I don't like this automatic handling.
                        if (c == '\t')
                                tab();
                        else
                                inputState->column++;
                }
                inputState->getInput().consume();
        }

        /** Consume chars until one matches the given char (or EOF is seen) */
        virtual void consumeUntil(int c)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || la_1 == c )
                                break;
                        consume();
                }
        }

        /** Consume chars until one matches the given set (or EOF is seen) */
        virtual void consumeUntil(const BitSet& set)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || set.member(la_1) )
                                break;
                        consume();
                }
        }

        /// Mark the current position and return a id for it
        virtual unsigned int mark()
        {
                return inputState->getInput().mark();
        }
        /// Rewind the scanner to a previously marked position
        virtual void rewind(unsigned int pos)
        {
                inputState->getInput().rewind(pos);
        }

        /// See if input contains character 'c' throw MismatchedCharException if not
        virtual void match(int c)
        {
                int la_1 = LA(1);
                if ( la_1 != c )
                        throw MismatchedCharException(la_1, c, false, this);
                consume();
        }

        /** See if input contains element from bitset b
         * throw MismatchedCharException if not
         */
        virtual void match(const BitSet& b)
        {
                int la_1 = LA(1);

                if ( !b.member(la_1) )
                        throw MismatchedCharException( la_1, b, false, this );
                consume();
        }

        /** See if input contains string 's' throw MismatchedCharException if not
         * @note the string cannot match EOF
         */
        virtual void match( const char* s )
        {
                while( *s != '\0' )
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (*s++ & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input contains string 's' throw MismatchedCharException if not
         * @note the string cannot match EOF
         */
        virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                size_t len = s.length();

                for (size_t i = 0; i < len; i++)
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (s[i] & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input does not contain character 'c'
         * throw MismatchedCharException if it does
         */
        virtual void matchNot(int c)
        {
                int la_1 = LA(1);

                if ( la_1 == c )
                        throw MismatchedCharException(la_1, c, true, this);

                consume();
        }
        /** See if input contains character in range c1-c2 (both inclusive)
         * throw MismatchedCharException if not
         */
        virtual void matchRange(int c1, int c2)
        {
                int la_1 = LA(1);

                if ( la_1 < c1 || la_1 > c2 )
                        throw MismatchedCharException(la_1, c1, c2, false, this);

                consume();
        }

        /// Is the scanner case sensitive (see LA())
        virtual bool getCaseSensitive() const
        {
                return caseSensitive;
        }

        /// Switch case sensitivity of the scanner on or off
        virtual void setCaseSensitive(bool t)
        {
                caseSensitive = t;
        }

        /// Must be provided by the subclass; consulted by CharScannerLiteralsLess
        virtual bool getCaseSensitiveLiterals() const=0;

        /// Get the line the scanner currently is in (starts at 1)
        virtual int getLine() const
        {
                return inputState->line;
        }

        /// set the line number
        virtual void setLine(int l)
        {
                inputState->line = l;
        }

        /// Get the column the scanner currently is in (starts at 1)
        virtual int getColumn() const
        {
                return inputState->column;
        }
        /// set the column number
        virtual void setColumn(int c)
        {
                inputState->column = c;
        }

        /// get the filename for the file currently used
        virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
        {
                return inputState->filename;
        }
        /// Set the filename the scanner is using (used in error messages)
        virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
        {
                inputState->filename = f;
        }

        /// Get the commitToPath flag
        virtual bool getCommitToPath() const
        {
                return commitToPath;
        }

        /// Set the commitToPath flag
        virtual void setCommitToPath(bool commit)
        {
                commitToPath = commit;
        }

        /** return a const reference to the current text buffer (not a copy);
         * the referenced string changes as the scanner appends or resets text
         */
        virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
        {
                return text;
        }

        /// Replace the current token text
        virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                text = s;
        }

        /** Clear the token text and record the current line/column as the
         * start position of the next token
         */
        virtual void resetText()
        {
                text = "";
                inputState->tokenStartColumn = inputState->column;
                inputState->tokenStartLine = inputState->line;
        }

        /// Return the token stored in _returnToken
        virtual RefToken getTokenObject() const
        {
                return _returnToken;
        }

        /** Used to keep track of line breaks, needs to be called from
         * within generated lexers when a \n \r is encountered.
         */
        virtual void newline()
        {
                ++inputState->line;
                inputState->column = 1;
        }

        /** Advance the current column number by an appropriate amount according
         * to the tabsize. This method needs to be explicitly called from the
         * lexer rules encountering tabs.
         * A tabsize that is zero or negative defines no tab stops; the tab is
         * then counted as a single column instead of dividing by it.
         */
        virtual void tab()
        {
                int c = getColumn();
                if ( tabsize <= 0 )
                {
                        // setTabsize() accepts any value: avoid a division by zero
                        setColumn( c + 1 );
                        return;
                }
                int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;      // calculate tab stop
                setColumn( nc );
        }
        /// set the tabsize. Returns the old tabsize
        int setTabsize( int size )
        {
                int oldsize = tabsize;
                tabsize = size;
                return oldsize;
        }
        /// Return the tabsize used by the scanner
        int getTabSize() const
        {
                return tabsize;
        }

        /** Report exception errors caught in nextToken() */
        virtual void reportError(const RecognitionException& e);

        /** Parser error-reporting function can be overridden in subclass */
        virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);

        /** Parser warning-reporting function can be overridden in subclass */
        virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

        /// Return the input buffer of the current input state
        virtual InputBuffer& getInputBuffer()
        {
                return inputState->getInput();
        }

        /// Return the (shared) input state of this lexer
        virtual LexerSharedInputState getInputState()
        {
                return inputState;
        }

        /** set the input state for the lexer.
         * @note state is a reference counted object, hence no reference */
        virtual void setInputState(LexerSharedInputState state)
        {
                inputState = state;
        }

        /// Set the factory for created tokens
        virtual void setTokenObjectFactory(factory_type factory)
        {
                tokenFactory = factory;
        }

        /** Test the token text against the literals table
         * Override this method to perform a different literals test
         * @return the type stored for the current text, or ttype when the
         *         text is not in the table
         */
        virtual int testLiteralsTable(int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /** Test the text passed in against the literals table
         * Override this method to perform a different literals test
         * This is used primarily when you want to test a portion of
         * a token
         * @return the type stored for txt, or ttype when txt is not in the table
         */
        virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /// Override this method to get more specific case handling
        virtual int toLower(int c) const
        {
                // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
                // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
                // this one is more structural. Maybe make this configurable.
                return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
        }

        /** This method is called by YourLexer::nextToken() when the lexer has
         *  hit EOF condition.  EOF is NOT a character.
         *  This method is not called if EOF is reached during
         *  syntactic predicate evaluation or during evaluation
         *  of normal lexical rules, which presumably would be
         *  an IOException.  This traps the "normal" EOF condition.
         *
         *  uponEOF() is called after the complete evaluation of
         *  the previous token and only if your parser asks
         *  for another token beyond that last non-EOF token.
         *
         *  You might want to throw token or char stream exceptions
         *  like: "Heh, premature eof" or a retry stream exception
         *  ("I found the end of this file, go back to referencing file").
         */
        virtual void uponEOF()
        {
        }

        /// Methods used to change tracing behavior
        virtual void traceIndent();
        virtual void traceIn(const char* rname);
        virtual void traceOut(const char* rname);

        // Value LA() yields at end of input; an enum is used for compilers
        // without in-class static const initializers.
#ifndef NO_STATIC_CONSTS
        static const int EOF_CHAR = EOF;
#else
        enum {
                EOF_CHAR = EOF
        };
#endif
protected:
        ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
        /// flag indicating whether append() (and thus consume()) saves characters
        bool saveConsumedInput;
        factory_type tokenFactory;                              ///< Factory for tokens
        bool caseSensitive;                                             ///< Is this lexer case sensitive
        ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass

        RefToken _returnToken;          ///< used to return tokens w/o using return val

        /// Input state, gives access to input stream, shared among different lexers
        LexerSharedInputState inputState;

        /** Used during filter mode to indicate that path is desired.
         * A subsequent scan error will report an error as usual
         * if commitToPath=true (the comment originally said "acceptPath";
         * no member of that name is visible here).
         */
        bool commitToPath;

        int tabsize;    ///< tab size the scanner uses.

        /// Create a new RefToken of type t, positioned at the recorded token start
        virtual RefToken makeToken(int t)
        {
                RefToken tok = tokenFactory();
                tok->setType(t);
                tok->setColumn(inputState->tokenStartColumn);
                tok->setLine(inputState->tokenStartLine);
                return tok;
        }

        /** Tracer class, used when -traceLexer is passed to antlr.
         * Calls traceIn() on construction and traceOut() on destruction.
         */
        class Tracer {
        private:
                CharScanner* parser;
                const char* text;

                Tracer(const Tracer& other);                                    // undefined
                Tracer& operator=(const Tracer& other);         // undefined
        public:
                Tracer( CharScanner* p,const char* t )
                : parser(p), text(t)
                {
                        parser->traceIn(text);
                }
                ~Tracer()
                {
                        parser->traceOut(text);
                }
        };

        int traceDepth;
private:
        // copying a scanner is not supported: declared but never defined
        CharScanner( const CharScanner& other );                                        // undefined
        CharScanner& operator=( const CharScanner& other );     // undefined

#ifndef NO_STATIC_CONSTS
        static const int NO_CHAR = 0;
#else
        enum {
                NO_CHAR = 0
        };
#endif
};

// Fetch lookahead character i from the input buffer; a scanner that is not
// case sensitive sees it folded through toLower().
inline int CharScanner::LA(unsigned int i)
{
        const int ch = inputState->getInput().LA(i);

        // VC 6 tolower bug caught in toLower.
        return caseSensitive ? ch : toLower(ch);
}

// Returns true when x orders before y: plain string ordering if the scanner
// treats literals as case sensitive, a case-insensitive comparison of the
// C strings otherwise.
inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
{
        if (!scanner->getCaseSensitiveLiterals())
        {
                // the comparison function available depends on the platform
#ifdef NO_STRCASECMP
                const int order = stricmp(x.c_str(),y.c_str());
#else
                const int order = strcasecmp(x.c_str(),y.c_str());
#endif
                return order < 0;
        }

        return x < y;
}

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
}
#endif

#endif //INC_CharScanner_hpp__
Revision:	1558
Committed:	Wed May 11 16:32:48 2011 UTC (14 years, 5 months ago) by gezelter
File size:	13661 byte(s)
Log Message:	Updated antlr, some minor formatting changes
#	Content
1	#ifndef INC_CharScanner_hpp__
2	#define INC_CharScanner_hpp__
3
4	/* ANTLR Translator Generator
5	* Project led by Terence Parr at http://www.jGuru.com
6	* Software rights: http://www.antlr.org/license.html
7	*
8	* $Id$
9	*/
10
11	#include <antlr/config.hpp>
12
13	#include <map>
14
15	#ifdef HAS_NOT_CCTYPE_H
16	#include <ctype.h>
17	#else
18	#include <cctype>
19	#endif
20
21	#if ( _MSC_VER == 1200 )
22	// VC6 seems to need this
23	// note that this is not a standard C++ include file.
24	# include <stdio.h>
25	#endif
26
27	#include <antlr/TokenStream.hpp>
28	#include <antlr/RecognitionException.hpp>
29	#include <antlr/SemanticException.hpp>
30	#include <antlr/MismatchedCharException.hpp>
31	#include <antlr/InputBuffer.hpp>
32	#include <antlr/BitSet.hpp>
33	#include <antlr/LexerSharedInputState.hpp>
34
35	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
36	namespace antlr {
37	#endif
38
39	class ANTLR_API CharScanner;
40
41	ANTLR_C_USING(tolower)
42
43	#ifdef ANTLR_REALLY_NO_STRCASECMP
44	// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
45	// on the mac has neither...
46	inline int strcasecmp(const char *s1, const char *s2)
47	{
48	while (true)
49	{
50	char c1 = tolower(*s1++),
51	c2 = tolower(*s2++);
52	if (c1 < c2) return -1;
53	if (c1 > c2) return 1;
54	if (c1 == 0) return 0;
55	}
56	}
57	#else
58	#ifdef NO_STRCASECMP
59	ANTLR_C_USING(stricmp)
60	#else
61	ANTLR_C_USING(strcasecmp)
62	#endif
63	#endif
64
65	/** Functor for the literals map
66	*/
67	class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
68	private:
69	const CharScanner* scanner;
70	public:
71	#ifdef NO_TEMPLATE_PARTS
72	CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
73	#endif
74	CharScannerLiteralsLess(const CharScanner* theScanner)
75	: scanner(theScanner)
76	{
77	}
78	bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
79	// defaults are good enough..
80	// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
81	// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
82	};
83
84	/** Superclass of generated lexers
85	*/
86	class ANTLR_API CharScanner : public TokenStream {
87	protected:
88	typedef RefToken (*factory_type)();
89	public:
90	CharScanner(InputBuffer& cb, bool case_sensitive );
91	CharScanner(InputBuffer* cb, bool case_sensitive );
92	CharScanner(const LexerSharedInputState& state, bool case_sensitive );
93
94	virtual ~CharScanner()
95	{
96	}
97
98	virtual int LA(unsigned int i);
99
100	virtual void append(char c)
101	{
102	if (saveConsumedInput)
103	{
104	size_t l = text.length();
105
106	if ((l%256) == 0)
107	text.reserve(l+256);
108
109	text.replace(l,0,&c,1);
110	}
111	}
112
113	virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
114	{
115	if( saveConsumedInput )
116	text += s;
117	}
118
119	virtual void commit()
120	{
121	inputState->getInput().commit();
122	}
123
124	/** called by the generated lexer to do error recovery, override to
125	* customize the behaviour.
126	*/
127	virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
128	{
129	consume();
130	consumeUntil(tokenSet);
131	}
132
133	virtual void consume()
134	{
135	if (inputState->guessing == 0)
136	{
137	int c = LA(1);
138	if (caseSensitive)
139	{
140	append(c);
141	}
142	else
143	{
144	// use input.LA(), not LA(), to get original case
145	// CharScanner.LA() would toLower it.
146	append(inputState->getInput().LA(1));
147	}
148
149	// RK: in a sense I don't like this automatic handling.
150	if (c == '\t')
151	tab();
152	else
153	inputState->column++;
154	}
155	inputState->getInput().consume();
156	}
157
158	/** Consume chars until one matches the given char */
159	virtual void consumeUntil(int c)
160	{
161	for(;;)
162	{
163	int la_1 = LA(1);
164	if( la_1 == EOF_CHAR || la_1 == c )
165	break;
166	consume();
167	}
168	}
169
170	/** Consume chars until one matches the given set */
171	virtual void consumeUntil(const BitSet& set)
172	{
173	for(;;)
174	{
175	int la_1 = LA(1);
176	if( la_1 == EOF_CHAR || set.member(la_1) )
177	break;
178	consume();
179	}
180	}
181
182	/// Mark the current position and return a id for it
183	virtual unsigned int mark()
184	{
185	return inputState->getInput().mark();
186	}
187	/// Rewind the scanner to a previously marked position
188	virtual void rewind(unsigned int pos)
189	{
190	inputState->getInput().rewind(pos);
191	}
192
193	/// See if input contains character 'c' throw MismatchedCharException if not
194	virtual void match(int c)
195	{
196	int la_1 = LA(1);
197	if ( la_1 != c )
198	throw MismatchedCharException(la_1, c, false, this);
199	consume();
200	}
201
202	/** See if input contains element from bitset b
203	* throw MismatchedCharException if not
204	*/
205	virtual void match(const BitSet& b)
206	{
207	int la_1 = LA(1);
208
209	if ( !b.member(la_1) )
210	throw MismatchedCharException( la_1, b, false, this );
211	consume();
212	}
213
214	/** See if input contains string 's' throw MismatchedCharException if not
215	* @note the string cannot match EOF
216	*/
217	virtual void match( const char* s )
218	{
219	while( *s != '\0' )
220	{
221	// the & 0xFF is here to prevent sign extension lateron
222	int la_1 = LA(1), c = (*s++ & 0xFF);
223
224	if ( la_1 != c )
225	throw MismatchedCharException(la_1, c, false, this);
226
227	consume();
228	}
229	}
230	/** See if input contains string 's' throw MismatchedCharException if not
231	* @note the string cannot match EOF
232	*/
233	virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
234	{
235	size_t len = s.length();
236
237	for (size_t i = 0; i < len; i++)
238	{
239	// the & 0xFF is here to prevent sign extension lateron
240	int la_1 = LA(1), c = (s[i] & 0xFF);
241
242	if ( la_1 != c )
243	throw MismatchedCharException(la_1, c, false, this);
244
245	consume();
246	}
247	}
248	/** See if input does not contain character 'c'
249	* throw MismatchedCharException if not
250	*/
251	virtual void matchNot(int c)
252	{
253	int la_1 = LA(1);
254
255	if ( la_1 == c )
256	throw MismatchedCharException(la_1, c, true, this);
257
258	consume();
259	}
260	/** See if input contains character in range c1-c2
261	* throw MismatchedCharException if not
262	*/
263	virtual void matchRange(int c1, int c2)
264	{
265	int la_1 = LA(1);
266
267	if ( la_1 < c1 || la_1 > c2 )
268	throw MismatchedCharException(la_1, c1, c2, false, this);
269
270	consume();
271	}
272
273	virtual bool getCaseSensitive() const
274	{
275	return caseSensitive;
276	}
277
278	virtual void setCaseSensitive(bool t)
279	{
280	caseSensitive = t;
281	}
282
283	virtual bool getCaseSensitiveLiterals() const=0;
284
285	/// Get the line the scanner currently is in (starts at 1)
286	virtual int getLine() const
287	{
288	return inputState->line;
289	}
290
291	/// set the line number
292	virtual void setLine(int l)
293	{
294	inputState->line = l;
295	}
296
297	/// Get the column the scanner currently is in (starts at 1)
298	virtual int getColumn() const
299	{
300	return inputState->column;
301	}
302	/// set the column number
303	virtual void setColumn(int c)
304	{
305	inputState->column = c;
306	}
307
308	/// get the filename for the file currently used
309	virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
310	{
311	return inputState->filename;
312	}
313	/// Set the filename the scanner is using (used in error messages)
314	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
315	{
316	inputState->filename = f;
317	}
318
319	virtual bool getCommitToPath() const
320	{
321	return commitToPath;
322	}
323
324	virtual void setCommitToPath(bool commit)
325	{
326	commitToPath = commit;
327	}
328
329	/** return a copy of the current text buffer */
330	virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
331	{
332	return text;
333	}
334
335	virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
336	{
337	text = s;
338	}
339
340	virtual void resetText()
341	{
342	text = "";
343	inputState->tokenStartColumn = inputState->column;
344	inputState->tokenStartLine = inputState->line;
345	}
346
347	virtual RefToken getTokenObject() const
348	{
349	return _returnToken;
350	}
351
352	/** Used to keep track of line breaks, needs to be called from
353	* within generated lexers when a \n \r is encountered.
354	*/
355	virtual void newline()
356	{
357	++inputState->line;
358	inputState->column = 1;
359	}
360
361	/** Advance the current column number by an appropriate amount according
362	* to the tabsize. This method needs to be explicitly called from the
363	* lexer rules encountering tabs.
364	*/
365	virtual void tab()
366	{
367	int c = getColumn();
368	int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
369	setColumn( nc );
370	}
371	/// set the tabsize. Returns the old tabsize
372	int setTabsize( int size )
373	{
374	int oldsize = tabsize;
375	tabsize = size;
376	return oldsize;
377	}
378	/// Return the tabsize used by the scanner
379	int getTabSize() const
380	{
381	return tabsize;
382	}
383
384	/** Report exception errors caught in nextToken() */
385	virtual void reportError(const RecognitionException& e);
386
387	/** Parser error-reporting function can be overridden in subclass */
388	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
389
390	/** Parser warning-reporting function can be overridden in subclass */
391	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
392
393	virtual InputBuffer& getInputBuffer()
394	{
395	return inputState->getInput();
396	}
397
398	virtual LexerSharedInputState getInputState()
399	{
400	return inputState;
401	}
402
403	/** set the input state for the lexer.
404	* @note state is a reference counted object, hence no reference */
405	virtual void setInputState(LexerSharedInputState state)
406	{
407	inputState = state;
408	}
409
410	/// Set the factory for created tokens
411	virtual void setTokenObjectFactory(factory_type factory)
412	{
413	tokenFactory = factory;
414	}
415
416	/** Test the token text against the literals table
417	* Override this method to perform a different literals test
418	*/
419	virtual int testLiteralsTable(int ttype) const
420	{
421	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
422	if (i != literals.end())
423	ttype = (*i).second;
424	return ttype;
425	}
426
427	/** Test the text passed in against the literals table
428	* Override this method to perform a different literals test
429	* This is used primarily when you want to test a portion of
430	* a token
431	*/
432	virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
433	{
434	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
435	if (i != literals.end())
436	ttype = (*i).second;
437	return ttype;
438	}
439
440	/// Override this method to get more specific case handling
441	virtual int toLower(int c) const
442	{
443	// test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
444	// also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
445	// this one is more structural. Maybe make this configurable.
446	return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
447	}
448
449	/** This method is called by YourLexer::nextToken() when the lexer has
450	* hit EOF condition. EOF is NOT a character.
451	* This method is not called if EOF is reached during
452	* syntactic predicate evaluation or during evaluation
453	* of normal lexical rules, which presumably would be
454	* an IOException. This traps the "normal" EOF condition.
455	*
456	* uponEOF() is called after the complete evaluation of
457	* the previous token and only if your parser asks
458	* for another token beyond that last non-EOF token.
459	*
460	* You might want to throw token or char stream exceptions
461	* like: "Heh, premature eof" or a retry stream exception
462	* ("I found the end of this file, go back to referencing file").
463	*/
464	virtual void uponEOF()
465	{
466	}
467
468	/// Methods used to change tracing behavior
469	virtual void traceIndent();
470	virtual void traceIn(const char* rname);
471	virtual void traceOut(const char* rname);
472
473	#ifndef NO_STATIC_CONSTS
474	static const int EOF_CHAR = EOF;
475	#else
476	enum {
477	EOF_CHAR = EOF
478	};
479	#endif
480	protected:
481	ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
482	/// flag indicating wether consume saves characters
483	bool saveConsumedInput;
484	factory_type tokenFactory; ///< Factory for tokens
485	bool caseSensitive; ///< Is this lexer case sensitive
486	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
487
488	RefToken _returnToken; ///< used to return tokens w/o using return val
489
490	/// Input state, gives access to input stream, shared among different lexers
491	LexerSharedInputState inputState;
492
493	/** Used during filter mode to indicate that path is desired.
494	* A subsequent scan error will report an error as usual
495	* if acceptPath=true;
496	*/
497	bool commitToPath;
498
499	int tabsize; ///< tab size the scanner uses.
500
501	/// Create a new RefToken of type t
502	virtual RefToken makeToken(int t)
503	{
504	RefToken tok = tokenFactory();
505	tok->setType(t);
506	tok->setColumn(inputState->tokenStartColumn);
507	tok->setLine(inputState->tokenStartLine);
508	return tok;
509	}
510
511	/** Tracer class, used when -traceLexer is passed to antlr
512	*/
513	class Tracer {
514	private:
515	CharScanner* parser;
516	const char* text;
517
518	Tracer(const Tracer& other); // undefined
519	Tracer& operator=(const Tracer& other); // undefined
520	public:
521	Tracer( CharScanner* p,const char* t )
522	: parser(p), text(t)
523	{
524	parser->traceIn(text);
525	}
526	~Tracer()
527	{
528	parser->traceOut(text);
529	}
530	};
531
532	int traceDepth;
533	private:
534	CharScanner( const CharScanner& other ); // undefined
535	CharScanner& operator=( const CharScanner& other ); // undefined
536
537	#ifndef NO_STATIC_CONSTS
538	static const int NO_CHAR = 0;
539	#else
540	enum {
541	NO_CHAR = 0
542	};
543	#endif
544	};
545
546	inline int CharScanner::LA(unsigned int i)
547	{
548	int c = inputState->getInput().LA(i);
549
550	if ( caseSensitive )
551	return c;
552	else
553	return toLower(c); // VC 6 tolower bug caught in toLower.
554	}
555
556	inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
557	{
558	if (scanner->getCaseSensitiveLiterals())
559	return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
560	else
561	{
562	#ifdef NO_STRCASECMP
563	return (stricmp(x.c_str(),y.c_str())<0);
564	#else
565	return (strcasecmp(x.c_str(),y.c_str())<0);
566	#endif
567	}
568	}
569
570	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
571	}
572	#endif
573
574	#endif //INC_CharScanner_hpp__