src/antlr/CharScanner.hpp

#ifndef INC_CharScanner_hpp__
#define INC_CharScanner_hpp__

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 * $Id: CharScanner.hpp,v 1.2 2005-12-15 14:48:26 gezelter Exp $
 */

#include <antlr/config.hpp>

#include <map>

#ifdef HAS_NOT_CCTYPE_H
#include <ctype.h>
#else
#include <cctype>
#endif

#if ( _MSC_VER == 1200 )
// VC6 seems to need this
// note that this is not a standard C++ include file.
# include <stdio.h>
#endif

#include <antlr/TokenStream.hpp>
#include <antlr/RecognitionException.hpp>
#include <antlr/SemanticException.hpp>
#include <antlr/MismatchedCharException.hpp>
#include <antlr/InputBuffer.hpp>
#include <antlr/BitSet.hpp>
#include <antlr/LexerSharedInputState.hpp>

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
namespace antlr {
#endif

class ANTLR_API CharScanner;

ANTLR_C_USING(tolower)

#if !defined(HAVE_STRCASECMP) && defined(HAVE_STRICMP) && !defined(stricmp)
#define strcasecmp stricmp
#endif
#if !defined(HAVE_STRNCASECMP) && defined(HAVE_STRNICMP) && !defined(strnicmp)
#define strncasecmp strnicmp
#endif


#if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)
/** Fallback case-insensitive comparison of two NUL-terminated strings.
 *
 * Only compiled when neither HAVE_STRCASECMP nor HAVE_STRICMP is defined.
 * Both strings are walked in lock step; each character is lower-cased with
 * tolower() and the results are compared as unsigned char values.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if both are
 *         equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
        while (true)
        {
                // tolower() is only defined for EOF and for values representable
                // as unsigned char, and plain char may be signed: convert first,
                // and compare the results as unsigned char as well so that bytes
                // >= 0x80 order the same way on every platform.
                const unsigned char c1 =
                        static_cast<unsigned char>(tolower(static_cast<unsigned char>(*s1++)));
                const unsigned char c2 =
                        static_cast<unsigned char>(tolower(static_cast<unsigned char>(*s2++)));
                if (c1 < c2) return -1;
                if (c1 > c2) return 1;
                // c1 == c2 here; a shared terminator means the strings are equal
                if (c1 == 0) return 0;
        }
}
#endif

/** Functor for the literals map
 */
class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
private:
        const CharScanner* scanner;
public:
#ifdef NO_TEMPLATE_PARTS
        CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
#endif
        CharScannerLiteralsLess(const CharScanner* theScanner)
        : scanner(theScanner)
        {
        }
        bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
// defaults are good enough..
        //      CharScannerLiteralsLess(const CharScannerLiteralsLess&);
        //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
};

/** Superclass of generated lexers
 */
/** Superclass of generated lexers.
 *
 * Bundles the lookahead, matching, text-accumulation and line/column
 * bookkeeping primitives on top of a LexerSharedInputState. The class is
 * abstract: getCaseSensitiveLiterals() is pure virtual.
 */
class ANTLR_API CharScanner : public TokenStream {
protected:
        /// Signature of the function makeToken() calls to create a token
        typedef RefToken (*factory_type)();
public:
        // Constructors are defined out of line.
        CharScanner(InputBuffer& cb, bool case_sensitive );
        CharScanner(InputBuffer* cb, bool case_sensitive );
        CharScanner(const LexerSharedInputState& state, bool case_sensitive );

        virtual ~CharScanner()
        {
        }

        /** Return lookahead character i; lower-cased through toLower() when
         * the scanner is not case sensitive (defined inline after the class).
         */
        virtual int LA(unsigned int i);

        /** Append a single character to the token text.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(char c)
        {
                if (saveConsumedInput)
                {
                        size_t l = text.length();

                        // grow the buffer in steps of 256 characters
                        if ((l%256) == 0)
                                text.reserve(l+256);

                        text += c;
                }
        }

        /** Append a string to the token text.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                if( saveConsumedInput )
                        text += s;
        }

        /// Forward commit() to the underlying input buffer
        virtual void commit()
        {
                inputState->getInput().commit();
        }

        /** Consume the current input character.
         * While not guessing (inputState->guessing == 0) the character is also
         * appended to the token text and the column counter is updated.
         */
        virtual void consume()
        {
                if (inputState->guessing == 0)
                {
                        int c = LA(1);
                        if (caseSensitive)
                        {
                                append(static_cast<char>(c));
                        }
                        else
                        {
                                // use input.LA(), not LA(), to get original case
                                // CharScanner.LA() would toLower it.
                                append(static_cast<char>(inputState->getInput().LA(1)));
                        }

                        // RK: in a sense I don't like this automatic handling.
                        if (c == '\t')
                                tab();
                        else
                                inputState->column++;
                }
                inputState->getInput().consume();
        }

        /** Consume chars until one matches the given char (or EOF_CHAR is seen).
         * The matching character itself is not consumed.
         */
        virtual void consumeUntil(int c)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || la_1 == c )
                                break;
                        consume();
                }
        }

        /** Consume chars until one is a member of the given set (or EOF_CHAR is
         * seen). The matching character itself is not consumed.
         */
        virtual void consumeUntil(const BitSet& set)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || set.member(la_1) )
                                break;
                        consume();
                }
        }

        /// Mark the current position and return an id for it
        virtual unsigned int mark()
        {
                return inputState->getInput().mark();
        }
        /// Rewind the scanner to a previously marked position
        virtual void rewind(unsigned int pos)
        {
                inputState->getInput().rewind(pos);
        }

        /// See if input contains character 'c'; throw MismatchedCharException if not
        virtual void match(int c)
        {
                int la_1 = LA(1);
                if ( la_1 != c )
                        throw MismatchedCharException(la_1, c, false, this);
                consume();
        }

        /** See if input contains an element from bitset b;
         * throw MismatchedCharException if not
         */
        virtual void match(const BitSet& b)
        {
                int la_1 = LA(1);

                if ( !b.member(la_1) )
                        throw MismatchedCharException( la_1, b, false, this );
                consume();
        }

        /** See if input contains string 's'; throw MismatchedCharException if not.
         * Characters matched before a mismatch have already been consumed.
         * @note the string cannot match EOF
         */
        virtual void match( const char* s )
        {
                while( *s != '\0' )
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (*s++ & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input contains string 's'; throw MismatchedCharException if not.
         * Characters matched before a mismatch have already been consumed.
         * @note the string cannot match EOF
         */
        virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                size_t len = s.length();

                for (size_t i = 0; i < len; i++)
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (s[i] & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if the next input character differs from 'c';
         * throw MismatchedCharException if it is 'c'
         */
        virtual void matchNot(int c)
        {
                int la_1 = LA(1);

                if ( la_1 == c )
                        throw MismatchedCharException(la_1, c, true, this);

                consume();
        }
        /** See if input contains a character in the inclusive range c1-c2;
         * throw MismatchedCharException if not
         */
        virtual void matchRange(int c1, int c2)
        {
                int la_1 = LA(1);

                if ( la_1 < c1 || la_1 > c2 )
                        throw MismatchedCharException(la_1, c1, c2, false, this);

                consume();
        }

        /// Return whether LA() hands out characters unchanged (true) or lower-cased
        virtual bool getCaseSensitive() const
        {
                return caseSensitive;
        }

        /// Set whether LA() hands out characters unchanged (true) or lower-cased
        virtual void setCaseSensitive(bool t)
        {
                caseSensitive = t;
        }

        /// Whether literals are compared case sensitively; supplied by the subclass
        virtual bool getCaseSensitiveLiterals() const=0;

        /// Get the line the scanner currently is in (starts at 1)
        virtual int getLine() const
        {
                return inputState->line;
        }

        /// set the line number
        virtual void setLine(int l)
        {
                inputState->line = l;
        }

        /// Get the column the scanner currently is in (starts at 1)
        virtual int getColumn() const
        {
                return inputState->column;
        }
        /// set the column number
        virtual void setColumn(int c)
        {
                inputState->column = c;
        }

        /// get the filename for the file currently used
        virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
        {
                return inputState->filename;
        }
        /// Set the filename the scanner is using (used in error messages)
        virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
        {
                inputState->filename = f;
        }

        /// Return the commitToPath flag
        virtual bool getCommitToPath() const
        {
                return commitToPath;
        }

        /// Set the commitToPath flag
        virtual void setCommitToPath(bool commit)
        {
                commitToPath = commit;
        }

        /** Return a reference to the current text buffer (no copy is made) */
        virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
        {
                return text;
        }

        /// Replace the current text buffer with s
        virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                text = s;
        }

        /** Empty the text buffer and record the current line/column as the
         * start position of the next token.
         */
        virtual void resetText()
        {
                text = "";
                inputState->tokenStartColumn = inputState->column;
                inputState->tokenStartLine = inputState->line;
        }

        /// Return the token stored in _returnToken
        virtual RefToken getTokenObject() const
        {
                return _returnToken;
        }

        /** Used to keep track of line breaks, needs to be called from
         * within generated lexers when a \n \r is encountered.
         */
        virtual void newline()
        {
                ++inputState->line;
                inputState->column = 1;
        }

        /** Advance the current column number by an appropriate amount according
         * to the tabsize. This method needs to be explicitly called from the
         * lexer rules encountering tabs.
         */
        virtual void tab()
        {
                // setTabsize() accepts any value: a tabsize of zero would divide by
                // zero below and a negative one would move the column backwards.
                // Fall back to a width of 1, which makes a tab advance the column
                // like any other character.
                const int width = (tabsize > 0) ? tabsize : 1;
                const int c = getColumn();
                const int nc = ( ((c-1)/width) + 1) * width + 1;    // calculate tab stop
                setColumn( nc );
        }
        /** Set the tabsize. Returns the old tabsize.
         * The value is stored as given; tab() treats a tabsize below 1 as 1.
         */
        int setTabsize( int size )
        {
                int oldsize = tabsize;
                tabsize = size;
                return oldsize;
        }
        /// Return the tabsize used by the scanner
        int getTabSize() const
        {
                return tabsize;
        }

        /** Report exception errors caught in nextToken() */
        virtual void reportError(const RecognitionException& e);

        /** Error-reporting function taking a plain message; can be overridden in subclass */
        virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);

        /** Warning-reporting function taking a plain message; can be overridden in subclass */
        virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

        /// Return the input buffer of the current input state
        virtual InputBuffer& getInputBuffer()
        {
                return inputState->getInput();
        }

        /// Return the input state used by this lexer
        virtual LexerSharedInputState getInputState()
        {
                return inputState;
        }

        /** set the input state for the lexer.
         * @note state is a reference counted object, hence no reference */
        virtual void setInputState(LexerSharedInputState state)
        {
                inputState = state;
        }

        /// Set the factory for created tokens
        virtual void setTokenObjectFactory(factory_type factory)
        {
                tokenFactory = factory;
        }

        /** Test the token text against the literals table.
         * Returns the token type stored for the current text, or ttype
         * unchanged when the text is not in the table.
         * Override this method to perform a different literals test
         */
        virtual int testLiteralsTable(int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /** Test the text passed in against the literals table.
         * Returns the token type stored for txt, or ttype unchanged when txt
         * is not in the table.
         * Override this method to perform a different literals test
         * This is used primarily when you want to test a portion of
         * a token
         */
        virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /// Override this method to get more specific case handling
        virtual int toLower(int c) const
        {
                // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
                // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
                // this one is more structural. Maybe make this configurable.
                return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
        }

        /** This method is called by YourLexer::nextToken() when the lexer has
         *  hit EOF condition.  EOF is NOT a character.
         *  This method is not called if EOF is reached during
         *  syntactic predicate evaluation or during evaluation
         *  of normal lexical rules, which presumably would be
         *  an IOException.  This traps the "normal" EOF condition.
         *
         *  uponEOF() is called after the complete evaluation of
         *  the previous token and only if your parser asks
         *  for another token beyond that last non-EOF token.
         *
         *  You might want to throw token or char stream exceptions
         *  like: "Heh, premature eof" or a retry stream exception
         *  ("I found the end of this file, go back to referencing file").
         */
        virtual void uponEOF()
        {
        }

        /// Methods used to change tracing behavior
        virtual void traceIndent();
        virtual void traceIn(const char* rname);
        virtual void traceOut(const char* rname);

        // Value LA() yields at end of input; same value as the C library's EOF.
#ifndef NO_STATIC_CONSTS
        static const int EOF_CHAR = EOF;
#else
        enum {
                EOF_CHAR = EOF
        };
#endif
protected:
        ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
        /// flag indicating whether append() (and thus consume()) saves characters
        bool saveConsumedInput;
        factory_type tokenFactory;                              ///< Factory for tokens
        bool caseSensitive;                                             ///< Is this lexer case sensitive
        ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass

        RefToken _returnToken;          ///< used to return tokens w/o using return val

        /// Input state, gives access to input stream, shared among different lexers
        LexerSharedInputState inputState;

        /** Used during filter mode to indicate that path is desired.
         * A subsequent scan error will report an error as usual
         * if commitToPath is true.
         */
        bool commitToPath;

        int tabsize;    ///< tab size the scanner uses.

        /** Create a new RefToken of type t.
         * The token comes from tokenFactory and is stamped with the start
         * line/column recorded in the input state.
         */
        virtual RefToken makeToken(int t)
        {
                RefToken tok = tokenFactory();
                tok->setType(t);
                tok->setColumn(inputState->tokenStartColumn);
                tok->setLine(inputState->tokenStartLine);
                return tok;
        }

        /** Tracer class, used when -traceLexer is passed to antlr.
         * Calls traceIn() on construction and traceOut() on destruction.
         */
        class Tracer {
        private:
                CharScanner* parser;    // scanner being traced (a lexer, despite the name)
                const char* text;       // rule name passed to traceIn()/traceOut()

                Tracer(const Tracer& other);                                    // undefined
                Tracer& operator=(const Tracer& other);         // undefined
        public:
                Tracer( CharScanner* p,const char* t )
                : parser(p), text(t)
                {
                        parser->traceIn(text);
                }
                ~Tracer()
                {
                        parser->traceOut(text);
                }
        };

        int traceDepth;
private:
        // Copying is disabled: declared private and never defined.
        CharScanner( const CharScanner& other );                                        // undefined
        CharScanner& operator=( const CharScanner& other );     // undefined

#ifndef NO_STATIC_CONSTS
        static const int NO_CHAR = 0;
#else
        enum {
                NO_CHAR = 0
        };
#endif
};

/** Return lookahead character i of the input buffer.
 * A case-sensitive scanner returns it untouched; otherwise it is passed
 * through toLower() first.
 */
inline int CharScanner::LA(unsigned int i)
{
        const int ch = inputState->getInput().LA(i);

        // VC 6 tolower bug caught in toLower.
        return caseSensitive ? ch : toLower(ch);
}

/** Return true if x orders before y.
 * Uses the plain string ordering when the scanner reports case-sensitive
 * literals, and a case-insensitive C-string comparison otherwise.
 */
inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
{
        if (!scanner->getCaseSensitiveLiterals())
        {
                // case-insensitive: compare the underlying C strings
#ifdef NO_STRCASECMP
                const int order = stricmp(x.c_str(),y.c_str());
#else
                const int order = strcasecmp(x.c_str(),y.c_str());
#endif
                return order < 0;
        }

        // case-sensitive: ordinary lexicographic string ordering
        return x < y;
}

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
}
#endif

#endif //INC_CharScanner_hpp__
Revision:	2511
Committed:	Thu Dec 15 14:48:26 2005 UTC (18 years, 9 months ago) by gezelter
File size:	13547 byte(s)
Log Message:	Removing compiler dependencies and replacing them with real autoconf checks
#	Content
1	#ifndef INC_CharScanner_hpp__
2	#define INC_CharScanner_hpp__
3
4	/* ANTLR Translator Generator
5	* Project led by Terence Parr at http://www.jGuru.com
6	* Software rights: http://www.antlr.org/license.html
7	*
8	* $Id: CharScanner.hpp,v 1.2 2005-12-15 14:48:26 gezelter Exp $
9	*/
10
11	#include <antlr/config.hpp>
12
13	#include <map>
14
15	#ifdef HAS_NOT_CCTYPE_H
16	#include <ctype.h>
17	#else
18	#include <cctype>
19	#endif
20
21	#if ( _MSC_VER == 1200 )
22	// VC6 seems to need this
23	// note that this is not a standard C++ include file.
24	# include <stdio.h>
25	#endif
26
27	#include <antlr/TokenStream.hpp>
28	#include <antlr/RecognitionException.hpp>
29	#include <antlr/SemanticException.hpp>
30	#include <antlr/MismatchedCharException.hpp>
31	#include <antlr/InputBuffer.hpp>
32	#include <antlr/BitSet.hpp>
33	#include <antlr/LexerSharedInputState.hpp>
34
35	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
36	namespace antlr {
37	#endif
38
39	class ANTLR_API CharScanner;
40
41	ANTLR_C_USING(tolower)
42
43	#if !defined(HAVE_STRCASECMP) && defined(HAVE_STRICMP) && !defined(stricmp)
44	#define strcasecmp stricmp
45	#endif
46	#if !defined(HAVE_STRNCASECMP) && defined(HAVE_STRNICMP) && !defined(strnicmp)
47	#define strncasecmp strnicmp
48	#endif
49
50
51	#if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)
52	inline int strcasecmp(const char *s1, const char *s2)
53	{
54	while (true)
55	{
56	char c1 = tolower(*s1++),
57	c2 = tolower(*s2++);
58	if (c1 < c2) return -1;
59	if (c1 > c2) return 1;
60	if (c1 == 0) return 0;
61	}
62	}
63	#endif
64
65	/** Functor for the literals map
66	*/
67	class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
68	private:
69	const CharScanner* scanner;
70	public:
71	#ifdef NO_TEMPLATE_PARTS
72	CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
73	#endif
74	CharScannerLiteralsLess(const CharScanner* theScanner)
75	: scanner(theScanner)
76	{
77	}
78	bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
79	// defaults are good enough..
80	// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
81	// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
82	};
83
84	/** Superclass of generated lexers
85	*/
86	class ANTLR_API CharScanner : public TokenStream {
87	protected:
88	typedef RefToken (*factory_type)();
89	public:
90	CharScanner(InputBuffer& cb, bool case_sensitive );
91	CharScanner(InputBuffer* cb, bool case_sensitive );
92	CharScanner(const LexerSharedInputState& state, bool case_sensitive );
93
94	virtual ~CharScanner()
95	{
96	}
97
98	virtual int LA(unsigned int i);
99
100	virtual void append(char c)
101	{
102	if (saveConsumedInput)
103	{
104	size_t l = text.length();
105
106	if ((l%256) == 0)
107	text.reserve(l+256);
108
109	text.replace(l,0,&c,1);
110	}
111	}
112
113	virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
114	{
115	if( saveConsumedInput )
116	text += s;
117	}
118
119	virtual void commit()
120	{
121	inputState->getInput().commit();
122	}
123
124	virtual void consume()
125	{
126	if (inputState->guessing == 0)
127	{
128	int c = LA(1);
129	if (caseSensitive)
130	{
131	append(c);
132	}
133	else
134	{
135	// use input.LA(), not LA(), to get original case
136	// CharScanner.LA() would toLower it.
137	append(inputState->getInput().LA(1));
138	}
139
140	// RK: in a sense I don't like this automatic handling.
141	if (c == '\t')
142	tab();
143	else
144	inputState->column++;
145	}
146	inputState->getInput().consume();
147	}
148
149	/** Consume chars until one matches the given char */
150	virtual void consumeUntil(int c)
151	{
152	for(;;)
153	{
154	int la_1 = LA(1);
155	if( la_1 == EOF_CHAR || la_1 == c )
156	break;
157	consume();
158	}
159	}
160
161	/** Consume chars until one matches the given set */
162	virtual void consumeUntil(const BitSet& set)
163	{
164	for(;;)
165	{
166	int la_1 = LA(1);
167	if( la_1 == EOF_CHAR || set.member(la_1) )
168	break;
169	consume();
170	}
171	}
172
173	/// Mark the current position and return a id for it
174	virtual unsigned int mark()
175	{
176	return inputState->getInput().mark();
177	}
178	/// Rewind the scanner to a previously marked position
179	virtual void rewind(unsigned int pos)
180	{
181	inputState->getInput().rewind(pos);
182	}
183
184	/// See if input contains character 'c' throw MismatchedCharException if not
185	virtual void match(int c)
186	{
187	int la_1 = LA(1);
188	if ( la_1 != c )
189	throw MismatchedCharException(la_1, c, false, this);
190	consume();
191	}
192
193	/** See if input contains element from bitset b
194	* throw MismatchedCharException if not
195	*/
196	virtual void match(const BitSet& b)
197	{
198	int la_1 = LA(1);
199
200	if ( !b.member(la_1) )
201	throw MismatchedCharException( la_1, b, false, this );
202	consume();
203	}
204
205	/** See if input contains string 's' throw MismatchedCharException if not
206	* @note the string cannot match EOF
207	*/
208	virtual void match( const char* s )
209	{
210	while( *s != '\0' )
211	{
212	// the & 0xFF is here to prevent sign extension lateron
213	int la_1 = LA(1), c = (*s++ & 0xFF);
214
215	if ( la_1 != c )
216	throw MismatchedCharException(la_1, c, false, this);
217
218	consume();
219	}
220	}
221	/** See if input contains string 's' throw MismatchedCharException if not
222	* @note the string cannot match EOF
223	*/
224	virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
225	{
226	size_t len = s.length();
227
228	for (size_t i = 0; i < len; i++)
229	{
230	// the & 0xFF is here to prevent sign extension lateron
231	int la_1 = LA(1), c = (s[i] & 0xFF);
232
233	if ( la_1 != c )
234	throw MismatchedCharException(la_1, c, false, this);
235
236	consume();
237	}
238	}
239	/** See if input does not contain character 'c'
240	* throw MismatchedCharException if not
241	*/
242	virtual void matchNot(int c)
243	{
244	int la_1 = LA(1);
245
246	if ( la_1 == c )
247	throw MismatchedCharException(la_1, c, true, this);
248
249	consume();
250	}
251	/** See if input contains character in range c1-c2
252	* throw MismatchedCharException if not
253	*/
254	virtual void matchRange(int c1, int c2)
255	{
256	int la_1 = LA(1);
257
258	if ( la_1 < c1 || la_1 > c2 )
259	throw MismatchedCharException(la_1, c1, c2, false, this);
260
261	consume();
262	}
263
264	virtual bool getCaseSensitive() const
265	{
266	return caseSensitive;
267	}
268
269	virtual void setCaseSensitive(bool t)
270	{
271	caseSensitive = t;
272	}
273
274	virtual bool getCaseSensitiveLiterals() const=0;
275
276	/// Get the line the scanner currently is in (starts at 1)
277	virtual int getLine() const
278	{
279	return inputState->line;
280	}
281
282	/// set the line number
283	virtual void setLine(int l)
284	{
285	inputState->line = l;
286	}
287
288	/// Get the column the scanner currently is in (starts at 1)
289	virtual int getColumn() const
290	{
291	return inputState->column;
292	}
293	/// set the column number
294	virtual void setColumn(int c)
295	{
296	inputState->column = c;
297	}
298
299	/// get the filename for the file currently used
300	virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
301	{
302	return inputState->filename;
303	}
304	/// Set the filename the scanner is using (used in error messages)
305	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
306	{
307	inputState->filename = f;
308	}
309
310	virtual bool getCommitToPath() const
311	{
312	return commitToPath;
313	}
314
315	virtual void setCommitToPath(bool commit)
316	{
317	commitToPath = commit;
318	}
319
320	/** return a copy of the current text buffer */
321	virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
322	{
323	return text;
324	}
325
326	virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
327	{
328	text = s;
329	}
330
331	virtual void resetText()
332	{
333	text = "";
334	inputState->tokenStartColumn = inputState->column;
335	inputState->tokenStartLine = inputState->line;
336	}
337
338	virtual RefToken getTokenObject() const
339	{
340	return _returnToken;
341	}
342
343	/** Used to keep track of line breaks, needs to be called from
344	* within generated lexers when a \n \r is encountered.
345	*/
346	virtual void newline()
347	{
348	++inputState->line;
349	inputState->column = 1;
350	}
351
352	/** Advance the current column number by an appropriate amount according
353	* to the tabsize. This method needs to be explicitly called from the
354	* lexer rules encountering tabs.
355	*/
356	virtual void tab()
357	{
358	int c = getColumn();
359	int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
360	setColumn( nc );
361	}
362	/// set the tabsize. Returns the old tabsize
363	int setTabsize( int size )
364	{
365	int oldsize = tabsize;
366	tabsize = size;
367	return oldsize;
368	}
369	/// Return the tabsize used by the scanner
370	int getTabSize() const
371	{
372	return tabsize;
373	}
374
375	/** Report exception errors caught in nextToken() */
376	virtual void reportError(const RecognitionException& e);
377
378	/** Parser error-reporting function can be overridden in subclass */
379	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
380
381	/** Parser warning-reporting function can be overridden in subclass */
382	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
383
384	virtual InputBuffer& getInputBuffer()
385	{
386	return inputState->getInput();
387	}
388
389	virtual LexerSharedInputState getInputState()
390	{
391	return inputState;
392	}
393
394	/** set the input state for the lexer.
395	* @note state is a reference counted object, hence no reference */
396	virtual void setInputState(LexerSharedInputState state)
397	{
398	inputState = state;
399	}
400
401	/// Set the factory for created tokens
402	virtual void setTokenObjectFactory(factory_type factory)
403	{
404	tokenFactory = factory;
405	}
406
407	/** Test the token text against the literals table
408	* Override this method to perform a different literals test
409	*/
410	virtual int testLiteralsTable(int ttype) const
411	{
412	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
413	if (i != literals.end())
414	ttype = (*i).second;
415	return ttype;
416	}
417
418	/** Test the text passed in against the literals table
419	* Override this method to perform a different literals test
420	* This is used primarily when you want to test a portion of
421	* a token
422	*/
423	virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
424	{
425	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
426	if (i != literals.end())
427	ttype = (*i).second;
428	return ttype;
429	}
430
431	/// Override this method to get more specific case handling
432	virtual int toLower(int c) const
433	{
434	// test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
435	// also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
436	// this one is more structural. Maybe make this configurable.
437	return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
438	}
439
440	/** This method is called by YourLexer::nextToken() when the lexer has
441	* hit EOF condition. EOF is NOT a character.
442	* This method is not called if EOF is reached during
443	* syntactic predicate evaluation or during evaluation
444	* of normal lexical rules, which presumably would be
445	* an IOException. This traps the "normal" EOF condition.
446	*
447	* uponEOF() is called after the complete evaluation of
448	* the previous token and only if your parser asks
449	* for another token beyond that last non-EOF token.
450	*
451	* You might want to throw token or char stream exceptions
452	* like: "Heh, premature eof" or a retry stream exception
453	* ("I found the end of this file, go back to referencing file").
454	*/
455	virtual void uponEOF()
456	{
457	}
458
459	/// Methods used to change tracing behavior
460	virtual void traceIndent();
461	virtual void traceIn(const char* rname);
462	virtual void traceOut(const char* rname);
463
464	#ifndef NO_STATIC_CONSTS
465	static const int EOF_CHAR = EOF;
466	#else
467	enum {
468	EOF_CHAR = EOF
469	};
470	#endif
471	protected:
472	ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
473	/// flag indicating wether consume saves characters
474	bool saveConsumedInput;
475	factory_type tokenFactory; ///< Factory for tokens
476	bool caseSensitive; ///< Is this lexer case sensitive
477	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
478
479	RefToken _returnToken; ///< used to return tokens w/o using return val
480
481	/// Input state, gives access to input stream, shared among different lexers
482	LexerSharedInputState inputState;
483
484	/** Used during filter mode to indicate that path is desired.
485	* A subsequent scan error will report an error as usual
486	* if acceptPath=true;
487	*/
488	bool commitToPath;
489
490	int tabsize; ///< tab size the scanner uses.
491
492	/// Create a new RefToken of type t
493	virtual RefToken makeToken(int t)
494	{
495	RefToken tok = tokenFactory();
496	tok->setType(t);
497	tok->setColumn(inputState->tokenStartColumn);
498	tok->setLine(inputState->tokenStartLine);
499	return tok;
500	}
501
502	/** Tracer class, used when -traceLexer is passed to antlr
503	*/
504	class Tracer {
505	private:
506	CharScanner* parser;
507	const char* text;
508
509	Tracer(const Tracer& other); // undefined
510	Tracer& operator=(const Tracer& other); // undefined
511	public:
512	Tracer( CharScanner* p,const char* t )
513	: parser(p), text(t)
514	{
515	parser->traceIn(text);
516	}
517	~Tracer()
518	{
519	parser->traceOut(text);
520	}
521	};
522
523	int traceDepth;
524	private:
525	CharScanner( const CharScanner& other ); // undefined
526	CharScanner& operator=( const CharScanner& other ); // undefined
527
528	#ifndef NO_STATIC_CONSTS
529	static const int NO_CHAR = 0;
530	#else
531	enum {
532	NO_CHAR = 0
533	};
534	#endif
535	};
536
537	inline int CharScanner::LA(unsigned int i)
538	{
539	int c = inputState->getInput().LA(i);
540
541	if ( caseSensitive )
542	return c;
543	else
544	return toLower(c); // VC 6 tolower bug caught in toLower.
545	}
546
547	inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
548	{
549	if (scanner->getCaseSensitiveLiterals())
550	return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
551	else
552	{
553	#ifdef NO_STRCASECMP
554	return (stricmp(x.c_str(),y.c_str())<0);
555	#else
556	return (strcasecmp(x.c_str(),y.c_str())<0);
557	#endif
558	}
559	}
560
561	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
562	}
563	#endif
564
565	#endif //INC_CharScanner_hpp__