ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-2.0/src/antlr/TokenStreamRewriteEngine.hpp
Revision: 2469
Committed: Fri Dec 2 15:38:03 2005 UTC (18 years, 7 months ago) by tim
File size: 12539 byte(s)
Log Message:
End of the Link --> List
Return of the Oject-Oriented
replace yacc/lex parser with antlr parser

File Contents

# User Rev Content
1 tim 2469 #ifndef INC_TokenStreamRewriteEngine_hpp__
2     #define INC_TokenStreamRewriteEngine_hpp__
3    
4     /* ANTLR Translator Generator
5     * Project led by Terence Parr at http://www.jGuru.com
6     * Software rights: http://www.antlr.org/license.html
7     */
8    
9     #include <string>
10     #include <list>
11     #include <vector>
12     #include <map>
13     #include <utility>
14     #include <iostream>
15     #include <iterator>
16     #include <cassert>
17     #include <algorithm>
18    
19     #include <antlr/config.hpp>
20    
21     #include <antlr/TokenStream.hpp>
22     #include <antlr/TokenWithIndex.hpp>
23     #include <antlr/BitSet.hpp>
24    
25     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
26     namespace antlr {
27     #endif
28    
29     /** This token stream tracks the *entire* token stream coming from
30     * a lexer, but does not pass on the whitespace (or whatever else
31     * you want to discard) to the parser.
32     *
33     * This class can then be asked for the ith token in the input stream.
34     * Useful for dumping out the input stream exactly after doing some
35     * augmentation or other manipulations. Tokens are index from 0..n-1
36     *
37     * You can insert stuff, replace, and delete chunks. Note that the
38     * operations are done lazily--only if you convert the buffer to a
39     * String. This is very efficient because you are not moving data around
40     * all the time. As the buffer of tokens is converted to strings, the
41     * toString() method(s) check to see if there is an operation at the
42     * current index. If so, the operation is done and then normal String
43     * rendering continues on the buffer. This is like having multiple Turing
44     * machine instruction streams (programs) operating on a single input tape. :)
45     *
46     * Since the operations are done lazily at toString-time, operations do not
47     * screw up the token index values. That is, an insert operation at token
48     * index i does not change the index values for tokens i+1..n-1.
49     *
50     * Because operations never actually alter the buffer, you may always get
51     * the original token stream back without undoing anything. Since
52     * the instructions are queued up, you can easily simulate transactions and
53     * roll back any changes if there is an error just by removing instructions.
54     * For example,
55     *
56     * TokenStreamRewriteEngine rewriteEngine =
57     * new TokenStreamRewriteEngine(lexer);
58     * JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
59     * ...
60     * rewriteEngine.insertAfter("pass1", t, "foobar");}
61     * rewriteEngine.insertAfter("pass2", u, "start");}
62     * System.out.println(rewriteEngine.toString("pass1"));
63     * System.out.println(rewriteEngine.toString("pass2"));
64     *
65     * You can also have multiple "instruction streams" and get multiple
66     * rewrites from a single pass over the input. Just name the instruction
67     * streams and use that name again when printing the buffer. This could be
68     * useful for generating a C file and also its header file--all from the
69     * same buffer.
70     *
71     * If you don't use named rewrite streams, a "default" stream is used.
72     *
73     * Terence Parr, parrt@cs.usfca.edu
74     * University of San Francisco
75     * February 2004
76     */
77     class TokenStreamRewriteEngine : public TokenStream
78     {
79     public:
80     typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list;
81    
82     static const size_t MIN_TOKEN_INDEX = 0;
83     static const char* DEFAULT_PROGRAM_NAME;
84     static const int PROGRAM_INIT_SIZE = 100;
85    
86     struct tokenToStream {
87     tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {}
88     template <typename T> void operator() ( const T& t ) {
89     out << t->getText();
90     }
91     ANTLR_USE_NAMESPACE(std)ostream& out;
92     };
93    
94     class RewriteOperation {
95     protected:
96     RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt )
97     : index(idx), text(txt)
98     {
99     }
100     public:
101     virtual ~RewriteOperation()
102     {
103     }
104     /** Execute the rewrite operation by possibly adding to the buffer.
105     * Return the index of the next token to operate on.
106     */
107     virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) {
108     return index;
109     }
110     virtual size_t getIndex() const {
111     return index;
112     }
113     virtual const char* type() const {
114     return "RewriteOperation";
115     }
116     protected:
117     size_t index;
118     ANTLR_USE_NAMESPACE(std)string text;
119     };
120    
121     struct executeOperation {
122     ANTLR_USE_NAMESPACE(std)ostream& out;
123     executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {}
124     void operator () ( RewriteOperation* t ) {
125     t->execute(out);
126     }
127     };
128    
129     /// list of rewrite operations
130     typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list;
131     /// map program name to <program counter,program> tuple
132     typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map;
133    
134     class InsertBeforeOp : public RewriteOperation
135     {
136     public:
137     InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text )
138     : RewriteOperation(index, text)
139     {
140     }
141     virtual ~InsertBeforeOp() {}
142     virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out )
143     {
144     out << text;
145     return index;
146     }
147     virtual const char* type() const {
148     return "InsertBeforeOp";
149     }
150     };
151    
152     class ReplaceOp : public RewriteOperation
153     {
154     public:
155     ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text)
156     : RewriteOperation(from,text)
157     , lastIndex(to)
158     {
159     }
160     virtual ~ReplaceOp() {}
161     virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) {
162     out << text;
163     return lastIndex+1;
164     }
165     virtual const char* type() const {
166     return "ReplaceOp";
167     }
168     protected:
169     size_t lastIndex;
170     };
171    
172     class DeleteOp : public ReplaceOp {
173     public:
174     DeleteOp(size_t from, size_t to)
175     : ReplaceOp(from,to,"")
176     {
177     }
178     virtual const char* type() const {
179     return "DeleteOp";
180     }
181     };
182    
183     TokenStreamRewriteEngine(TokenStream& upstream);
184    
185     TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize);
186    
187     RefToken nextToken( void );
188    
189     void rollback(size_t instructionIndex) {
190     rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
191     }
192    
193     /** Rollback the instruction stream for a program so that
194     * the indicated instruction (via instructionIndex) is no
195     * longer in the stream. UNTESTED!
196     */
197     void rollback(const ANTLR_USE_NAMESPACE(std)string& programName,
198     size_t instructionIndex );
199    
200     void deleteProgram() {
201     deleteProgram(DEFAULT_PROGRAM_NAME);
202     }
203    
204     /** Reset the program so that no instructions exist */
205     void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) {
206     rollback(programName, MIN_TOKEN_INDEX);
207     }
208    
209     void insertAfter( RefTokenWithIndex t,
210     const ANTLR_USE_NAMESPACE(std)string& text )
211     {
212     insertAfter(DEFAULT_PROGRAM_NAME, t, text);
213     }
214    
215     void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
216     insertAfter(DEFAULT_PROGRAM_NAME, index, text);
217     }
218    
219     void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
220     RefTokenWithIndex t,
221     const ANTLR_USE_NAMESPACE(std)string& text )
222     {
223     insertAfter(programName, t->getIndex(), text);
224     }
225    
226     void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
227     size_t index,
228     const ANTLR_USE_NAMESPACE(std)string& text )
229     {
230     // to insert after, just insert before next index (even if past end)
231     insertBefore(programName,index+1, text);
232     }
233    
234     void insertBefore( RefTokenWithIndex t,
235     const ANTLR_USE_NAMESPACE(std)string& text )
236     {
237     // std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl;
238     insertBefore(DEFAULT_PROGRAM_NAME, t, text);
239     }
240    
241     void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
242     insertBefore(DEFAULT_PROGRAM_NAME, index, text);
243     }
244    
245     void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
246     RefTokenWithIndex t,
247     const ANTLR_USE_NAMESPACE(std)string& text )
248     {
249     insertBefore(programName, t->getIndex(), text);
250     }
251    
252     void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
253     size_t index,
254     const ANTLR_USE_NAMESPACE(std)string& text )
255     {
256     addToSortedRewriteList(programName, new InsertBeforeOp(index,text));
257     }
258    
259     void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text)
260     {
261     replace(DEFAULT_PROGRAM_NAME, index, index, text);
262     }
263    
264     void replace( size_t from, size_t to,
265     const ANTLR_USE_NAMESPACE(std)string& text)
266     {
267     replace(DEFAULT_PROGRAM_NAME, from, to, text);
268     }
269    
270     void replace( RefTokenWithIndex indexT,
271     const ANTLR_USE_NAMESPACE(std)string& text )
272     {
273     replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text);
274     }
275    
276     void replace( RefTokenWithIndex from,
277     RefTokenWithIndex to,
278     const ANTLR_USE_NAMESPACE(std)string& text )
279     {
280     replace(DEFAULT_PROGRAM_NAME, from, to, text);
281     }
282    
283     void replace(const ANTLR_USE_NAMESPACE(std)string& programName,
284     size_t from, size_t to,
285     const ANTLR_USE_NAMESPACE(std)string& text )
286     {
287     addToSortedRewriteList(programName,new ReplaceOp(from, to, text));
288     }
289    
290     void replace( const ANTLR_USE_NAMESPACE(std)string& programName,
291     RefTokenWithIndex from,
292     RefTokenWithIndex to,
293     const ANTLR_USE_NAMESPACE(std)string& text )
294     {
295     replace(programName,
296     from->getIndex(),
297     to->getIndex(),
298     text);
299     }
300    
301     void remove(size_t index) {
302     remove(DEFAULT_PROGRAM_NAME, index, index);
303     }
304    
305     void remove(size_t from, size_t to) {
306     remove(DEFAULT_PROGRAM_NAME, from, to);
307     }
308    
309     void remove(RefTokenWithIndex indexT) {
310     remove(DEFAULT_PROGRAM_NAME, indexT, indexT);
311     }
312    
313     void remove(RefTokenWithIndex from, RefTokenWithIndex to) {
314     remove(DEFAULT_PROGRAM_NAME, from, to);
315     }
316    
317     void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
318     size_t from, size_t to)
319     {
320     replace(programName,from,to,"");
321     }
322    
323     void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
324     RefTokenWithIndex from, RefTokenWithIndex to )
325     {
326     replace(programName,from,to,"");
327     }
328    
329     void discard(int ttype) {
330     discardMask.add(ttype);
331     }
332    
333     RefToken getToken( size_t i )
334     {
335     return RefToken(tokens.at(i));
336     }
337    
338     size_t getTokenStreamSize() const {
339     return tokens.size();
340     }
341    
342     void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
343     ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) );
344     }
345    
346     void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out,
347     size_t start, size_t end ) const;
348    
349     void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
350     return toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
351     }
352    
353     void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
354     const ANTLR_USE_NAMESPACE(std)string& programName ) const
355     {
356     return toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize());
357     }
358    
359     void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
360     size_t start, size_t end ) const
361     {
362     return toStream(out, DEFAULT_PROGRAM_NAME, start, end);
363     }
364    
365     void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
366     const ANTLR_USE_NAMESPACE(std)string& programName,
367     size_t firstToken, size_t lastToken ) const;
368    
369     void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
370     return toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
371     }
372    
373     void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out,
374     size_t start, size_t end ) const;
375    
376     size_t getLastRewriteTokenIndex() const {
377     return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
378     }
379    
380     /** Return the last index for the program named programName
381     * return 0 if the program does not exist or the program is empty.
382     * (Note this is different from the java implementation that returns -1)
383     */
384     size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const {
385     program_map::const_iterator rewrites = programs.find(programName);
386    
387     if( rewrites == programs.end() )
388     return 0;
389    
390     const operation_list& prog = rewrites->second;
391     if( !prog.empty() )
392     {
393     operation_list::const_iterator last = prog.end();
394     --last;
395     return (*last)->getIndex();
396     }
397     return 0;
398     }
399    
400     protected:
401     /** If op.index > lastRewriteTokenIndexes, just add to the end.
402     * Otherwise, do linear */
403     void addToSortedRewriteList(RewriteOperation* op) {
404     addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
405     }
406    
407     void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName,
408     RewriteOperation* op );
409    
410     protected:
411     /** Who do we suck tokens from? */
412     TokenStream& stream;
413     /** track index of tokens */
414     size_t index;
415    
416     /** Track the incoming list of tokens */
417     token_list tokens;
418    
419     /** You may have multiple, named streams of rewrite operations.
420     * I'm calling these things "programs."
421     * Maps String (name) -> rewrite (List)
422     */
423     program_map programs;
424    
425     /** Which (whitespace) token(s) to throw out */
426     BitSet discardMask;
427     };
428    
429     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
430     }
431     #endif
432    
433     #endif