ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/OOPSE-2.0/src/antlr/TokenStreamRewriteEngine.hpp
Revision: 2469
Committed: Fri Dec 2 15:38:03 2005 UTC (18 years, 7 months ago) by tim
File size: 12539 byte(s)
Log Message:
End of the Link --> List
Return of the Oject-Oriented
replace yacc/lex parser with antlr parser

File Contents

# Content
1 #ifndef INC_TokenStreamRewriteEngine_hpp__
2 #define INC_TokenStreamRewriteEngine_hpp__
3
4 /* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
7 */
8
9 #include <string>
10 #include <list>
11 #include <vector>
12 #include <map>
13 #include <utility>
14 #include <iostream>
15 #include <iterator>
16 #include <cassert>
17 #include <algorithm>
18
19 #include <antlr/config.hpp>
20
21 #include <antlr/TokenStream.hpp>
22 #include <antlr/TokenWithIndex.hpp>
23 #include <antlr/BitSet.hpp>
24
25 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
26 namespace antlr {
27 #endif
28
29 /** This token stream tracks the *entire* token stream coming from
30 * a lexer, but does not pass on the whitespace (or whatever else
31 * you want to discard) to the parser.
32 *
33 * This class can then be asked for the ith token in the input stream.
34 * Useful for dumping out the input stream exactly after doing some
35 * augmentation or other manipulations. Tokens are indexed from 0..n-1
36 *
37 * You can insert stuff, replace, and delete chunks. Note that the
38 * operations are done lazily--only if you convert the buffer to a
39 * String. This is very efficient because you are not moving data around
40 * all the time. As the buffer of tokens is converted to strings, the
41 * toString() method(s) check to see if there is an operation at the
42 * current index. If so, the operation is done and then normal String
43 * rendering continues on the buffer. This is like having multiple Turing
44 * machine instruction streams (programs) operating on a single input tape. :)
45 *
46 * Since the operations are done lazily at toString-time, operations do not
47 * screw up the token index values. That is, an insert operation at token
48 * index i does not change the index values for tokens i+1..n-1.
49 *
50 * Because operations never actually alter the buffer, you may always get
51 * the original token stream back without undoing anything. Since
52 * the instructions are queued up, you can easily simulate transactions and
53 * roll back any changes if there is an error just by removing instructions.
54 * For example, in C++ (where toStream() plays the role of toString()):
55 *
56 * TokenStreamRewriteEngine rewriteEngine(lexer);
57 * JavaRecognizer parser(rewriteEngine);
58 * ...
59 * // t and u are tokens (RefTokenWithIndex) seen during the parse
60 * rewriteEngine.insertAfter("pass1", t, "foobar");
61 * rewriteEngine.insertAfter("pass2", u, "start");
62 * rewriteEngine.toStream(std::cout, "pass1");
63 * rewriteEngine.toStream(std::cout, "pass2");
64 *
65 * You can also have multiple "instruction streams" and get multiple
66 * rewrites from a single pass over the input. Just name the instruction
67 * streams and use that name again when printing the buffer. This could be
68 * useful for generating a C file and also its header file--all from the
69 * same buffer.
70 *
71 * If you don't use named rewrite streams, a "default" stream is used.
72 *
73 * Terence Parr, parrt@cs.usfca.edu
74 * University of San Francisco
75 * February 2004
76 */
77 class TokenStreamRewriteEngine : public TokenStream
78 {
79 public:
80 typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list;
81
82 static const size_t MIN_TOKEN_INDEX = 0;
83 static const char* DEFAULT_PROGRAM_NAME;
84 static const int PROGRAM_INIT_SIZE = 100;
85
86 struct tokenToStream {
87 tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {}
88 template <typename T> void operator() ( const T& t ) {
89 out << t->getText();
90 }
91 ANTLR_USE_NAMESPACE(std)ostream& out;
92 };
93
94 class RewriteOperation {
95 protected:
96 RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt )
97 : index(idx), text(txt)
98 {
99 }
100 public:
101 virtual ~RewriteOperation()
102 {
103 }
104 /** Execute the rewrite operation by possibly adding to the buffer.
105 * Return the index of the next token to operate on.
106 */
107 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) {
108 return index;
109 }
110 virtual size_t getIndex() const {
111 return index;
112 }
113 virtual const char* type() const {
114 return "RewriteOperation";
115 }
116 protected:
117 size_t index;
118 ANTLR_USE_NAMESPACE(std)string text;
119 };
120
121 struct executeOperation {
122 ANTLR_USE_NAMESPACE(std)ostream& out;
123 executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {}
124 void operator () ( RewriteOperation* t ) {
125 t->execute(out);
126 }
127 };
128
129 /// list of rewrite operations
130 typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list;
131 /// map program name to its list of rewrite operations
132 typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map;
133
134 class InsertBeforeOp : public RewriteOperation
135 {
136 public:
137 InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text )
138 : RewriteOperation(index, text)
139 {
140 }
141 virtual ~InsertBeforeOp() {}
142 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out )
143 {
144 out << text;
145 return index;
146 }
147 virtual const char* type() const {
148 return "InsertBeforeOp";
149 }
150 };
151
152 class ReplaceOp : public RewriteOperation
153 {
154 public:
155 ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text)
156 : RewriteOperation(from,text)
157 , lastIndex(to)
158 {
159 }
160 virtual ~ReplaceOp() {}
161 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) {
162 out << text;
163 return lastIndex+1;
164 }
165 virtual const char* type() const {
166 return "ReplaceOp";
167 }
168 protected:
169 size_t lastIndex;
170 };
171
172 class DeleteOp : public ReplaceOp {
173 public:
174 DeleteOp(size_t from, size_t to)
175 : ReplaceOp(from,to,"")
176 {
177 }
178 virtual const char* type() const {
179 return "DeleteOp";
180 }
181 };
182
183 TokenStreamRewriteEngine(TokenStream& upstream);
184
185 TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize);
186
187 RefToken nextToken( void );
188
189 void rollback(size_t instructionIndex) {
190 rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
191 }
192
193 /** Rollback the instruction stream for a program so that
194 * the indicated instruction (via instructionIndex) is no
195 * longer in the stream. UNTESTED!
196 */
197 void rollback(const ANTLR_USE_NAMESPACE(std)string& programName,
198 size_t instructionIndex );
199
200 void deleteProgram() {
201 deleteProgram(DEFAULT_PROGRAM_NAME);
202 }
203
204 /** Reset the program so that no instructions exist */
205 void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) {
206 rollback(programName, MIN_TOKEN_INDEX);
207 }
208
209 void insertAfter( RefTokenWithIndex t,
210 const ANTLR_USE_NAMESPACE(std)string& text )
211 {
212 insertAfter(DEFAULT_PROGRAM_NAME, t, text);
213 }
214
215 void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
216 insertAfter(DEFAULT_PROGRAM_NAME, index, text);
217 }
218
219 void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
220 RefTokenWithIndex t,
221 const ANTLR_USE_NAMESPACE(std)string& text )
222 {
223 insertAfter(programName, t->getIndex(), text);
224 }
225
226 void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
227 size_t index,
228 const ANTLR_USE_NAMESPACE(std)string& text )
229 {
230 // to insert after, just insert before next index (even if past end)
231 insertBefore(programName,index+1, text);
232 }
233
234 void insertBefore( RefTokenWithIndex t,
235 const ANTLR_USE_NAMESPACE(std)string& text )
236 {
237 // std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl;
238 insertBefore(DEFAULT_PROGRAM_NAME, t, text);
239 }
240
241 void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
242 insertBefore(DEFAULT_PROGRAM_NAME, index, text);
243 }
244
245 void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
246 RefTokenWithIndex t,
247 const ANTLR_USE_NAMESPACE(std)string& text )
248 {
249 insertBefore(programName, t->getIndex(), text);
250 }
251
252 void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
253 size_t index,
254 const ANTLR_USE_NAMESPACE(std)string& text )
255 {
256 addToSortedRewriteList(programName, new InsertBeforeOp(index,text));
257 }
258
259 void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text)
260 {
261 replace(DEFAULT_PROGRAM_NAME, index, index, text);
262 }
263
264 void replace( size_t from, size_t to,
265 const ANTLR_USE_NAMESPACE(std)string& text)
266 {
267 replace(DEFAULT_PROGRAM_NAME, from, to, text);
268 }
269
270 void replace( RefTokenWithIndex indexT,
271 const ANTLR_USE_NAMESPACE(std)string& text )
272 {
273 replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text);
274 }
275
276 void replace( RefTokenWithIndex from,
277 RefTokenWithIndex to,
278 const ANTLR_USE_NAMESPACE(std)string& text )
279 {
280 replace(DEFAULT_PROGRAM_NAME, from, to, text);
281 }
282
283 void replace(const ANTLR_USE_NAMESPACE(std)string& programName,
284 size_t from, size_t to,
285 const ANTLR_USE_NAMESPACE(std)string& text )
286 {
287 addToSortedRewriteList(programName,new ReplaceOp(from, to, text));
288 }
289
290 void replace( const ANTLR_USE_NAMESPACE(std)string& programName,
291 RefTokenWithIndex from,
292 RefTokenWithIndex to,
293 const ANTLR_USE_NAMESPACE(std)string& text )
294 {
295 replace(programName,
296 from->getIndex(),
297 to->getIndex(),
298 text);
299 }
300
301 void remove(size_t index) {
302 remove(DEFAULT_PROGRAM_NAME, index, index);
303 }
304
305 void remove(size_t from, size_t to) {
306 remove(DEFAULT_PROGRAM_NAME, from, to);
307 }
308
309 void remove(RefTokenWithIndex indexT) {
310 remove(DEFAULT_PROGRAM_NAME, indexT, indexT);
311 }
312
313 void remove(RefTokenWithIndex from, RefTokenWithIndex to) {
314 remove(DEFAULT_PROGRAM_NAME, from, to);
315 }
316
317 void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
318 size_t from, size_t to)
319 {
320 replace(programName,from,to,"");
321 }
322
323 void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
324 RefTokenWithIndex from, RefTokenWithIndex to )
325 {
326 replace(programName,from,to,"");
327 }
328
329 void discard(int ttype) {
330 discardMask.add(ttype);
331 }
332
333 RefToken getToken( size_t i )
334 {
335 return RefToken(tokens.at(i));
336 }
337
338 size_t getTokenStreamSize() const {
339 return tokens.size();
340 }
341
342 void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
343 ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) );
344 }
345
346 void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out,
347 size_t start, size_t end ) const;
348
349 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
350 return toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
351 }
352
353 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
354 const ANTLR_USE_NAMESPACE(std)string& programName ) const
355 {
356 return toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize());
357 }
358
359 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
360 size_t start, size_t end ) const
361 {
362 return toStream(out, DEFAULT_PROGRAM_NAME, start, end);
363 }
364
365 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
366 const ANTLR_USE_NAMESPACE(std)string& programName,
367 size_t firstToken, size_t lastToken ) const;
368
369 void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
370 return toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
371 }
372
373 void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out,
374 size_t start, size_t end ) const;
375
376 size_t getLastRewriteTokenIndex() const {
377 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
378 }
379
380 /** Return the last index for the program named programName
381 * return 0 if the program does not exist or the program is empty.
382 * (Note this is different from the java implementation that returns -1)
383 */
384 size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const {
385 program_map::const_iterator rewrites = programs.find(programName);
386
387 if( rewrites == programs.end() )
388 return 0;
389
390 const operation_list& prog = rewrites->second;
391 if( !prog.empty() )
392 {
393 operation_list::const_iterator last = prog.end();
394 --last;
395 return (*last)->getIndex();
396 }
397 return 0;
398 }
399
400 protected:
401 /** If op.index > lastRewriteTokenIndexes, just add to the end.
402 * Otherwise, do linear */
403 void addToSortedRewriteList(RewriteOperation* op) {
404 addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
405 }
406
407 void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName,
408 RewriteOperation* op );
409
410 protected:
411 /** Who do we suck tokens from? */
412 TokenStream& stream;
413 /** track index of tokens */
414 size_t index;
415
416 /** Track the incoming list of tokens */
417 token_list tokens;
418
419 /** You may have multiple, named streams of rewrite operations.
420 * I'm calling these things "programs."
421 * Maps String (name) -> rewrite (List)
422 */
423 program_map programs;
424
425 /** Which (whitespace) token(s) to throw out */
426 BitSet discardMask;
427 };
428
429 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
430 }
431 #endif
432
433 #endif