OpenMD 3.0
Molecular Dynamics in the Open
Loading...
Searching...
No Matches
TokenStreamRewriteEngine.hpp
1#ifndef INC_TokenStreamRewriteEngine_hpp__
2#define INC_TokenStreamRewriteEngine_hpp__
3
4/* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
7 */
8
9#include <string>
10#include <list>
11#include <vector>
12#include <map>
13#include <utility>
14#include <iostream>
15#include <iterator>
16#include <cassert>
17#include <algorithm>
18
19#include <antlr/config.hpp>
20
21#include <antlr/TokenStream.hpp>
22#include <antlr/TokenWithIndex.hpp>
23#include <antlr/BitSet.hpp>
24
25#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
26namespace antlr {
27#endif
28
29/** This token stream tracks the *entire* token stream coming from
30 * a lexer, but does not pass on the whitespace (or whatever else
31 * you want to discard) to the parser.
32 *
33 * This class can then be asked for the ith token in the input stream.
34 * Useful for dumping out the input stream exactly after doing some
35 * augmentation or other manipulations. Tokens are indexed from 0..n-1.
36 *
37 * You can insert stuff, replace, and delete chunks. Note that the
38 * operations are done lazily--only if you convert the buffer to a
39 * String. This is very efficient because you are not moving data around
40 * all the time. As the buffer of tokens is converted to strings, the
41 * toString() method(s) check to see if there is an operation at the
42 * current index. If so, the operation is done and then normal String
43 * rendering continues on the buffer. This is like having multiple Turing
44 * machine instruction streams (programs) operating on a single input tape. :)
45 *
46 * Since the operations are done lazily at toString-time, operations do not
47 * screw up the token index values. That is, an insert operation at token
48 * index i does not change the index values for tokens i+1..n-1.
49 *
50 * Because operations never actually alter the buffer, you may always get
51 * the original token stream back without undoing anything. Since
52 * the instructions are queued up, you can easily simulate transactions and
53 * roll back any changes if there is an error just by removing instructions.
54 * For example,
55 *
56 * TokenStreamRewriteEngine rewriteEngine =
57 * new TokenStreamRewriteEngine(lexer);
58 * JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
59 * ...
60 * rewriteEngine.insertAfter("pass1", t, "foobar");
61 * rewriteEngine.insertAfter("pass2", u, "start");
62 * System.out.println(rewriteEngine.toString("pass1"));
63 * System.out.println(rewriteEngine.toString("pass2"));
64 *
65 * You can also have multiple "instruction streams" and get multiple
66 * rewrites from a single pass over the input. Just name the instruction
67 * streams and use that name again when printing the buffer. This could be
68 * useful for generating a C file and also its header file--all from the
69 * same buffer.
70 *
71 * If you don't use named rewrite streams, a "default" stream is used.
72 *
73 * Terence Parr, parrt@cs.usfca.edu
74 * University of San Francisco
75 * February 2004
76 */
78{
79public:
/// Container type used to buffer the incoming tokens (type of the `tokens` member).
80 typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list;
/// Program name passed by every overload that takes no programName argument.
81 static const char* DEFAULT_PROGRAM_NAME;
82#ifndef NO_STATIC_CONSTS
/// Declared here only; their values are not visible in this header.
83 static const size_t MIN_TOKEN_INDEX;
84 static const int PROGRAM_INIT_SIZE;
85#else
/// Used when NO_STATIC_CONSTS is defined: the same two names as enum constants.
86 enum {
87 MIN_TOKEN_INDEX = 0, ///< start index used by the range-less toStream()/toDebugStream()
88 PROGRAM_INIT_SIZE = 100
89 };
90#endif
91
/** Bind the functor to the stream that will receive the token text. */
93 tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {}
/** Write the text of one token to out.
 * T is any pointer-like type on which t->getText() is valid.
 */
94 template <typename T> void operator() ( const T& t ) {
95 out << t->getText();
96 }
/// Destination stream; held by reference, so it must outlive this functor.
97 ANTLR_USE_NAMESPACE(std)ostream& out;
98 };
99
101 protected:
/** Store the token index the operation applies to and its text.
 * Listed under `protected:`, so only derived operations can construct one.
 */
102 RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt )
103 : index(idx), text(txt)
104 {
105 }
106 public:
/** Virtual so that operations can be destroyed through a RewriteOperation*. */
107 virtual ~RewriteOperation()
108 {
109 }
110 /** Execute the rewrite operation by possibly adding to the buffer.
111 * Return the index of the next token to operate on.
 * This base version writes nothing and returns its own index.
112 */
113 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) {
114 return index;
115 }
/** Token index this operation was created for. */
116 virtual size_t getIndex() const {
117 return index;
118 }
/** Name of the concrete operation class, as a string literal. */
119 virtual const char* type() const {
120 return "RewriteOperation";
121 }
122 protected:
123 size_t index; ///< token index the operation is attached to
124 ANTLR_USE_NAMESPACE(std)string text; ///< text written by the derived operations' execute()
125 };
126
/// Stream handed to every operation's execute(); held by reference.
128 ANTLR_USE_NAMESPACE(std)ostream& out;
/** Bind the functor to the stream the operations should write to. */
129 executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {}
/** Run one operation against out; the index returned by execute() is ignored. */
130 void operator () ( RewriteOperation* t ) {
131 t->execute(out);
132 }
133 };
134
135 /// list of rewrite operations
136 typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list;
137 /// map program name to its list of rewrite operations
138 typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map;
139
141 {
142 public:
/** Create an operation that emits text when token index `index` is reached. */
143 InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text )
144 : RewriteOperation(index, text)
145 {
146 }
147 virtual ~InsertBeforeOp() {}
/** Write the stored text to out and return the operation's own index,
 * i.e. no token is skipped by an insert.
 */
148 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out )
149 {
150 out << text;
151 return index;
152 }
/** Name of this operation class, as a string literal. */
153 virtual const char* type() const {
154 return "InsertBeforeOp";
155 }
156 };
157
159 {
160 public:
/** Create an operation that replaces the tokens from..to with text.
 * `from` becomes the operation's index and `to` is kept as lastIndex;
 * execute() resumes at lastIndex+1, so `to` is the last replaced token.
 * text is taken by const reference (as in the sibling operation
 * constructors) so it is copied only once, into the base class.
 */
161 ReplaceOp(size_t from, size_t to, const ANTLR_USE_NAMESPACE(std)string& text)
162 : RewriteOperation(from,text)
163 , lastIndex(to)
164 {
165 }
166 virtual ~ReplaceOp() {}
/** Write the replacement text to out and return lastIndex+1,
 * the index of the first token after the replaced range.
 */
167 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) {
168 out << text;
169 return lastIndex+1;
170 }
/** Name of this operation class, as a string literal. */
171 virtual const char* type() const {
172 return "ReplaceOp";
173 }
174 protected:
175 size_t lastIndex; ///< index of the last token covered by the replacement
176 };
177
/** A ReplaceOp whose replacement text is the empty string, so executing it
 * writes nothing and skips the tokens from..to.
 * Note: the remove() overloads in this header go through replace() with ""
 * rather than constructing a DeleteOp.
 */
178 class DeleteOp : public ReplaceOp {
179 public:
180 DeleteOp(size_t from, size_t to)
181 : ReplaceOp(from,to,"")
182 {
183 }
/** Name of this operation class, as a string literal. */
184 virtual const char* type() const {
185 return "DeleteOp";
186 }
187 };
188
190
/** Construct an engine that reads its tokens from upstream.
 * Only declared in this header.
 * NOTE(review): initialSize presumably pre-sizes the token buffer - confirm in the definition.
 */
191 TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize);
192
/** Return the next token. Only declared in this header.
 * NOTE(review): per the class comment this presumably buffers every upstream
 * token and passes on only those not in the discard set - confirm in the definition.
 */
193 RefToken nextToken( void );
194
/** Roll back the program named DEFAULT_PROGRAM_NAME; forwards to the
 * two-argument overload.
 */
195 void rollback(size_t instructionIndex) {
196 rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
197 }
198
199 /** Rollback the instruction stream for a program so that
200 * the indicated instruction (via instructionIndex) is no
201 * longer in the stream. UNTESTED!
 * Only declared in this header.
202 */
203 void rollback(const ANTLR_USE_NAMESPACE(std)string& programName,
204 size_t instructionIndex );
205
/** Reset the program named DEFAULT_PROGRAM_NAME. */
206 void deleteProgram() {
207 deleteProgram(DEFAULT_PROGRAM_NAME);
208 }
209
210 /** Reset the program so that no instructions exist.
 * Implemented as a rollback to MIN_TOKEN_INDEX.
 */
211 void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) {
212 rollback(programName, MIN_TOKEN_INDEX);
213 }
214
/** Queue text for insertion after token t in the DEFAULT_PROGRAM_NAME program. */
215 void insertAfter( RefTokenWithIndex t,
216 const ANTLR_USE_NAMESPACE(std)string& text )
217 {
218 insertAfter(DEFAULT_PROGRAM_NAME, t, text);
219 }
220
/** Queue text for insertion after token index `index` in the DEFAULT_PROGRAM_NAME program. */
221 void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
222 insertAfter(DEFAULT_PROGRAM_NAME, index, text);
223 }
224
/** Queue text for insertion after token t in the named program;
 * the position is taken from t->getIndex().
 */
225 void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
226 RefTokenWithIndex t,
227 const ANTLR_USE_NAMESPACE(std)string& text )
228 {
229 insertAfter(programName, t->getIndex(), text);
230 }
231
/** Queue text for insertion after token index `index` in the named program.
 * All other insertAfter overloads end up here.
 */
232 void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
233 size_t index,
234 const ANTLR_USE_NAMESPACE(std)string& text )
235 {
236 // to insert after, just insert before next index (even if past end)
237 insertBefore(programName,index+1, text);
238 }
239
/** Queue text for insertion before token t in the DEFAULT_PROGRAM_NAME program. */
240 void insertBefore( RefTokenWithIndex t,
241 const ANTLR_USE_NAMESPACE(std)string& text )
242 {
243 // std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl;
244 insertBefore(DEFAULT_PROGRAM_NAME, t, text);
245 }
246
/** Queue text for insertion before token index `index` in the DEFAULT_PROGRAM_NAME program. */
247 void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
248 insertBefore(DEFAULT_PROGRAM_NAME, index, text);
249 }
250
/** Queue text for insertion before token t in the named program;
 * the position is taken from t->getIndex().
 */
251 void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
252 RefTokenWithIndex t,
253 const ANTLR_USE_NAMESPACE(std)string& text )
254 {
255 insertBefore(programName, t->getIndex(), text);
256 }
257
/** Queue text for insertion before token index `index` in the named program.
 * Allocates an InsertBeforeOp with new and hands the pointer to
 * addToSortedRewriteList().
 */
258 void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
259 size_t index,
260 const ANTLR_USE_NAMESPACE(std)string& text )
261 {
262 addToSortedRewriteList(programName, new InsertBeforeOp(index,text));
263 }
264
/** Replace the single token at `index` with text in the DEFAULT_PROGRAM_NAME program. */
265 void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text)
266 {
267 replace(DEFAULT_PROGRAM_NAME, index, index, text);
268 }
269
/** Replace token indexes from..to with text in the DEFAULT_PROGRAM_NAME program. */
270 void replace( size_t from, size_t to,
271 const ANTLR_USE_NAMESPACE(std)string& text)
272 {
273 replace(DEFAULT_PROGRAM_NAME, from, to, text);
274 }
275
/** Replace the single token indexT with text in the DEFAULT_PROGRAM_NAME program;
 * its index is used as both ends of the range.
 */
276 void replace( RefTokenWithIndex indexT,
277 const ANTLR_USE_NAMESPACE(std)string& text )
278 {
279 replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text);
280 }
281
/** Replace the tokens from..to with text in the DEFAULT_PROGRAM_NAME program. */
282 void replace( RefTokenWithIndex from,
283 RefTokenWithIndex to,
284 const ANTLR_USE_NAMESPACE(std)string& text )
285 {
286 replace(DEFAULT_PROGRAM_NAME, from, to, text);
287 }
288
/** Replace token indexes from..to with text in the named program.
 * Allocates a ReplaceOp with new and hands the pointer to
 * addToSortedRewriteList(). All other replace overloads end up here.
 */
289 void replace(const ANTLR_USE_NAMESPACE(std)string& programName,
290 size_t from, size_t to,
291 const ANTLR_USE_NAMESPACE(std)string& text )
292 {
293 addToSortedRewriteList(programName,new ReplaceOp(from, to, text));
294 }
295
/** Replace the tokens from..to with text in the named program;
 * the range is taken from the tokens' getIndex() values.
 */
296 void replace( const ANTLR_USE_NAMESPACE(std)string& programName,
297 RefTokenWithIndex from,
298 RefTokenWithIndex to,
299 const ANTLR_USE_NAMESPACE(std)string& text )
300 {
301 replace(programName,
302 from->getIndex(),
303 to->getIndex(),
304 text);
305 }
306
/** Remove the single token at `index` in the DEFAULT_PROGRAM_NAME program. */
307 void remove(size_t index) {
308 remove(DEFAULT_PROGRAM_NAME, index, index);
309 }
310
/** Remove token indexes from..to in the DEFAULT_PROGRAM_NAME program. */
311 void remove(size_t from, size_t to) {
312 remove(DEFAULT_PROGRAM_NAME, from, to);
313 }
314
/** Remove the single token indexT in the DEFAULT_PROGRAM_NAME program;
 * the token is passed as both ends of the range.
 */
315 void remove(RefTokenWithIndex indexT) {
316 remove(DEFAULT_PROGRAM_NAME, indexT, indexT);
317 }
318
/** Remove the tokens from..to in the DEFAULT_PROGRAM_NAME program. */
319 void remove(RefTokenWithIndex from, RefTokenWithIndex to) {
320 remove(DEFAULT_PROGRAM_NAME, from, to);
321 }
322
/** Remove token indexes from..to in the named program.
 * A removal is queued as a replace with the empty string.
 */
323 void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
324 size_t from, size_t to)
325 {
326 replace(programName,from,to,"");
327 }
328
/** Remove the tokens from..to in the named program.
 * A removal is queued as a replace with the empty string.
 */
329 void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
330 RefTokenWithIndex from, RefTokenWithIndex to )
331 {
332 replace(programName,from,to,"");
333 }
334
/** Add token type ttype to discardMask, the set of token types to throw out. */
335 void discard(int ttype) {
336 discardMask.add(ttype);
337 }
338
/** Return the buffered token at position i, converted to a RefToken.
 * Uses vector::at(), so an i outside the buffer throws std::out_of_range.
 */
339 RefToken getToken( size_t i )
340 {
341 return RefToken(tokens.at(i));
342 }
343
/** Number of tokens currently held in the token buffer. */
344 size_t getTokenStreamSize() const {
345 return tokens.size();
346 }
347
/** Write the text of every buffered token to out, in buffer order.
 * No rewrite program is consulted here.
 */
348 void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
349 ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) );
350 }
351
/** Range variant, only declared in this header.
 * NOTE(review): whether `end` is inclusive is not visible here - confirm in the definition.
 */
352 void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out,
353 size_t start, size_t end ) const;
354
/** Render the DEFAULT_PROGRAM_NAME program over the range
 * MIN_TOKEN_INDEX .. getTokenStreamSize().
 */
355 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
356 toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
357 }
358
/** Render the named program over the range
 * MIN_TOKEN_INDEX .. getTokenStreamSize().
 */
359 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
360 const ANTLR_USE_NAMESPACE(std)string& programName ) const
361 {
362 toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize());
363 }
364
/** Render the DEFAULT_PROGRAM_NAME program over the range start .. end. */
365 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
366 size_t start, size_t end ) const
367 {
368 toStream(out, DEFAULT_PROGRAM_NAME, start, end);
369 }
370
/** Render the named program over firstToken .. lastToken; all other
 * toStream overloads end up here. Only declared in this header.
 * NOTE(review): the range-less overloads pass getTokenStreamSize() (one past
 * the last valid index) as lastToken - confirm how the definition bounds it.
 */
371 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
372 const ANTLR_USE_NAMESPACE(std)string& programName,
373 size_t firstToken, size_t lastToken ) const;
374
/** Debug output over the range MIN_TOKEN_INDEX .. getTokenStreamSize(). */
375 void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
376 toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
377 }
378
/** Range variant, only declared in this header; the output format is not
 * visible here.
 */
379 void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out,
380 size_t start, size_t end ) const;
381
/** Last rewrite index of the DEFAULT_PROGRAM_NAME program. */
382 size_t getLastRewriteTokenIndex() const {
383 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
384 }
385
386 /** Return the index of the last operation in the program named programName;
387 * return 0 if the program does not exist or the program is empty.
388 * (Note this is different from the java implementation that returns -1)
 * A result of 0 is therefore ambiguous: it is also what a program whose
 * last operation sits at token index 0 returns.
389 */
390 size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const {
391 program_map::const_iterator rewrites = programs.find(programName);
392
393 if( rewrites == programs.end() )
394 return 0; // no program registered under this name
395
396 const operation_list& prog = rewrites->second;
397 if( !prog.empty() )
398 {
399 operation_list::const_iterator last = prog.end();
400 --last; // step back from end() to the final operation
401 return (*last)->getIndex();
402 }
403 return 0; // program exists but holds no operations
404 }
405
406protected:
407 /** If op.index > lastRewriteTokenIndexes, just add to the end.
408 * Otherwise, do linear */
410 addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
411 }
412
/** Add op to the operation list of the program named programName.
 * Only declared in this header.
 * NOTE(review): insertBefore()/replace() pass freshly new-allocated operations,
 * so ownership presumably moves to the program list - confirm in the definition.
 */
413 void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName,
414 RewriteOperation* op );
415
416protected:
417 /** Who do we suck tokens from? */
419 /** track index of tokens */
420 size_t index;
421
422 /** Track the incoming list of tokens */
423 token_list tokens;
424
425 /** You may have multiple, named streams of rewrite operations.
426 * I'm calling these things "programs."
427 * Maps String (name) -> rewrite (List)
428 */
430
431 /** Which (whitespace) token(s) to throw out */
433};
434
435#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
436}
437#endif
438
439#endif
A BitSet to replace java.util.BitSet.
Definition BitSet.hpp:40
This interface allows any object to pretend it is a stream of tokens.
virtual size_t execute(std ::ostream &out)
Execute the rewrite operation by possibly adding to the buffer.
virtual size_t execute(std ::ostream &out)
Execute the rewrite operation by possibly adding to the buffer.
virtual size_t execute(std ::ostream &)
Execute the rewrite operation by possibly adding to the buffer.
This token stream tracks the entire token stream coming from a lexer, but does not pass on the whites...
std ::list< RewriteOperation * > operation_list
list of rewrite operations
void addToSortedRewriteList(RewriteOperation *op)
If op.index > lastRewriteTokenIndexes, just add to the end.
TokenStream & stream
Who do we suck tokens from?
BitSet discardMask
Which (whitespace) token(s) to throw out.
size_t getLastRewriteTokenIndex(const std ::string &programName) const
Return the last index for the program named programName return 0 if the program does not exist or the...
void rollback(const std ::string &programName, size_t instructionIndex)
Rollback the instruction stream for a program so that the indicated instruction (via instructionIndex...
void deleteProgram(const std ::string &programName)
Reset the program so that no instructions exist.
token_list tokens
Track the incoming list of tokens.
program_map programs
You may have multiple, named streams of rewrite operations.
std ::map< std ::string, operation_list > program_map
map program name to <program counter,program> tuple