| 1 | #ifndef INC_TokenStreamRewriteEngine_hpp__ | 
| 2 | #define INC_TokenStreamRewriteEngine_hpp__ | 
| 3 |  | 
| 4 | /* ANTLR Translator Generator | 
| 5 | * Project led by Terence Parr at http://www.jGuru.com | 
| 6 | * Software rights: http://www.antlr.org/license.html | 
| 7 | */ | 
| 8 |  | 
| 9 | #include <string> | 
| 10 | #include <list> | 
| 11 | #include <vector> | 
| 12 | #include <map> | 
| 13 | #include <utility> | 
| 14 | #include <iostream> | 
| 15 | #include <iterator> | 
| 16 | #include <cassert> | 
| 17 | #include <algorithm> | 
| 18 |  | 
| 19 | #include <antlr/config.hpp> | 
| 20 |  | 
| 21 | #include <antlr/TokenStream.hpp> | 
| 22 | #include <antlr/TokenWithIndex.hpp> | 
| 23 | #include <antlr/BitSet.hpp> | 
| 24 |  | 
| 25 | #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE | 
| 26 | namespace antlr { | 
| 27 | #endif | 
| 28 |  | 
| 29 | /** This token stream tracks the *entire* token stream coming from | 
| 30 | *  a lexer, but does not pass on the whitespace (or whatever else | 
| 31 | *  you want to discard) to the parser. | 
| 32 | * | 
| 33 | *  This class can then be asked for the ith token in the input stream. | 
| 34 | *  Useful for dumping out the input stream exactly after doing some | 
| 35 | *  augmentation or other manipulations.  Tokens are indexed from 0..n-1. | 
| 36 | * | 
| 37 | *  You can insert stuff, replace, and delete chunks.  Note that the | 
| 38 | *  operations are done lazily--only if you convert the buffer to a | 
| 39 | *  String.  This is very efficient because you are not moving data around | 
| 40 | *  all the time.  As the buffer of tokens is converted to strings, the | 
| 41 | *  toString() method(s) check to see if there is an operation at the | 
| 42 | *  current index.  If so, the operation is done and then normal String | 
| 43 | *  rendering continues on the buffer.  This is like having multiple Turing | 
| 44 | *  machine instruction streams (programs) operating on a single input tape. :) | 
| 45 | * | 
| 46 | *  Since the operations are done lazily at toString-time, operations do not | 
| 47 | *  screw up the token index values.  That is, an insert operation at token | 
| 48 | *  index i does not change the index values for tokens i+1..n-1. | 
| 49 | * | 
| 50 | *  Because operations never actually alter the buffer, you may always get | 
| 51 | *  the original token stream back without undoing anything.  Since | 
| 52 | *  the instructions are queued up, you can easily simulate transactions and | 
| 53 | *  roll back any changes if there is an error just by removing instructions. | 
| 54 | *  For example, | 
| 55 | * | 
| 56 | *              TokenStreamRewriteEngine rewriteEngine = | 
| 57 | *                      new TokenStreamRewriteEngine(lexer); | 
| 58 | *      JavaRecognizer parser = new JavaRecognizer(rewriteEngine); | 
| 59 | *      ... | 
| 60 | *      rewriteEngine.insertAfter("pass1", t, "foobar"); | 
| 61 | *              rewriteEngine.insertAfter("pass2", u, "start"); | 
| 62 | *              System.out.println(rewriteEngine.toString("pass1")); | 
| 63 | *              System.out.println(rewriteEngine.toString("pass2")); | 
| 64 | * | 
| 65 | *  You can also have multiple "instruction streams" and get multiple | 
| 66 | *  rewrites from a single pass over the input.  Just name the instruction | 
| 67 | *  streams and use that name again when printing the buffer.  This could be | 
| 68 | *  useful for generating a C file and also its header file--all from the | 
| 69 | *  same buffer. | 
| 70 | * | 
| 71 | *  If you don't use named rewrite streams, a "default" stream is used. | 
| 72 | * | 
| 73 | *  Terence Parr, parrt@cs.usfca.edu | 
| 74 | *  University of San Francisco | 
| 75 | *  February 2004 | 
| 76 | */ | 
| 77 | class TokenStreamRewriteEngine : public TokenStream | 
| 78 | { | 
| 79 | public: | 
| 80 | typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list; | 
| 81 |  | 
| 82 | static const size_t MIN_TOKEN_INDEX = 0; | 
| 83 | static const char* DEFAULT_PROGRAM_NAME; | 
| 84 | static const int PROGRAM_INIT_SIZE = 100; | 
| 85 |  | 
/** Function object that writes the text of each token it is applied to
 *  onto one fixed output stream.  Used with for_each over the token
 *  container by originalToStream().
 */
struct tokenToStream {
	/// Destination stream, bound once at construction.
	ANTLR_USE_NAMESPACE(std)ostream& out;

	tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& sink )
	: out(sink)
	{
	}

	/// Append token->getText() to the stream.  T only has to provide an
	/// operator-> whose result has a getText() member.
	template <typename T>
	void operator() ( const T& token )
	{
		out << token->getText();
	}
};
| 93 |  | 
/** Base class for one queued rewrite instruction: a token index plus the
 *  text that goes with it.  The constructor is protected, so objects are
 *  only created through derived operation classes.
 */
class RewriteOperation {
protected:
	/// Token index this operation is attached to.
	size_t index;
	/// Text carried by the operation; the base class never writes it out.
	ANTLR_USE_NAMESPACE(std)string text;

	RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt )
	: index(idx)
	, text(txt)
	{
	}

public:
	virtual ~RewriteOperation() {}

	/** Execute the rewrite operation by possibly adding to the stream.
	 *  @return the index of the next token to operate on; the base
	 *          version writes nothing and returns its own index.
	 */
	virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ )
	{
		return index;
	}

	/// Token index this operation is attached to.
	virtual size_t getIndex() const
	{
		return index;
	}

	/// Name of the operation class as a C string.
	virtual const char* type() const
	{
		return "RewriteOperation";
	}
};
| 120 |  | 
| 121 | struct executeOperation { | 
| 122 | ANTLR_USE_NAMESPACE(std)ostream& out; | 
| 123 | executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {} | 
| 124 | void operator () ( RewriteOperation* t ) { | 
| 125 | t->execute(out); | 
| 126 | } | 
| 127 | }; | 
| 128 |  | 
/// List of rewrite operations making up one program.  The list stores raw
/// pointers; the inline insertBefore()/replace() methods of this class
/// allocate the operations with new.
/// NOTE(review): nothing visible in this header deletes them -- confirm
/// where ownership is released.
typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list;
/// Maps a program name to that program's list of rewrite operations.
typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map;
| 133 |  | 
| 134 | class InsertBeforeOp : public RewriteOperation | 
| 135 | { | 
| 136 | public: | 
| 137 | InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 138 | : RewriteOperation(index, text) | 
| 139 | { | 
| 140 | } | 
| 141 | virtual ~InsertBeforeOp() {} | 
| 142 | virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) | 
| 143 | { | 
| 144 | out << text; | 
| 145 | return index; | 
| 146 | } | 
| 147 | virtual const char* type() const { | 
| 148 | return "InsertBeforeOp"; | 
| 149 | } | 
| 150 | }; | 
| 151 |  | 
| 152 | class ReplaceOp : public RewriteOperation | 
| 153 | { | 
| 154 | public: | 
| 155 | ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text) | 
| 156 | : RewriteOperation(from,text) | 
| 157 | , lastIndex(to) | 
| 158 | { | 
| 159 | } | 
| 160 | virtual ~ReplaceOp() {} | 
| 161 | virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) { | 
| 162 | out << text; | 
| 163 | return lastIndex+1; | 
| 164 | } | 
| 165 | virtual const char* type() const { | 
| 166 | return "ReplaceOp"; | 
| 167 | } | 
| 168 | protected: | 
| 169 | size_t lastIndex; | 
| 170 | }; | 
| 171 |  | 
| 172 | class DeleteOp : public ReplaceOp { | 
| 173 | public: | 
| 174 | DeleteOp(size_t from, size_t to) | 
| 175 | : ReplaceOp(from,to,"") | 
| 176 | { | 
| 177 | } | 
| 178 | virtual const char* type() const { | 
| 179 | return "DeleteOp"; | 
| 180 | } | 
| 181 | }; | 
| 182 |  | 
/** Create a rewrite engine on top of the given upstream token stream.
 *  Defined outside this header.
 */
TokenStreamRewriteEngine(TokenStream& upstream);

/** Same as the one-argument constructor, with an additional size argument.
 *  NOTE(review): initialSize presumably pre-sizes the token buffer -- the
 *  definition is not in this header, confirm there.
 */
TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize);
| 186 |  | 
/** Return the next token for the consumer of this stream.  Defined outside
 *  this header.  Per the class description, the entire upstream token
 *  stream is tracked while token types registered through discard() are not
 *  passed on.
 */
RefToken nextToken( void );
| 188 |  | 
| 189 | void rollback(size_t instructionIndex) { | 
| 190 | rollback(DEFAULT_PROGRAM_NAME, instructionIndex); | 
| 191 | } | 
| 192 |  | 
| 193 | /** Rollback the instruction stream for a program so that | 
| 194 | *  the indicated instruction (via instructionIndex) is no | 
| 195 | *  longer in the stream.  UNTESTED! | 
| 196 | */ | 
| 197 | void rollback(const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 198 | size_t instructionIndex ); | 
| 199 |  | 
| 200 | void deleteProgram() { | 
| 201 | deleteProgram(DEFAULT_PROGRAM_NAME); | 
| 202 | } | 
| 203 |  | 
| 204 | /** Reset the program so that no instructions exist */ | 
| 205 | void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) { | 
| 206 | rollback(programName, MIN_TOKEN_INDEX); | 
| 207 | } | 
| 208 |  | 
| 209 | void insertAfter( RefTokenWithIndex t, | 
| 210 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 211 | { | 
| 212 | insertAfter(DEFAULT_PROGRAM_NAME, t, text); | 
| 213 | } | 
| 214 |  | 
| 215 | void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) { | 
| 216 | insertAfter(DEFAULT_PROGRAM_NAME, index, text); | 
| 217 | } | 
| 218 |  | 
| 219 | void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 220 | RefTokenWithIndex t, | 
| 221 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 222 | { | 
| 223 | insertAfter(programName, t->getIndex(), text); | 
| 224 | } | 
| 225 |  | 
| 226 | void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 227 | size_t index, | 
| 228 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 229 | { | 
| 230 | // to insert after, just insert before next index (even if past end) | 
| 231 | insertBefore(programName,index+1, text); | 
| 232 | } | 
| 233 |  | 
| 234 | void insertBefore( RefTokenWithIndex t, | 
| 235 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 236 | { | 
| 237 | // std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl; | 
| 238 | insertBefore(DEFAULT_PROGRAM_NAME, t, text); | 
| 239 | } | 
| 240 |  | 
| 241 | void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) { | 
| 242 | insertBefore(DEFAULT_PROGRAM_NAME, index, text); | 
| 243 | } | 
| 244 |  | 
| 245 | void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 246 | RefTokenWithIndex t, | 
| 247 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 248 | { | 
| 249 | insertBefore(programName, t->getIndex(), text); | 
| 250 | } | 
| 251 |  | 
| 252 | void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 253 | size_t index, | 
| 254 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 255 | { | 
| 256 | addToSortedRewriteList(programName, new InsertBeforeOp(index,text)); | 
| 257 | } | 
| 258 |  | 
| 259 | void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) | 
| 260 | { | 
| 261 | replace(DEFAULT_PROGRAM_NAME, index, index, text); | 
| 262 | } | 
| 263 |  | 
| 264 | void replace( size_t from, size_t to, | 
| 265 | const ANTLR_USE_NAMESPACE(std)string& text) | 
| 266 | { | 
| 267 | replace(DEFAULT_PROGRAM_NAME, from, to, text); | 
| 268 | } | 
| 269 |  | 
| 270 | void replace( RefTokenWithIndex indexT, | 
| 271 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 272 | { | 
| 273 | replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text); | 
| 274 | } | 
| 275 |  | 
| 276 | void replace( RefTokenWithIndex from, | 
| 277 | RefTokenWithIndex to, | 
| 278 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 279 | { | 
| 280 | replace(DEFAULT_PROGRAM_NAME, from, to, text); | 
| 281 | } | 
| 282 |  | 
| 283 | void replace(const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 284 | size_t from, size_t to, | 
| 285 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 286 | { | 
| 287 | addToSortedRewriteList(programName,new ReplaceOp(from, to, text)); | 
| 288 | } | 
| 289 |  | 
| 290 | void replace( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 291 | RefTokenWithIndex from, | 
| 292 | RefTokenWithIndex to, | 
| 293 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 294 | { | 
| 295 | replace(programName, | 
| 296 | from->getIndex(), | 
| 297 | to->getIndex(), | 
| 298 | text); | 
| 299 | } | 
| 300 |  | 
| 301 | void remove(size_t index) { | 
| 302 | remove(DEFAULT_PROGRAM_NAME, index, index); | 
| 303 | } | 
| 304 |  | 
| 305 | void remove(size_t from, size_t to) { | 
| 306 | remove(DEFAULT_PROGRAM_NAME, from, to); | 
| 307 | } | 
| 308 |  | 
| 309 | void remove(RefTokenWithIndex indexT) { | 
| 310 | remove(DEFAULT_PROGRAM_NAME, indexT, indexT); | 
| 311 | } | 
| 312 |  | 
| 313 | void remove(RefTokenWithIndex from, RefTokenWithIndex to) { | 
| 314 | remove(DEFAULT_PROGRAM_NAME, from, to); | 
| 315 | } | 
| 316 |  | 
| 317 | void remove( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 318 | size_t from, size_t to) | 
| 319 | { | 
| 320 | replace(programName,from,to,""); | 
| 321 | } | 
| 322 |  | 
| 323 | void remove( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 324 | RefTokenWithIndex from, RefTokenWithIndex to ) | 
| 325 | { | 
| 326 | replace(programName,from,to,""); | 
| 327 | } | 
| 328 |  | 
/** Add token type ttype to discardMask, the set of token types to throw
 *  out.
 */
void discard(int ttype) {
discardMask.add(ttype);
}
| 332 |  | 
/** Return the i-th tracked token as a RefToken.
 *  The lookup uses vector::at(), so an index outside the tracked tokens
 *  throws std::out_of_range instead of reading out of bounds.
 */
RefToken getToken( size_t i )
{
return RefToken(tokens.at(i));
}
| 337 |  | 
/** Number of tokens currently held in the token list. */
size_t getTokenStreamSize() const {
return tokens.size();
}
| 341 |  | 
| 342 | void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { | 
| 343 | ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) ); | 
| 344 | } | 
| 345 |  | 
| 346 | void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 347 | size_t start, size_t end ) const; | 
| 348 |  | 
| 349 | void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { | 
| 350 | return toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); | 
| 351 | } | 
| 352 |  | 
| 353 | void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 354 | const ANTLR_USE_NAMESPACE(std)string& programName ) const | 
| 355 | { | 
| 356 | return toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize()); | 
| 357 | } | 
| 358 |  | 
| 359 | void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 360 | size_t start, size_t end ) const | 
| 361 | { | 
| 362 | return toStream(out, DEFAULT_PROGRAM_NAME, start, end); | 
| 363 | } | 
| 364 |  | 
| 365 | void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 366 | const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 367 | size_t firstToken, size_t lastToken ) const; | 
| 368 |  | 
| 369 | void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { | 
| 370 | return toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); | 
| 371 | } | 
| 372 |  | 
| 373 | void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 374 | size_t start, size_t end ) const; | 
| 375 |  | 
| 376 | size_t getLastRewriteTokenIndex() const { | 
| 377 | return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); | 
| 378 | } | 
| 379 |  | 
| 380 | /** Return the last index for the program named programName | 
| 381 | * return 0 if the program does not exist or the program is empty. | 
| 382 | * (Note this is different from the java implementation that returns -1) | 
| 383 | */ | 
| 384 | size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const { | 
| 385 | program_map::const_iterator rewrites = programs.find(programName); | 
| 386 |  | 
| 387 | if( rewrites == programs.end() ) | 
| 388 | return 0; | 
| 389 |  | 
| 390 | const operation_list& prog = rewrites->second; | 
| 391 | if( !prog.empty() ) | 
| 392 | { | 
| 393 | operation_list::const_iterator last = prog.end(); | 
| 394 | --last; | 
| 395 | return (*last)->getIndex(); | 
| 396 | } | 
| 397 | return 0; | 
| 398 | } | 
| 399 |  | 
| 400 | protected: | 
| 401 | /** If op.index > lastRewriteTokenIndexes, just add to the end. | 
| 402 | *  Otherwise, do linear */ | 
| 403 | void addToSortedRewriteList(RewriteOperation* op) { | 
| 404 | addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op); | 
| 405 | } | 
| 406 |  | 
| 407 | void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 408 | RewriteOperation* op ); | 
| 409 |  | 
| 410 | protected: | 
/** Upstream token stream that this engine takes its tokens from. */
TokenStream& stream;
/** Tracks the index of tokens.
 *  NOTE(review): presumably the index given to the next incoming token --
 *  it is only declared in this header, confirm in the implementation file.
 */
size_t index;

/** The list of incoming tokens. */
token_list tokens;

/** Named streams of rewrite operations ("programs"): maps a program name
 *  to that program's list of rewrite operations.
 */
program_map programs;

/** Set of token types (e.g. whitespace) to throw out; filled by discard(). */
BitSet discardMask;
| 427 | }; | 
| 428 |  | 
| 429 | #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE | 
| 430 | } | 
| 431 | #endif | 
| 432 |  | 
| 433 | #endif |