| 1 | #ifndef INC_TokenStreamRewriteEngine_hpp__ | 
| 2 | #define INC_TokenStreamRewriteEngine_hpp__ | 
| 3 |  | 
| 4 | /* ANTLR Translator Generator | 
| 5 | * Project led by Terence Parr at http://www.jGuru.com | 
| 6 | * Software rights: http://www.antlr.org/license.html | 
| 7 | */ | 
| 8 |  | 
| 9 | #include <string> | 
| 10 | #include <list> | 
| 11 | #include <vector> | 
| 12 | #include <map> | 
| 13 | #include <utility> | 
| 14 | #include <iostream> | 
| 15 | #include <iterator> | 
| 16 | #include <cassert> | 
| 17 | #include <algorithm> | 
| 18 |  | 
| 19 | #include <antlr/config.hpp> | 
| 20 |  | 
| 21 | #include <antlr/TokenStream.hpp> | 
| 22 | #include <antlr/TokenWithIndex.hpp> | 
| 23 | #include <antlr/BitSet.hpp> | 
| 24 |  | 
| 25 | #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE | 
| 26 | namespace antlr { | 
| 27 | #endif | 
| 28 |  | 
| 29 | /** This token stream tracks the *entire* token stream coming from | 
| 30 | *       a lexer, but does not pass on the whitespace (or whatever else | 
| 31 | *       you want to discard) to the parser. | 
| 32 | * | 
*       This class can then be asked for the ith token in the input stream.
*       Useful for dumping out the input stream exactly after doing some
*       augmentation or other manipulations.  Tokens are indexed from 0..n-1.
| 36 | * | 
| 37 | *       You can insert stuff, replace, and delete chunks.       Note that the | 
| 38 | *       operations are done lazily--only if you convert the buffer to a | 
| 39 | *       String.         This is very efficient because you are not moving data around | 
| 40 | *       all the time.   As the buffer of tokens is converted to strings, the | 
| 41 | *       toString() method(s) check to see if there is an operation at the | 
| 42 | *       current index.  If so, the operation is done and then normal String | 
| 43 | *       rendering continues on the buffer.      This is like having multiple Turing | 
| 44 | *       machine instruction streams (programs) operating on a single input tape. :) | 
| 45 | * | 
| 46 | *       Since the operations are done lazily at toString-time, operations do not | 
| 47 | *       screw up the token index values.  That is, an insert operation at token | 
| 48 | *       index i does not change the index values for tokens i+1..n-1. | 
| 49 | * | 
| 50 | *       Because operations never actually alter the buffer, you may always get | 
| 51 | *       the original token stream back without undoing anything.  Since | 
| 52 | *       the instructions are queued up, you can easily simulate transactions and | 
| 53 | *       roll back any changes if there is an error just by removing instructions. | 
*       For example (shown in Java syntax),
*
*               TokenStreamRewriteEngine rewriteEngine =
*                       new TokenStreamRewriteEngine(lexer);
*               JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
*               ...
*               rewriteEngine.insertAfter("pass1", t, "foobar");
*               rewriteEngine.insertAfter("pass2", u, "start");
*               System.out.println(rewriteEngine.toString("pass1"));
*               System.out.println(rewriteEngine.toString("pass2"));
| 64 | * | 
| 65 | *       You can also have multiple "instruction streams" and get multiple | 
| 66 | *       rewrites from a single pass over the input.     Just name the instruction | 
| 67 | *       streams and use that name again when printing the buffer.      This could be | 
| 68 | *       useful for generating a C file and also its header file--all from the | 
| 69 | *       same buffer. | 
| 70 | * | 
| 71 | *       If you don't use named rewrite streams, a "default" stream is used. | 
| 72 | * | 
| 73 | *       Terence Parr, parrt@cs.usfca.edu | 
| 74 | *       University of San Francisco | 
| 75 | *       February 2004 | 
| 76 | */ | 
| 77 | class TokenStreamRewriteEngine : public TokenStream | 
| 78 | { | 
| 79 | public: | 
| 80 | typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list; | 
| 81 | static const char* DEFAULT_PROGRAM_NAME; | 
| 82 | #ifndef NO_STATIC_CONSTS | 
| 83 | static const size_t MIN_TOKEN_INDEX; | 
| 84 | static const int PROGRAM_INIT_SIZE; | 
| 85 | #else | 
| 86 | enum { | 
| 87 | MIN_TOKEN_INDEX = 0, | 
| 88 | PROGRAM_INIT_SIZE = 100 | 
| 89 | }; | 
| 90 | #endif | 
| 91 |  | 
| 92 | struct tokenToStream { | 
| 93 | tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {} | 
| 94 | template <typename T> void operator() ( const T& t ) { | 
| 95 | out << t->getText(); | 
| 96 | } | 
| 97 | ANTLR_USE_NAMESPACE(std)ostream& out; | 
| 98 | }; | 
| 99 |  | 
| 100 | class RewriteOperation { | 
| 101 | protected: | 
| 102 | RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt ) | 
| 103 | : index(idx), text(txt) | 
| 104 | { | 
| 105 | } | 
| 106 | public: | 
| 107 | virtual ~RewriteOperation() | 
| 108 | { | 
| 109 | } | 
| 110 | /** Execute the rewrite operation by possibly adding to the buffer. | 
| 111 | *       Return the index of the next token to operate on. | 
| 112 | */ | 
| 113 | virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) { | 
| 114 | return index; | 
| 115 | } | 
| 116 | virtual size_t getIndex() const { | 
| 117 | return index; | 
| 118 | } | 
| 119 | virtual const char* type() const { | 
| 120 | return "RewriteOperation"; | 
| 121 | } | 
| 122 | protected: | 
| 123 | size_t index; | 
| 124 | ANTLR_USE_NAMESPACE(std)string text; | 
| 125 | }; | 
| 126 |  | 
| 127 | struct executeOperation { | 
| 128 | ANTLR_USE_NAMESPACE(std)ostream& out; | 
| 129 | executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {} | 
| 130 | void operator () ( RewriteOperation* t ) { | 
| 131 | t->execute(out); | 
| 132 | } | 
| 133 | }; | 
| 134 |  | 
| 135 | /// list of rewrite operations | 
| 136 | typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list; | 
| 137 | /// map program name to <program counter,program> tuple | 
| 138 | typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map; | 
| 139 |  | 
| 140 | class InsertBeforeOp : public RewriteOperation | 
| 141 | { | 
| 142 | public: | 
| 143 | InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 144 | : RewriteOperation(index, text) | 
| 145 | { | 
| 146 | } | 
| 147 | virtual ~InsertBeforeOp() {} | 
| 148 | virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) | 
| 149 | { | 
| 150 | out << text; | 
| 151 | return index; | 
| 152 | } | 
| 153 | virtual const char* type() const { | 
| 154 | return "InsertBeforeOp"; | 
| 155 | } | 
| 156 | }; | 
| 157 |  | 
| 158 | class ReplaceOp : public RewriteOperation | 
| 159 | { | 
| 160 | public: | 
| 161 | ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text) | 
| 162 | : RewriteOperation(from,text) | 
| 163 | , lastIndex(to) | 
| 164 | { | 
| 165 | } | 
| 166 | virtual ~ReplaceOp() {} | 
| 167 | virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) { | 
| 168 | out << text; | 
| 169 | return lastIndex+1; | 
| 170 | } | 
| 171 | virtual const char* type() const { | 
| 172 | return "ReplaceOp"; | 
| 173 | } | 
| 174 | protected: | 
| 175 | size_t lastIndex; | 
| 176 | }; | 
| 177 |  | 
| 178 | class DeleteOp : public ReplaceOp { | 
| 179 | public: | 
| 180 | DeleteOp(size_t from, size_t to) | 
| 181 | : ReplaceOp(from,to,"") | 
| 182 | { | 
| 183 | } | 
| 184 | virtual const char* type() const { | 
| 185 | return "DeleteOp"; | 
| 186 | } | 
| 187 | }; | 
| 188 |  | 
| 189 | TokenStreamRewriteEngine(TokenStream& upstream); | 
| 190 |  | 
| 191 | TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize); | 
| 192 |  | 
| 193 | RefToken nextToken( void ); | 
| 194 |  | 
| 195 | void rollback(size_t instructionIndex) { | 
| 196 | rollback(DEFAULT_PROGRAM_NAME, instructionIndex); | 
| 197 | } | 
| 198 |  | 
| 199 | /** Rollback the instruction stream for a program so that | 
| 200 | *       the indicated instruction (via instructionIndex) is no | 
| 201 | *       longer in the stream.  UNTESTED! | 
| 202 | */ | 
| 203 | void rollback(const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 204 | size_t instructionIndex ); | 
| 205 |  | 
| 206 | void deleteProgram() { | 
| 207 | deleteProgram(DEFAULT_PROGRAM_NAME); | 
| 208 | } | 
| 209 |  | 
| 210 | /** Reset the program so that no instructions exist */ | 
| 211 | void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) { | 
| 212 | rollback(programName, MIN_TOKEN_INDEX); | 
| 213 | } | 
| 214 |  | 
| 215 | void insertAfter( RefTokenWithIndex t, | 
| 216 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 217 | { | 
| 218 | insertAfter(DEFAULT_PROGRAM_NAME, t, text); | 
| 219 | } | 
| 220 |  | 
| 221 | void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) { | 
| 222 | insertAfter(DEFAULT_PROGRAM_NAME, index, text); | 
| 223 | } | 
| 224 |  | 
| 225 | void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 226 | RefTokenWithIndex t, | 
| 227 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 228 | { | 
| 229 | insertAfter(programName, t->getIndex(), text); | 
| 230 | } | 
| 231 |  | 
| 232 | void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 233 | size_t index, | 
| 234 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 235 | { | 
| 236 | // to insert after, just insert before next index (even if past end) | 
| 237 | insertBefore(programName,index+1, text); | 
| 238 | } | 
| 239 |  | 
| 240 | void insertBefore( RefTokenWithIndex t, | 
| 241 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 242 | { | 
| 243 | // std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl; | 
| 244 | insertBefore(DEFAULT_PROGRAM_NAME, t, text); | 
| 245 | } | 
| 246 |  | 
| 247 | void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) { | 
| 248 | insertBefore(DEFAULT_PROGRAM_NAME, index, text); | 
| 249 | } | 
| 250 |  | 
| 251 | void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 252 | RefTokenWithIndex t, | 
| 253 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 254 | { | 
| 255 | insertBefore(programName, t->getIndex(), text); | 
| 256 | } | 
| 257 |  | 
| 258 | void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 259 | size_t index, | 
| 260 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 261 | { | 
| 262 | addToSortedRewriteList(programName, new InsertBeforeOp(index,text)); | 
| 263 | } | 
| 264 |  | 
| 265 | void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) | 
| 266 | { | 
| 267 | replace(DEFAULT_PROGRAM_NAME, index, index, text); | 
| 268 | } | 
| 269 |  | 
| 270 | void replace( size_t from, size_t to, | 
| 271 | const ANTLR_USE_NAMESPACE(std)string& text) | 
| 272 | { | 
| 273 | replace(DEFAULT_PROGRAM_NAME, from, to, text); | 
| 274 | } | 
| 275 |  | 
| 276 | void replace( RefTokenWithIndex indexT, | 
| 277 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 278 | { | 
| 279 | replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text); | 
| 280 | } | 
| 281 |  | 
| 282 | void replace( RefTokenWithIndex from, | 
| 283 | RefTokenWithIndex to, | 
| 284 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 285 | { | 
| 286 | replace(DEFAULT_PROGRAM_NAME, from, to, text); | 
| 287 | } | 
| 288 |  | 
| 289 | void replace(const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 290 | size_t from, size_t to, | 
| 291 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 292 | { | 
| 293 | addToSortedRewriteList(programName,new ReplaceOp(from, to, text)); | 
| 294 | } | 
| 295 |  | 
| 296 | void replace( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 297 | RefTokenWithIndex from, | 
| 298 | RefTokenWithIndex to, | 
| 299 | const ANTLR_USE_NAMESPACE(std)string& text ) | 
| 300 | { | 
| 301 | replace(programName, | 
| 302 | from->getIndex(), | 
| 303 | to->getIndex(), | 
| 304 | text); | 
| 305 | } | 
| 306 |  | 
| 307 | void remove(size_t index) { | 
| 308 | remove(DEFAULT_PROGRAM_NAME, index, index); | 
| 309 | } | 
| 310 |  | 
| 311 | void remove(size_t from, size_t to) { | 
| 312 | remove(DEFAULT_PROGRAM_NAME, from, to); | 
| 313 | } | 
| 314 |  | 
| 315 | void remove(RefTokenWithIndex indexT) { | 
| 316 | remove(DEFAULT_PROGRAM_NAME, indexT, indexT); | 
| 317 | } | 
| 318 |  | 
| 319 | void remove(RefTokenWithIndex from, RefTokenWithIndex to) { | 
| 320 | remove(DEFAULT_PROGRAM_NAME, from, to); | 
| 321 | } | 
| 322 |  | 
| 323 | void remove( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 324 | size_t from, size_t to) | 
| 325 | { | 
| 326 | replace(programName,from,to,""); | 
| 327 | } | 
| 328 |  | 
| 329 | void remove( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 330 | RefTokenWithIndex from, RefTokenWithIndex to ) | 
| 331 | { | 
| 332 | replace(programName,from,to,""); | 
| 333 | } | 
| 334 |  | 
| 335 | void discard(int ttype) { | 
| 336 | discardMask.add(ttype); | 
| 337 | } | 
| 338 |  | 
| 339 | RefToken getToken( size_t i ) | 
| 340 | { | 
| 341 | return RefToken(tokens.at(i)); | 
| 342 | } | 
| 343 |  | 
| 344 | size_t getTokenStreamSize() const { | 
| 345 | return tokens.size(); | 
| 346 | } | 
| 347 |  | 
| 348 | void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { | 
| 349 | ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) ); | 
| 350 | } | 
| 351 |  | 
| 352 | void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 353 | size_t start, size_t end ) const; | 
| 354 |  | 
| 355 | void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { | 
| 356 | toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); | 
| 357 | } | 
| 358 |  | 
| 359 | void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 360 | const ANTLR_USE_NAMESPACE(std)string& programName ) const | 
| 361 | { | 
| 362 | toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize()); | 
| 363 | } | 
| 364 |  | 
| 365 | void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 366 | size_t start, size_t end ) const | 
| 367 | { | 
| 368 | toStream(out, DEFAULT_PROGRAM_NAME, start, end); | 
| 369 | } | 
| 370 |  | 
| 371 | void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 372 | const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 373 | size_t firstToken, size_t lastToken ) const; | 
| 374 |  | 
| 375 | void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { | 
| 376 | toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); | 
| 377 | } | 
| 378 |  | 
| 379 | void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out, | 
| 380 | size_t start, size_t end ) const; | 
| 381 |  | 
| 382 | size_t getLastRewriteTokenIndex() const { | 
| 383 | return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); | 
| 384 | } | 
| 385 |  | 
| 386 | /** Return the last index for the program named programName | 
| 387 | * return 0 if the program does not exist or the program is empty. | 
| 388 | * (Note this is different from the java implementation that returns -1) | 
| 389 | */ | 
| 390 | size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const { | 
| 391 | program_map::const_iterator rewrites = programs.find(programName); | 
| 392 |  | 
| 393 | if( rewrites == programs.end() ) | 
| 394 | return 0; | 
| 395 |  | 
| 396 | const operation_list& prog = rewrites->second; | 
| 397 | if( !prog.empty() ) | 
| 398 | { | 
| 399 | operation_list::const_iterator last = prog.end(); | 
| 400 | --last; | 
| 401 | return (*last)->getIndex(); | 
| 402 | } | 
| 403 | return 0; | 
| 404 | } | 
| 405 |  | 
| 406 | protected: | 
| 407 | /** If op.index > lastRewriteTokenIndexes, just add to the end. | 
| 408 | *       Otherwise, do linear */ | 
| 409 | void addToSortedRewriteList(RewriteOperation* op) { | 
| 410 | addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op); | 
| 411 | } | 
| 412 |  | 
| 413 | void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName, | 
| 414 | RewriteOperation* op ); | 
| 415 |  | 
| 416 | protected: | 
| 417 | /** Who do we suck tokens from? */ | 
| 418 | TokenStream& stream; | 
| 419 | /** track index of tokens */ | 
| 420 | size_t index; | 
| 421 |  | 
| 422 | /** Track the incoming list of tokens */ | 
| 423 | token_list tokens; | 
| 424 |  | 
| 425 | /** You may have multiple, named streams of rewrite operations. | 
| 426 | *  I'm calling these things "programs." | 
| 427 | *  Maps String (name) -> rewrite (List) | 
| 428 | */ | 
| 429 | program_map programs; | 
| 430 |  | 
| 431 | /** Which (whitespace) token(s) to throw out */ | 
| 432 | BitSet discardMask; | 
| 433 | }; | 
| 434 |  | 
| 435 | #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE | 
| 436 | } | 
| 437 | #endif | 
| 438 |  | 
| 439 | #endif |