src/utils/StringTokenizer.cpp

/*
 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
 *
 * The University of Notre Dame grants you ("Licensee") a
 * non-exclusive, royalty free, license to use, modify and
 * redistribute this software in source and binary code form, provided
 * that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 *
 * This software is provided "AS IS," without a warranty of any
 * kind. All express or implied conditions, representations and
 * warranties, including any implied warranty of merchantability,
 * fitness for a particular purpose or non-infringement, are hereby
 * excluded.  The University of Notre Dame and its licensors shall not
 * be liable for any damages suffered by licensee as a result of
 * using, modifying or distributing the software or its
 * derivatives. In no event will the University of Notre Dame or its
 * licensors be liable for any lost revenue, profit or data, or for
 * direct, indirect, special, consequential, incidental or punitive
 * damages, however caused and regardless of the theory of liability,
 * arising out of the use of or inability to use software, even if the
 * University of Notre Dame has been advised of the possibility of
 * such damages.
 *
 * SUPPORT OPEN SCIENCE!  If you use OpenMD or its source code in your
 * research, please cite the appropriate papers when you publish your
 * work.  Good starting points are:
 *                                                                      
 * [1]  Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).             
 * [2]  Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).          
 * [3]  Sun, Lin & Gezelter, J. Chem. Phys. 128, 234107 (2008).          
 * [4]  Kuang & Gezelter,  J. Chem. Phys. 133, 164101 (2010).
 * [5]  Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
 */
 
#include <iostream>
#include <iterator>
#include <sstream>
#include "utils/StringTokenizer.hpp"

namespace OpenMD {


  // Builds a tokenizer over a copy of str. Every character contained in
  // delim acts as a separator; separators are not reported as tokens
  // (returnTokens_ is set to false).
  //
  // The cursor (currentPos_) and the end marker (end_) are taken from the
  // internal copy, tokenString_, rather than from the caller's string.
  // NOTE(review): this relies on tokenString_ being declared before
  // currentPos_ and end_ in the class so that it is initialized first --
  // confirm against the header.
  StringTokenizer::StringTokenizer(const std::string & str, const std::string & delim)
    : tokenString_(str),
      delim_(delim),
      returnTokens_(false),
      currentPos_(tokenString_.begin()),
      end_(tokenString_.end()) {
  }

  // Builds a tokenizer over a copy of the character range [first, last).
  // Every character contained in delim acts as a separator; separators are
  // not reported as tokens (returnTokens_ is set to false).
  //
  // Both iterators are taken by non-const reference, so callers have to
  // pass named iterator variables; they are only read here.
  // NOTE(review): relies on tokenString_ being declared before currentPos_
  // and end_ in the class -- confirm against the header.
  StringTokenizer::StringTokenizer(std::string::const_iterator& first, std::string::const_iterator& last,
                                   const std::string & delim)
    : tokenString_(first, last),
      delim_(delim),
      returnTokens_(false),
      currentPos_(tokenString_.begin()),
      end_(tokenString_.end()) {
  }

  // Builds a tokenizer over a copy of str with an explicit choice of
  // whether separators are themselves reported as tokens.
  //
  // str          - text to split (copied into tokenString_)
  // delim        - set of separator characters
  // returnTokens - stored in returnTokens_; when true, each separator
  //                character is handed back as its own one-character token
  //
  // NOTE(review): relies on tokenString_ being declared before currentPos_
  // and end_ in the class -- confirm against the header.
  StringTokenizer::StringTokenizer(const std::string&str, const std::string&delim,
                                   bool returnTokens)
    : tokenString_(str),
      delim_(delim),
      returnTokens_(returnTokens),
      currentPos_(tokenString_.begin()),
      end_(tokenString_.end()) {
  }

  // Returns true when c is one of the separator characters held in delim_.
  bool StringTokenizer::isDelimiter(const char c) {
    return delim_.find(c) != std::string::npos;
  }

  // Counts the tokens that remain between the current position and the end
  // of the string. The cursor itself is not moved; a local iterator does
  // the walking.
  //
  // Every maximal run of non-separator characters counts as one token.
  // When returnTokens_ is set, every single separator character counts as
  // one additional token.
  int StringTokenizer::countTokens() {
    int total = 0;
    bool insideToken = false;

    for (std::string::const_iterator scan = currentPos_; scan != end_; ++scan) {
      if (isDelimiter(*scan)) {
        // a separator always closes the token in progress
        insideToken = false;

        if (returnTokens_) {
          ++total;
        }
      } else if (!insideToken) {
        // first character of a new token
        insideToken = true;
        ++total;
      }
    }

    return total;
  }

  // Reports whether a call to nextToken() would still yield a token.
  //
  // - Nothing left to read: false.
  // - Separators are reported as tokens (returnTokens_): any remaining
  //   character is a token, so true.
  // - Otherwise: true only if at least one non-separator character remains.
  bool StringTokenizer::hasMoreTokens() {
    if (currentPos_ == end_) {
      return false;
    }

    if (returnTokens_) {
      return true;
    }

    // look ahead without moving the cursor
    for (std::string::const_iterator probe = currentPos_; probe != end_; ++probe) {
      if (!isDelimiter(*probe)) {
        return true;
      }
    }

    return false;
  }

  // Extracts the next token and moves the cursor past it.
  //
  // With returnTokens_ set, a separator at the cursor is returned on its
  // own as a one-character token. Otherwise leading separators are skipped
  // and the following run of non-separator characters is returned.
  // An empty string is returned when no token is left.
  std::string StringTokenizer::nextToken() {
    std::string token;

    if (currentPos_ == end_) {
      return token;
    }

    if (isDelimiter(*currentPos_)) {
      if (returnTokens_) {
        // the separator itself is the token
        token.push_back(*currentPos_);
        ++currentPos_;
        return token;
      }

      // discard the whole run of leading separators
      do {
        ++currentPos_;
      } while (currentPos_ != end_ && isDelimiter(*currentPos_));
    }

    // gather characters up to the next separator or the end of the string
    for (; currentPos_ != end_ && !isDelimiter(*currentPos_); ++currentPos_) {
      token.push_back(*currentPos_);
    }

    return token;
  }

  // Moves the cursor past the next token without building a string for it.
  // The cursor ends up where nextToken() would have left it.
  //
  // With returnTokens_ set, a separator at the cursor counts as the token
  // and only that one character is skipped. Otherwise leading separators
  // and the following run of non-separator characters are skipped.
  // Does nothing when the cursor is already at the end.
  void StringTokenizer::skipToken() {

    if (currentPos_ == end_) {
      return;
    }

    if (isDelimiter(*currentPos_)) {
      if (returnTokens_) {
        // Plain increment: the character is not needed, so there is no
        // reason to dereference the iterator and discard the value.
        ++currentPos_;
        return;
      }

      // discard the whole run of leading separators
      do {
        ++currentPos_;
      } while (currentPos_ != end_ && isDelimiter(*currentPos_));
    }

    // step over the token body
    while (currentPos_ != end_ && !isDelimiter(*currentPos_)) {
      ++currentPos_;
    }
  }

  // Consumes the next token and converts it to a bool through stream
  // extraction. The stream keeps its default formatting flags, i.e.
  // std::boolalpha is not enabled, so the token is read in numeric form.
  //
  // When the extraction fails, a message is written to std::cerr and
  // false is returned.
  bool StringTokenizer::nextTokenAsBool() {
    const std::string text = nextToken();
    std::istringstream parser(text);
    bool value;

    parser >> value;

    if (parser.fail()) {
      std::cerr << "unable to convert " << text << " to a bool" << std::endl;
      return false;
    }

    return value;
  }
 
  //Since libstdc++(GCC 3.2) has an i/ostream::operator>>/<<(streambuf*) bug (Bug 9318)
  //Instead of using iostream facility, we use C library
  int StringTokenizer::nextTokenAsInt() {
    std::string token = nextToken();
   
    return atoi(token.c_str());
  }

  // Consumes the next token and converts it to a float.
  // The token is first passed through convertFortranNumber() (which turns
  // 'd'/'D' into 'E'), then parsed with atof() and narrowed to float.
  float StringTokenizer::nextTokenAsFloat() {
    std::string text = nextToken();
    convertFortranNumber(text);

    const double parsed = atof(text.c_str());
    return static_cast<float>(parsed);
  }

  // Consumes the next token and converts it to RealType.
  // The token is first passed through convertFortranNumber() (which turns
  // 'd'/'D' into 'E') and then parsed with atof(); the double result is
  // converted implicitly to RealType on return.
  RealType StringTokenizer::nextTokenAsDouble() {
    std::string text = nextToken();
    convertFortranNumber(text);

    const double parsed = atof(text.c_str());
    return parsed;
  }

  // Returns the token that nextToken() would produce, but leaves the
  // cursor where it is: all scanning is done on a local copy of
  // currentPos_.
  //
  // With returnTokens_ set, a separator at the cursor is returned as a
  // one-character token. Otherwise leading separators are skipped and the
  // following run of non-separator characters is returned. An empty string
  // is returned when no token is left.
  std::string  StringTokenizer::peekNextToken() {
    std::string token;
    std::string::const_iterator cursor = currentPos_;

    if (cursor == end_) {
      return token;
    }

    if (isDelimiter(*cursor)) {
      if (returnTokens_) {
        // the separator itself is the token
        token.push_back(*cursor);
        return token;
      }

      // pass over the whole run of leading separators
      do {
        ++cursor;
      } while (cursor != end_ && isDelimiter(*cursor));
    }

    // gather characters up to the next separator or the end of the string
    for (; cursor != end_ && !isDelimiter(*cursor); ++cursor) {
      token.push_back(*cursor);
    }

    return token;
  }

  // Drains the tokenizer: calls nextToken() for as long as hasMoreTokens()
  // reports true and returns the tokens in the order they were read.
  // The cursor is advanced by this call, so the collected tokens are
  // consumed.
  std::vector<std::string>  StringTokenizer::getAllTokens() {
    std::vector<std::string> collected;

    for (bool more = hasMoreTokens(); more; more = hasMoreTokens()) {
      collected.push_back(nextToken());
    }

    return collected;
  }
  // Rewrites fortranNumber in place: every 'd' or 'D' becomes 'E'.
  // Presumably this lets Fortran-style exponents such as 1.0d-3 be parsed
  // by atof() in the nextTokenAs... helpers. Note that the replacement is
  // applied to every matching character, not only to an exponent marker.
  void StringTokenizer::convertFortranNumber(std::string& fortranNumber) {
    const std::string::size_type length = fortranNumber.size();

    for (std::string::size_type pos = 0; pos < length; ++pos) {
      char& ch = fortranNumber[pos];

      if (ch == 'd' || ch == 'D') {
        ch = 'E';
      }
    }
  }

  // Returns everything from the current position to the end of the string,
  // separators included, as one string. The cursor is not moved. An empty
  // string is returned when nothing is left.
  std::string  StringTokenizer::getRemainingString() {
    // Local const_iterator copies give the range constructor two iterators
    // of the same type.
    const std::string::const_iterator from = currentPos_;
    const std::string::const_iterator to = end_;

    return std::string(from, to);
  }

  
}//end namespace OpenMD

Revision:	2073
Committed:	Sat Mar 7 23:52:07 2015 UTC (10 years, 7 months ago) by gezelter
File size:	7252 byte(s)
Log Message:	added a skipToken function to StringTokenizer, and used this to remove some silly warnings on compilation.
#	Content
1	/*
2	* Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3	*
4	* The University of Notre Dame grants you ("Licensee") a
5	* non-exclusive, royalty free, license to use, modify and
6	* redistribute this software in source and binary code form, provided
7	* that the following conditions are met:
8	*
9	* 1. Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	*
12	* 2. Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the
15	* distribution.
16	*
17	* This software is provided "AS IS," without a warranty of any
18	* kind. All express or implied conditions, representations and
19	* warranties, including any implied warranty of merchantability,
20	* fitness for a particular purpose or non-infringement, are hereby
21	* excluded. The University of Notre Dame and its licensors shall not
22	* be liable for any damages suffered by licensee as a result of
23	* using, modifying or distributing the software or its
24	* derivatives. In no event will the University of Notre Dame or its
25	* licensors be liable for any lost revenue, profit or data, or for
26	* direct, indirect, special, consequential, incidental or punitive
27	* damages, however caused and regardless of the theory of liability,
28	* arising out of the use of or inability to use software, even if the
29	* University of Notre Dame has been advised of the possibility of
30	* such damages.
31	*
32	* SUPPORT OPEN SCIENCE! If you use OpenMD or its source code in your
33	* research, please cite the appropriate papers when you publish your
34	* work. Good starting points are:
35	*
36	* [1] Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).
37	* [2] Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).
38	* [3] Sun, Lin & Gezelter, J. Chem. Phys. 128, 234107 (2008).
39	* [4] Kuang & Gezelter, J. Chem. Phys. 133, 164101 (2010).
40	* [5] Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
41	*/
42
43	#include <iostream>
44	#include <iterator>
45	#include <sstream>
46	#include "utils/StringTokenizer.hpp"
47
48	namespace OpenMD {
49
50
51	StringTokenizer::StringTokenizer(const std::string & str, const std::string & delim)
52	: tokenString_(str), delim_(delim), returnTokens_(false),
53	currentPos_(tokenString_.begin()), end_(tokenString_.end()){
54
55	}
56
57	StringTokenizer::StringTokenizer(std::string::const_iterator& first, std::string::const_iterator& last,
58	const std::string & delim)
59	: tokenString_(first, last) , delim_(delim), returnTokens_(false),
60	currentPos_(tokenString_.begin()), end_(tokenString_.end()) {
61
62	}
63
64	StringTokenizer::StringTokenizer(const std::string&str, const std::string&delim,
65	bool returnTokens)
66	: tokenString_(str), delim_(delim), returnTokens_(returnTokens),
67	currentPos_(tokenString_.begin()), end_(tokenString_.end()) {
68
69	}
70
71	bool StringTokenizer::isDelimiter(const char c) {
72	return delim_.find(c) == std::string::npos ? false : true;
73	}
74
75	int StringTokenizer::countTokens() {
76
77	std::string::const_iterator tmpIter = currentPos_;
78	int numToken = 0;
79
80	while (true) {
81
82	//skip delimiter first
83	while( tmpIter != end_ && isDelimiter(*tmpIter)) {
84	++tmpIter;
85
86	if (returnTokens_) {
87	//if delimiter is consider as token
88	++numToken;
89	}
90	}
91
92	if (tmpIter == end_) {
93	break;
94	}
95
96	//encount a token here
97	while ( tmpIter != end_ && !isDelimiter(*tmpIter) ) {
98	++tmpIter;
99	}
100
101	++numToken;
102
103	}
104
105	return numToken;
106	}
107
108	bool StringTokenizer::hasMoreTokens() {
109
110	if (currentPos_ == end_) {
111	return false;
112	} else if (returnTokens_) {
113	return true;
114	} else {
115	std::string::const_iterator i = currentPos_;
116
117	//walk through the remaining string to check whether it contains
118	//non-delimeter or not
119	while(i != end_ && isDelimiter(*i)) {
120	++i;
121	}
122
123	return i != end_ ? true : false;
124	}
125	}
126
127	std::string StringTokenizer::nextToken() {
128	std::string result;
129
130	if(currentPos_ != end_) {
131	std::insert_iterator<std::string> insertIter(result, result.begin());
132
133	while( currentPos_ != end_ && isDelimiter(*currentPos_)) {
134
135	if (returnTokens_) {
136	*insertIter++ = *currentPos_++;
137	return result;
138	}
139
140	++currentPos_;
141	}
142
143	while (currentPos_ != end_ && !isDelimiter(*currentPos_)) {
144	*insertIter++ = *currentPos_++;
145	}
146
147	}
148
149	return result;
150	}
151
152	void StringTokenizer::skipToken() {
153
154	if(currentPos_ != end_) {
155	while( currentPos_ != end_ && isDelimiter(*currentPos_)) {
156
157	if (returnTokens_) {
158	*currentPos_++;
159	return;
160	}
161
162	++currentPos_;
163	}
164
165	while (currentPos_ != end_ && !isDelimiter(*currentPos_)) {
166	*currentPos_++;
167	}
168	}
169	}
170
171	bool StringTokenizer::nextTokenAsBool() {
172	std::string token = nextToken();
173	std::istringstream iss(token);
174	bool result;
175
176	if (iss >> result) {
177	return result;
178	} else {
179	std::cerr << "unable to convert " << token << " to a bool" << std::endl;
180	return false;
181	}
182	}
183
184	//Since libstdc++(GCC 3.2) has an i/ostream::operator>>/<<(streambuf*) bug (Bug 9318)
185	//Instead of using iostream facility, we use C library
186	int StringTokenizer::nextTokenAsInt() {
187	std::string token = nextToken();
188
189	return atoi(token.c_str());
190	}
191
192	float StringTokenizer::nextTokenAsFloat() {
193	std::string token = nextToken();
194	convertFortranNumber(token);
195	return (float) (atof(token.c_str()));
196	}
197
198	RealType StringTokenizer::nextTokenAsDouble() {
199	std::string token = nextToken();
200	convertFortranNumber(token);
201	return atof(token.c_str());
202	}
203
204	std::string StringTokenizer::peekNextToken() {
205	std::string result;
206	std::string::const_iterator tmpIter = currentPos_;
207
208	if(tmpIter != end_) {
209	std::insert_iterator<std::string> insertIter(result, result.begin());
210
211	while(tmpIter != end_ && isDelimiter(*tmpIter)) {
212
213	if (returnTokens_) {
214	*insertIter++ = *tmpIter++;
215	return result;
216	}
217
218	++tmpIter;
219	}
220
221	while (tmpIter != end_ && !isDelimiter(*tmpIter)) {
222	*insertIter++ = *tmpIter++;
223	}
224	}
225
226	return result;
227	}
228
229	std::vector<std::string> StringTokenizer::getAllTokens() {
230	std::vector<std::string> tokens;
231	while (hasMoreTokens()) {
232	tokens.push_back(nextToken());
233	}
234	return tokens;
235	}
236	void StringTokenizer::convertFortranNumber(std::string& fortranNumber) {
237	std::string::iterator i;
238	for(i = fortranNumber.begin(); i != fortranNumber.end(); ++i) {
239	if (*i == 'd' || *i == 'D') {
240	*i = 'E';
241	}
242	}
243	}
244
245	std::string StringTokenizer::getRemainingString() {
246	std::string result;
247	std::string::const_iterator tmpIter = currentPos_;
248	if(tmpIter != end_) {
249	std::insert_iterator<std::string> insertIter(result, result.begin());
250
251	while (tmpIter != end_) {
252	*insertIter++ = *tmpIter++;
253	}
254	}
255
256	return result;
257	}
258
259
260	}//end namespace OpenMD
261