src/utils/StringTokenizer.hpp

/*
 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
 *
 * The University of Notre Dame grants you ("Licensee") a
 * non-exclusive, royalty free, license to use, modify and
 * redistribute this software in source and binary code form, provided
 * that the following conditions are met:
 *
 * 1. Acknowledgement of the program authors must be made in any
 *    publication of scientific results based in part on use of the
 *    program.  An acceptable form of acknowledgement is citation of
 *    the article in which the program was described (Matthew
 *    A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
 *    J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
 *    Parallel Simulation Engine for Molecular Dynamics,"
 *    J. Comput. Chem. 26, pp. 252-271 (2005))
 *
 * 2. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 3. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 *
 * This software is provided "AS IS," without a warranty of any
 * kind. All express or implied conditions, representations and
 * warranties, including any implied warranty of merchantability,
 * fitness for a particular purpose or non-infringement, are hereby
 * excluded.  The University of Notre Dame and its licensors shall not
 * be liable for any damages suffered by licensee as a result of
 * using, modifying or distributing the software or its
 * derivatives. In no event will the University of Notre Dame or its
 * licensors be liable for any lost revenue, profit or data, or for
 * direct, indirect, special, consequential, incidental or punitive
 * damages, however caused and regardless of the theory of liability,
 * arising out of the use of or inability to use software, even if the
 * University of Notre Dame has been advised of the possibility of
 * such damages.
 */
 
/**
 * @file StringTokenizer.hpp
 * @author tlin
 * @date 09/20/2004
 * @time 11:30am
 * @version 1.0
 */

#ifndef UTIL_STRINGTOKENIZER_HPP
#define UTIL_STRINGTOKENIZER_HPP

#include <string>
#include <stdlib.h>
#include <vector>
#include "config.h"
namespace oopse {

  /**
   * @class StringTokenizer StringTokenizer.hpp "util/StringTokenizer.hpp"
   * @brief The string tokenizer class allows an application to break a string into tokens.
   *
   * The set of delimiters (the characters that separate tokens) is specified when the
   * tokenizer is constructed.
   * An instance of StringTokenizer behaves in one of two ways, depending on whether it was
   * created with the returnTokens flag having the value true or false.
   */
  class StringTokenizer {
  public:

    /**
     * Constructs a string tokenizer for the specified string. The characters in the
     * delim argument are the delimiters for separating tokens. Delimiter characters
     * are skipped and only serve as separators between tokens.
     * @param str a string to be parsed.
     * @param delim the delimiters; the default set is space, semicolon, tab, newline
     * and carriage return (" ;\t\n\r").
     * @note This is still a somewhat Java-like interface. A more idiomatic C++ design
     * would expose a token iterator; Boost's tokenizer class is one example.
     * @note This constructor is not explicit, so a std::string converts implicitly to
     * a StringTokenizer. It is left that way to avoid breaking existing callers.
     */
    StringTokenizer(const std::string & str,
                    const std::string & delim = " ;\t\n\r");

    /**
     * Constructs a string tokenizer for an iterator range [first, last). The characters
     * in the delim argument are the delimiters for separating tokens. Delimiter
     * characters are skipped and only serve as separators between tokens.
     * @param first begin iterator
     * @param last end iterator
     * @param delim the delimiters; the default set is space, semicolon, tab, newline
     * and carriage return (" ;\t\n\r").
     * @note Both iterators are taken by non-const lvalue reference, so callers must
     * pass named iterator variables; temporaries such as s.begin() will not bind.
     * The signature is kept as-is because it has to match the definition, which lives
     * outside this header.
     */
    StringTokenizer(std::string::const_iterator& first, std::string::const_iterator& last,
                    const std::string & delim = " ;\t\n\r");

    /**
     * Constructs a string tokenizer for the specified string. The characters in the
     * delim argument are the delimiters for separating tokens.
     * If the returnTokens flag is true, then the delimiter characters are also returned
     * as tokens; each delimiter is returned as a string of length one. If the flag is
     * false, the delimiter characters are skipped and only serve as separators between
     * tokens.
     * @param str a string to be parsed.
     * @param delim the delimiters.
     * @param returnTokens flag indicating whether to return the delimiters as tokens.
     */
    StringTokenizer(const std::string&str, const std::string&delim,
                    bool returnTokens);

    /**
     * Counts the tokens that can still be obtained from this tokenizer, i.e. the number
     * of times nextToken() can be called before the input is exhausted.
     * @return the number of tokens remaining in the string using the current delimiter set.
     */
    int countTokens();

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * @return true if there are more tokens available from this tokenizer's string,
     * false otherwise
     */
    bool hasMoreTokens();

    /**
     * Returns the next token from this string tokenizer.
     * @return the next token from this string tokenizer.
     * @note NOTE(review): earlier documentation promised a NoSuchElementException when
     * no tokens remain. That is a Java type and nothing of that name is declared in
     * this header; confirm the actual behaviour against the implementation, and guard
     * calls with hasMoreTokens() in the meantime.
     */
    std::string nextToken();

    // nextToken() could be expressed as a function template instead of the
    // nextTokenAsXxx() family below:
    //template <typename ReturnType>
    //ReturnType nextToken();

    /**
     * Returns the next token from this string tokenizer as a bool.
     * The textual forms accepted as true/false are decided by the implementation,
     * which is not part of this header.
     * @return the next token from this string tokenizer as a bool.
     */
    bool nextTokenAsBool();

    /**
     * Returns the next token from this string tokenizer as an integer.
     * @return the next token from this string tokenizer as an integer.
     */
    int nextTokenAsInt();

    /**
     * Returns the next token from this string tokenizer as a float.
     * @return the next token from this string tokenizer as a float.
     */
    float nextTokenAsFloat();

    /**
     * Returns the next token from this string tokenizer as a RealType.
     * Despite the method name, the return type is RealType rather than double;
     * RealType is declared outside this header (presumably via config.h).
     * @return the next token from this string tokenizer as a RealType.
     */
    RealType nextTokenAsDouble();

    /**
     * Returns the next token without advancing the position of the StringTokenizer.
     * @return the next token
     */
    std::string  peekNextToken();

    /**
     * Returns the current delimiter set of this string tokenizer.
     * Const-qualified so it can be called through a const StringTokenizer.
     * @return a reference to the delimiter set; it refers to a member of this
     * tokenizer and must not be used after the tokenizer is destroyed.
     */
    const std::string& getDelimiters() const {
      return delim_;
    }

    /**
     * Returns the original string before tokenizing.
     * Const-qualified so it can be called through a const StringTokenizer.
     * @return a reference to the stored string; it refers to a member of this
     * tokenizer and must not be used after the tokenizer is destroyed.
     */
    const std::string& getOriginal() const {
      return tokenString_;
    }

    /**
     * Returns all of the tokens.
     * NOTE(review): whether this consumes the tokenizer's position or starts from the
     * beginning of the input is decided by the implementation; confirm before mixing
     * it with nextToken().
     * @return all of the tokens
     */
    std::vector<std::string> getAllTokens();
  private:

    /**
     * Tests if a character is in the current delimiter set.
     * @param c character to be tested
     * @return true if character is in current delimiter set, false otherwise.
     */
    bool isDelimiter(const char c);

    /**
     * Converts a Fortran number to a C/C++ number, rewriting the string in place
     * (the argument is a non-const reference).
     * NOTE(review): presumably this rewrites Fortran exponent markers; confirm against
     * the implementation.
     * @param fortranNumber textual number to convert; modified by the call.
     */
    void convertFortranNumber(std::string& fortranNumber);


    std::string tokenString_;   /**< string returned by getOriginal() */

    std::string delim_;         /**< current delimiter set of this string tokenizer */

    bool returnTokens_; /**< flag indicating whether to return the delimiters as tokens */

    // Scan state. Both are set by the constructors, which are defined outside this
    // header; presumably the current read position and the end of the token range.
    std::string::const_iterator currentPos_;
    std::string::const_iterator end_;
  };

}                               //namespace oopse

#endif                          //UTIL_STRINGTOKENIZER_HPP
Revision:	3437
Committed:	Wed Jul 30 18:11:19 2008 UTC (17 years, 3 months ago) by gezelter
File size:	7893 byte(s)
Log Message:	Many fixes
#	Content
1	/*
2	* Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3	*
4	* The University of Notre Dame grants you ("Licensee") a
5	* non-exclusive, royalty free, license to use, modify and
6	* redistribute this software in source and binary code form, provided
7	* that the following conditions are met:
8	*
9	* 1. Acknowledgement of the program authors must be made in any
10	* publication of scientific results based in part on use of the
11	* program. An acceptable form of acknowledgement is citation of
12	* the article in which the program was described (Matthew
13	* A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14	* J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15	* Parallel Simulation Engine for Molecular Dynamics,"
16	* J. Comput. Chem. 26, pp. 252-271 (2005))
17	*
18	* 2. Redistributions of source code must retain the above copyright
19	* notice, this list of conditions and the following disclaimer.
20	*
21	* 3. Redistributions in binary form must reproduce the above copyright
22	* notice, this list of conditions and the following disclaimer in the
23	* documentation and/or other materials provided with the
24	* distribution.
25	*
26	* This software is provided "AS IS," without a warranty of any
27	* kind. All express or implied conditions, representations and
28	* warranties, including any implied warranty of merchantability,
29	* fitness for a particular purpose or non-infringement, are hereby
30	* excluded. The University of Notre Dame and its licensors shall not
31	* be liable for any damages suffered by licensee as a result of
32	* using, modifying or distributing the software or its
33	* derivatives. In no event will the University of Notre Dame or its
34	* licensors be liable for any lost revenue, profit or data, or for
35	* direct, indirect, special, consequential, incidental or punitive
36	* damages, however caused and regardless of the theory of liability,
37	* arising out of the use of or inability to use software, even if the
38	* University of Notre Dame has been advised of the possibility of
39	* such damages.
40	*/
41
42	/**
43	* @file StringTokenizer.hpp
44	* @author tlin
45	* @date 09/20/2004
46	* @time 11:30am
47	* @version 1.0
48	*/
49
50	#ifndef UTIL_STRINGTOKENIZER_HPP
51	#define UTIL_STRINGTOKENIZER_HPP
52
53	#include <string>
54	#include <stdlib.h>
55	#include <vector>
56	#include "config.h"
57	namespace oopse {
58
59	/**
60	* @class StringTokenizer.hpp "util/StringTokenizer.hpp"
61	* @brief The string tokenizer class allows an application to break a string into tokens
62	* The set of delimiters (the characters that separate tokens) may be specified either
63	* at creation time or on a per-token basis.
64	* An instance of StringTokenizer behaves in one of two ways, depending on whether it was
65	* created with the returnTokens flag having the value true or false.
66	*/
67	class StringTokenizer {
68	public:
69
70	/**
71	* Constructs a string tokenizer for the specified string. The characters in the delim argument
72	* are the delimiters for separating tokens. characters are skipped and only serve as
73	* separators between tokens.
74	* @param str a string to be parsed.
75	* @param delim the delimiters, default value is " ;\t\n\r".
76	* @note this is still a little bit java like implementation. Pure c++ one should use TokenIterator.
77	* Boost's tokenizer class is one of them
78	*/
79	StringTokenizer(const std::string & str,
80	const std::string & delim = " ;\t\n\r");
81
82	/**
83	* Constructs a string tokenizer for an iterator range [first, last). The characters in the delim argument
84	* are the delimiters for separating tokens. characters are skipped and only serve as
85	* separators between tokens.
86	* @param first begin iterator
87	* @param last end iterator
88	* @param delim the delimiters, default value is " ;\t\n\r".
89	* @note this is still a little bit java like implementation. Pure c++ one should use TokenIterator.
90	* Boost's tokenizer class is one of them
91	*/
92	StringTokenizer(std::string::const_iterator& first, std::string::const_iterator& last,
93	const std::string & delim = " ;\t\n\r");
94
95	/**
96	* Constructs a string tokenizer for the specified string. The characters in the delim argument
97	* are the delimiters for separating tokens.
98	* If the returnTokens flag is true, then the delimiter characters are also returned as tokens.
99	* Each delimiter is returned as a string of length one. If the flag is false, the delimiter
100	* characters are skipped and only serve as separators between tokens.
101	* @param str a string to be parsed.
102	* @param delim the delimiters.
103	* @param returnTokens flag indicating whether to return the delimiters as tokens.
104	*/
105	StringTokenizer(const std::string&str, const std::string&delim,
106	bool returnTokens);
107
108	/**
109	* Calculates the number of times that this tokenizer's nextToken method can be called
110	* before it generates an exception.
111	* @return the number of tokens remaining in the string using the current delimiter set.
112	*/
113	int countTokens();
114
115	/**
116	* Tests if there are more tokens available from this tokenizer's string.
117	* @return true if there are more tokens available from this tokenizer's string, false otherwise
118	*/
119	bool hasMoreTokens();
120
121	/**
122	* Returns the next token from this string tokenizer.
123	* @return the next token from this string tokenizer.
124	* @exception NoSuchElementException if there are no more tokens in this tokenizer's string
125	*/
126	std::string nextToken();
127
128	//actually, nextToken Can be template function
129	//template <typename ReturnType>
130	//ReturnType nextToken();
131
132	/**
133	* Returns the next token from this string tokenizer as a bool.
134	* @return the next token from this string tokenizer as a bool.
135	*/
136	bool nextTokenAsBool();
137
138	/**
139	* Returns the next token from this string tokenizer as an integer.
140	* @return the next token from this string tokenizer as an integer.
141	*/
142	int nextTokenAsInt();
143
144	/**
145	* Returns the next token from this string tokenizer as a float.
146	* @return the next token from this string tokenizer as a float.
147	*/
148	float nextTokenAsFloat();
149
150	/**
151	* Returns the next token from this string tokenizer as a RealType.
152	* @return the next token from this string tokenizer as a RealType.
153	*/
154	RealType nextTokenAsDouble();
155
156	/**
157	* Returns the next token without advancing the position of the StringTokenizer.
158	* @return the next token
159	*/
160	std::string peekNextToken();
161
162	/**
163	* Returns the current delimiter set of this string tokenizer
164	* @return the current delimiter set
165	*/
166	const std::string& getDelimiters() {
167	return delim_;
168	}
169
170	/**
171	* Returns the original string before tokenizing.
172	* @return the original string before tokenizing
173	*/
174	const std::string& getOriginal() {
175	return tokenString_;
176	}
177
178	/**
179	* Returns all of the tokens
180	* @return all of the tokens
181	*/
182	std::vector<std::string> getAllTokens();
183	private:
184
185	/**
186	* Test if character is in current delimiter set.
187	* @param c character to be tested
188	* @return true if character is in current delimiter set, false otherwise.
189	*/
190	bool isDelimiter(const char c);
191
192	/** convert a fortran number to a c/c++ number */
193	void convertFortranNumber(std::string& fortranNumber);
194
195
196	std::string tokenString_;
197
198	std::string delim_; /**< current delimiter set of this string tokenizer */
199
200	bool returnTokens_; /**< flag indicating whether to return the delimiters as tokens */
201
202	std::string::const_iterator currentPos_;
203	std::string::const_iterator end_;
204	};
205
206	} //namespace oopse
207
208	#endif //UTIL_STRINGTOKENIZER_HPP