| 1 |
/********************************************************************** |
| 2 |
tokenst.cpp - Tokenize a string. |
| 3 |
|
| 4 |
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
| 5 |
Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison |
| 6 |
|
| 7 |
This file is part of the Open Babel project. |
| 8 |
For more information, see <http://openbabel.sourceforge.net/> |
| 9 |
|
| 10 |
This program is free software; you can redistribute it and/or modify |
| 11 |
it under the terms of the GNU General Public License as published by |
| 12 |
the Free Software Foundation version 2 of the License. |
| 13 |
|
| 14 |
This program is distributed in the hope that it will be useful, |
| 15 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 |
GNU General Public License for more details. |
| 18 |
***********************************************************************/ |
| 19 |
|
| 20 |
#ifdef WIN32 |
| 21 |
#pragma warning (disable : 4786) |
| 22 |
#endif |
| 23 |
|
| 24 |
#include <algorithm> |
| 25 |
#include <vector> |
| 26 |
#include <string> |
| 27 |
|
| 28 |
#include "babelconfig.hpp" |
| 29 |
|
| 30 |
using namespace std; |
| 31 |
/* |
| 32 |
OBAPI bool tokenize(vector<string> &, const char *, const char *); |
| 33 |
OBAPI char *trim_spaces(char *string); |
| 34 |
OBAPI bool tokenize(vector<string> &vcr, string &s, const char *delimstr,int limit=-1); |
| 35 |
*/ |
| 36 |
namespace OpenBabel |
| 37 |
{ |
| 38 |
|
| 39 |
//! Break a C string into tokens.
//!
//! Every character of \p delimstr acts as a delimiter on its own. Runs of
//! consecutive delimiters are collapsed, so no empty tokens are produced.
//! The final token is returned even when the text does not end with a
//! delimiter, regardless of which characters make up the delimiter set.
//!
//! \param vcr      Receives the tokens; any previous contents are discarded.
//! \param buf      NUL-terminated text to split.
//! \param delimstr NUL-terminated set of delimiter characters.
//! \return false if \p buf or \p delimstr is a null pointer (\p vcr is left
//!         empty in that case), true otherwise.
OBAPI bool tokenize(vector<string> &vcr, const char *buf, const char *delimstr)
{
  vcr.clear();
  if (!buf || !delimstr)
    return false;

  const string s(buf);
  string::size_type pos = 0;

  for (;;)
    {
      // Skip any delimiters in front of the next token.
      const string::size_type first = s.find_first_not_of(delimstr, pos);
      if (first == string::npos)
        break; // nothing but delimiters (or nothing at all) is left

      const string::size_type last = s.find_first_of(delimstr, first);
      if (last == string::npos)
        {
          // No closing delimiter: the token runs to the end of the input.
          vcr.push_back(s.substr(first));
          break;
        }

      vcr.push_back(s.substr(first, last - first));
      pos = last + 1;
    }

  return true;
}
| 64 |
|
| 65 |
//! Strip leading and trailing space characters (' ' only) from a C string.
//!
//! Trailing spaces are overwritten in place with NUL terminators. Leading
//! spaces are skipped by advancing the returned pointer, so the result may
//! point into the middle of the caller's buffer rather than at its start.
//! Tabs, newlines and other whitespace are left untouched.
//!
//! \param string Writable NUL-terminated buffer; may be a null pointer.
//! \return Pointer to the first non-space character inside \p string (or to
//!         its terminator when it holds only spaces); a null pointer when
//!         \p string is null.
OBAPI char *trim_spaces(char *string)
{
  if (!string)
    return string;

  size_t length = strlen(string);

  // Step over leading spaces; the buffer itself is not shifted.
  while (length > 0 && *string == ' ')
    {
      ++string;
      --length;
    }

  // Chop trailing spaces by pulling the terminator backwards.
  while (length > 0 && string[length - 1] == ' ')
    string[--length] = '\0';

  return string;
}
| 91 |
|
| 92 |
//! Break a string into tokens, stopping after at most \p limit of them.
//!
//! Every character of \p delimstr acts as a delimiter on its own, and runs of
//! consecutive delimiters in front of a token are skipped. Once \p limit
//! delimiter-terminated tokens have been collected, everything after the
//! single delimiter that ended the last one is appended unparsed as one
//! extra element. That element may be empty or may itself begin with
//! delimiter characters. A \p limit that is zero or negative never matches
//! the token count, so the whole string is tokenized.
//!
//! \param vcr      Receives the tokens; any previous contents are discarded.
//! \param s        Text to split (not modified).
//! \param delimstr NUL-terminated set of delimiter characters.
//! \param limit    Number of tokens after which splitting stops.
//! \return false if \p delimstr is a null pointer (\p vcr is left empty in
//!         that case), true otherwise.
OBAPI bool tokenize(vector<string> &vcr, string &s, const char *delimstr, int limit)
{
  vcr.clear();
  if (!delimstr)
    return false;

  string::size_type pos = 0;
  int matched = 0;

  for (;;)
    {
      // Skip any delimiters in front of the next token.
      const string::size_type first = s.find_first_not_of(delimstr, pos);
      if (first == string::npos)
        break; // nothing but delimiters (or nothing at all) is left

      const string::size_type last = s.find_first_of(delimstr, first);
      if (last == string::npos)
        {
          // No closing delimiter: the token runs to the end of the input.
          vcr.push_back(s.substr(first));
          break;
        }

      vcr.push_back(s.substr(first, last - first));
      pos = last + 1;

      // Only delimiter-terminated tokens count towards the limit.
      if (++matched == limit)
        {
          // Hand back the unparsed remainder as a final element.
          vcr.push_back(s.substr(pos));
          break;
        }
    }

  return true;
}
| 131 |
|
| 132 |
//! Remove leading and trailing whitespace (space, tab, newline, carriage
//! return) from \p txt in place. A string made up only of such characters
//! becomes empty.
OBAPI void Trim(string& txt)
{
  const string::size_type lastKept = txt.find_last_not_of(" \t\n\r");
  if (lastKept == string::npos)
    {
      // Empty or whitespace only: nothing survives.
      txt.erase();
      return;
    }

  // Drop the trailing run first, then whatever precedes the first kept
  // character (which must exist, since lastKept was found).
  txt.erase(lastKept + 1);
  txt.erase(0, txt.find_first_not_of(" \t\n\r"));
}
| 146 |
|
| 147 |
} // end namespace OpenBabel |
| 148 |
|
| 149 |
//! \file tokenst.cpp |
| 150 |
//! \brief Tokenize a string. |