Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
string_util.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file utility/string_util.hh
11 ///
12 /// @brief Some std::string helper functions.
13 /// @author Sergey Lyskov
14 /// @author Jared Adolf-Bryfogle (jadolfbr@gmail.com)
15 
16 #ifndef INCLUDED_utility_string_util_hh
17 #define INCLUDED_utility_string_util_hh
18 
19 // Utility headers
20 #include <utility/vector1.hh>
21 #include <utility/exit.hh>
22 
23 // ObjexxFCL headers
25 
26 #include <utility/stream_util.hh>
27 
28 // Boost headers
29 #include <boost/algorithm/string/erase.hpp>
30 
31 // C++ headers
32 #include <list>
33 #include <set>
34 #include <sstream>
35 #include <string>
36 #include <vector>
37 #include <typeinfo>
38 
39 namespace utility {
40 
41 //These are useful string utilities from the ObjexxFCL namespace - transclude them here so you have a one-stop shop for string functions.
42 
49 
/// @brief Convert any stream-insertable value to a std::string.
/// @param t Value to format; it is written with operator<< into a
/// default-constructed std::ostringstream.
/// @return The characters that streaming @p t produced.
template <class T>
inline std::string
to_string (const T & t)
{
	std::ostringstream ss;
	ss << t;
	return ss.str();
}
58 
59 template <class T>
60 inline T const
61 from_string (std::string const & s, T )
62 {
63  T t;
64  std::istringstream ss(s);
65  ss >> t;
66  if ( ss.fail() ) {
67  const char* type = typeid(T).name();
68  utility_exit_with_message("cannot convert string "+s+" to type "+type);
69  }
70 
71  return t;
72 }
73 
74 template <class T>
75 inline utility::vector1<T> const
76 string_split (std::string const &in,char splitchar,T)
77 {
78  utility::vector1<T> parts;
79  if ( in.size()==0 ) {
80  return parts;
81  }
82 
83  size_t i(0), j(0);
84  while ( j != std::string::npos ) {
85  j = in.find( splitchar, i );
86  std::string item = in.substr(i,j-i);
87  T t;
88  std::istringstream ss(item);
89  ss >> t;
90  if ( ss.fail() ) {
91  const char* type = typeid(T).name();
92  utility_exit_with_message("cannot convert string '"+item+"' to type "+type);
93  }
94 
95  parts.push_back( t );
96  i = j+1;
97  }
98  return parts;
99 }
100 
101 
102 /// @brief split given std::string using ' ' symbol.
104 split(const std::string &s);
105 
106 /// @brief split given std::string using whitespace as a separator.
107 /// Unlike string_split_multi_delim(), any group of mixed whitespace counts only as a single seperator.
109 split_whitespace(const std::string &s);
110 
111 /// @details Split string by new line symbols, return vector of string.
112 std::vector< std::string > split_by_newlines( std::string const & s );
113 
114 /// @brief combine strings with anything
115 std::string join(utility::vector1<std::string> const & s, std::string const & connector);
116 
117 /// @brief combine vector with anything
118 template<class T>
119 std::string join(utility::vector1<T> const & vector, std::string const & connector)
120 {
121  std::ostringstream os;
122  typename utility::vector1<T>::const_iterator begin= vector.begin();
123  os << *begin++;
124  for ( ; begin != vector.end(); ++begin ) {
125  os<< connector<< *begin;
126  }
127  return os.str();
128 }
129 
/// @brief Combine the strings in s into one string, using connector as the joining text.
std::string join(std::vector<std::string> const & s, std::string const & connector);
132 
133 /// @brief Join vector of strings in to single string
134 template< platform::SSize L>
135 std::string join(vectorL<L, std::string> const & s, std::string const & connector){
136  std::ostringstream os;
138  os << *begin++;
139  for ( ; begin != s.end(); ++begin ) {
140  os<< connector<< *begin;
141  }
142  return os.str();
143 }
144 
145 
146 /// @brief replace space separations in a string with a connector such as '_'
147 std::string
148 replace_spaces(std::string const & string_w_spaces, std::string const & replacement);
149 
150 /// @brief split given std::string using ' ' symbol.
151 std::list< std::string >
152 split_to_list(const std::string &s);
153 
154 /// @brief split given std::string to a set using ' ' symbol.
155 std::set< std::string >
156 split_to_set(std::string const & s);
157 
158 /// @details split to vector1< std::string > using arbitrary split character
160 string_split( std::string const & in, char splitchar = ' ' );
161 
162 /// @brief split to vector1< std::string > using arbitrary split character, but no empty strings (closer to python string::split)
164 string_split_simple( std::string const & in, char splitchar = ' ' );
165 
166 /// @details split to vector< std::string > using any of arbitrary split characters
168 string_split_multi_delim( std::string const & in, std::string splitchars = " \t" );
169 
170 /// @brief convert a string to a float, returns -1 on failure
171 float
172 string2float( std::string st );
173 
174 /// @brief convert a string to an int, returns -1 on failure
175 int
176 string2int( std::string st );
177 
178 /// @brief convert a string to a Size, returns numeric::get_undefined_size() on failure
180 string2Size( std::string st );
181 
182 /// @brief convert a string to a Real, returns numeric::get_undefined_real() on failure
184 string2Real( std::string st );
185 
186 /// @brief convert a Real to string at a number of decimal places, optionally pad left.
187 std::string
188 Real2string( platform::Real, std::size_t const decimal_places);
189 
190 /// @breif convert a Real to a string, padding left with spaces until total number of char on left is equal to pad_lef_n
191 std::string
192 fmt_real( platform::Real, platform::Size const pad_left_newlen, std::size_t const decimal_places);
193 
194 // @brief Reads an unsigned int from string <x>, writing the result
195 // to output parameter <y>, which must be non-NULL. If the read was not
196 // successful, this function call has no effect on the value of <y> that
197 // was present prior to invokation.
198 void
199 string2uint(const std::string& x, unsigned int *y);
200 
/// @brief True iff haystack starts with needle
bool
startswith(std::string const & haystack, std::string const & needle);

/// @brief True iff haystack ends with needle
bool
endswith(std::string const & haystack, std::string const & needle);

/// @brief Read from the stream in into the string out.
/// NOTE(review): presumably reads the whole remaining stream; confirm in string_util.cc.
void
slurp(std::istream & in, std::string & out);

/// @brief Remove any characters in "drop" from the front and back of the string.
/// Use strip() for the value-return version
void trim( std::string & s, const std::string & drop = " " );

/// @brief Return a copy of the string with leading and trailing characters removed
std::string strip(std::string const & source, char c=' ');

/// @brief Return a copy of the string with leading and trailing characters removed
/// Any characters in drop will be removed
/// For the in place version, see trim()
std::string strip(std::string const & source, std::string const & drop);
223 
224 /// @brief Ambiguious with the trim( std::string & s ) -- Deprecated:
225 /// use strip() instead for return-value trimming
226 inline
227 std::string
228 trim( std::string const & s, std::string const & drop = " " ) {
229  return strip( s, drop );
230 }
231 
232 /// @brief compares two strings ignoring leading and trailing spaces
233 bool
234 trimmed_compare( std::string const & s1, std::string const & s2 );
235 
236 
237 ///@brief Add char to the left of the string
238 std::string
239 pad_left( std::string s, platform::Size const newlen, char pad_with=' ');
240 
241 /// @brief Add char to the right of a string
242 std::string
243 pad_right( std::string s, platform::Size const newlen, char pad_with=' ');
244 
245 ///@brief Add char to the left of the string
246 template <class T>
247 std::string
248 pad_left( const T & t, platform::Size const newlen, char pad_width= ' '){
249  std::string s = to_string( t );
250  return pad_left( s, newlen, pad_width );
251 }
252 
253 /// @brief Add char to the right of a string
254 template <class T>
255 std::string
256 pad_right( const T & t, platform::Size const newlen, char pad_width= ' '){
257  std::string s = to_string( t );
258  return pad_right( s, newlen, pad_width);
259 }
260 
261 
/// @brief return true if the string has only [0-9], ,'+','-','.' or '[Ee]'
bool is_string_numeric(std::string const & input);

/// @brief Read the entire contents of a file into a string. All end-of-line characters are replaced
/// by "\n". Throws a utility::excn::EXCN_msg_exception if the file cannot be opened.
std::string
file_contents( std::string const & file_name );

/// NOTE(review): undocumented in the original; behaviour is defined in string_util.cc.
std::string
file_basename( std::string const & full_path );

// "/foo/bar/baz" => "baz"
// "/foo/bar/baz.cc" => "baz.cc"
std::string
filename(const std::string& path);

// "/foo/bar/baz" => "/foo/bar/"
std::string
pathname(const std::string& path);
281 
282 
/// @brief find all environment variables with the form ${VARIABLE}
/// and replace with the contents of that environment variable.
/// NOTE(review): the original comment says "if the environment variable does
/// not exist, return string::npos", which does not match the std::string
/// return type; confirm the actual failure behaviour in string_util.cc.
std::string
replace_environment_variables(std::string input);

/// @brief Compares two strings, ignoring spaces. Useful for comparing atom
/// name strings which have pdb-alignment built into them. Slightly dangerous
/// if you consider the fact that atom names in the PDB are different for
/// different indentation rules: ' CA ' is c-alpha. 'CA ' is calcium.
/// @param s1 First string.
/// @param s2 Second string.
/// @return true when both strings hold the same sequence of characters once
/// every ' ' is disregarded. Only the space character is skipped; tabs and
/// other whitespace are compared like any other character.
inline
bool
same_ignoring_spaces( std::string const & s1, std::string const & s2 ) {
	// Walk both strings in lock-step, skipping spaces, instead of building
	// two space-free copies just to compare them.
	std::string::size_type i = 0;
	std::string::size_type j = 0;
	for ( ;; ) {
		while ( i < s1.size() && s1[i] == ' ' ) ++i;
		while ( j < s2.size() && s2[j] == ' ' ) ++j;

		bool const done1 = ( i == s1.size() );
		bool const done2 = ( j == s2.size() );
		if ( done1 || done2 ) return done1 && done2;

		if ( s1[i] != s2[j] ) return false;
		++i;
		++j;
	}
}
300 
/// @brief compute the sha1 hash of a string and return it as a string in hexadecimal form
std::string
string_to_sha1(std::string const & input_string);
304 
/// @brief Replace, in place, every occurrence of a character with a C string.
/// @param s String to modify.
/// @param from Character to look for.
/// @param to NUL-terminated replacement text; may be empty, which deletes
/// each occurrence of @p from.
/// @note Inserted text is never rescanned, so the call terminates even when
/// @p to itself contains @p from. @p to is not read when @p s holds no
/// occurrence of @p from.
inline
void
replace_in( std::string & s, const char from, const char *to )
{
	std::string::size_type pos = s.find( from );
	if ( pos == std::string::npos ) return;

	std::string const replacement( to );
	while ( pos != std::string::npos ) {
		s.replace( pos, 1, replacement );
		// Resume after the inserted text so it is not matched again and so
		// the character following an empty replacement is not skipped.
		pos = s.find( from, pos + replacement.size() );
	}
}
314 
/// @brief Generate new string from 'source' by replacing all occurrences of 'from' to 'to' string.
std::string
replace_in( std::string const & source, std::string const & from, std::string const & to );
318 
319 
320 
321 
/// @brief String accepted as a true value?
/// @param value_str Text to classify.
/// @return true only for an exact, case-sensitive match with one of the
/// spellings listed below; no trimming or case folding is applied.
bool inline
is_true_string( std::string const & value_str )
{
	return (
		( value_str == "true" ) ||
		( value_str == "True" ) ||
		( value_str == "TRUE" ) ||
		( value_str == "t" ) ||
		( value_str == "T" ) ||
		( value_str == "1" ) ||
		( value_str == "on" ) ||
		( value_str == "On" ) ||
		( value_str == "ON" ) ||
		( value_str == "y" ) ||
		( value_str == "Y" ) ||
		( value_str == "yes" ) ||
		( value_str == "Yes" ) ||
		( value_str == "YES" ) );
}
342 
/// @brief String accepted as a false value?
/// @param value_str Text to classify.
/// @return true only for an exact, case-sensitive match with one of the
/// spellings listed below; no trimming or case folding is applied.
bool inline
is_false_string( std::string const & value_str )
{
	return (
		( value_str == "false" ) ||
		( value_str == "False" ) ||
		( value_str == "FALSE" ) ||
		( value_str == "f" ) ||
		( value_str == "F" ) ||
		( value_str == "0" ) ||
		( value_str == "off" ) ||
		( value_str == "Off" ) ||
		( value_str == "OFF" ) ||
		( value_str == "n" ) ||
		( value_str == "N" ) ||
		( value_str == "no" ) ||
		( value_str == "No" ) ||
		( value_str == "NO" ) );
}
363 
364 /// @brief Compactifies vectors of ints: 1 2 3 9 10 11 to "1-3 9-11"
365 std::string
367  char const delimiter = ' ' );
368 
369 // Compactifies vectors of ints and chars (resnum and chain): 1A 2A 3A 9B 10B 11B to "A:1-3 B:9-11"
370 std::string
372  utility::vector1< char > chain_vector,
373  char const delimiter = ' ' );
374 
375 std::string
377  utility::vector1< std::string > segid_vector,
378  char const delimiter = ' ');
379 
380 std::string
381 make_tag( utility::vector1< int > res_vector );
382 
/// @brief converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and chains.
/// @param s Text to parse.
/// @param string_is_ok Output flag taken by non-const reference.
/// NOTE(review): presumably set to false when s cannot be parsed; confirm in string_util.cc.
std::pair< std::vector< int >, std::vector< char > >
get_resnum_and_chain( std::string const & s, bool & string_is_ok );

/// @brief converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and chains.
std::pair< std::vector< int >, std::vector< char > >
get_resnum_and_chain( std::string const & s );

/// @brief helper function for get_resnum_and_chain
bool
get_resnum_and_chain_from_one_tag( std::string const & tag,
	std::vector< int > & resnum,
	std::vector< char > & chains );

/// @brief converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and segids.
/// @param s Text to parse.
/// @param string_is_ok Output flag taken by non-const reference.
/// NOTE(review): presumably set to false when s cannot be parsed; confirm in string_util.cc.
std::pair< std::vector< int >, std::vector< std::string > >
get_resnum_and_segid( std::string const & s, bool & string_is_ok );

/// @brief helper function for get_resnum_and_segid
bool
get_resnum_and_segid_from_one_tag( std::string const & tag,
	std::vector< int > & resnum,
	std::vector< std::string > & segids );
406 
409 
410 } // namespace utility
411 
412 #endif // INCLUDED_utility_string_util_HH
super::const_iterator const_iterator
Definition: vectorL.hh:74
void trim(std::string &s, const std::string &drop)
Remove any charachters in "drop" from the front and back of the string. Use strip() for the value-ret...
Definition: string_util.cc:315
#define utility_exit_with_message(m)
Exit with file + line + message.
Definition: exit.hh:47
def vector
Definition: Equations.py:5
char lowercased(char const c)
Lowercased Copy of a Character.
void slurp(std::istream &in, std::string &out)
Definition: string_util.cc:305
platform::Size string2Size(std::string st)
convert a string to a Size, returns numeric::get_undefined_size() on failure
Definition: string_util.cc:230
std::string stripped_whitespace(std::string const &s)
Whitespace Stripped from a string's Tails Copy of a string.
float string2float(std::string st)
convert a string to a float, returns -1 on failure
Definition: string_util.cc:208
void string2uint(const std::string &x, unsigned int *y)
Definition: string_util.cc:271
std::string & strip_whitespace(std::string &s)
Strip Whitespace from a string's Tails.
def x
bool same_ignoring_spaces(std::string const &s1, std::string const &s2)
Compares two strings, ignoring spaces. Useful for comparing atom name strings which have pdb-alignmen...
Definition: string_util.hh:295
utility::vector1< std::string > string_split_multi_delim(std::string const &in, std::string splitchars)
Definition: string_util.cc:193
bool endswith(std::string const &haystack, std::string const &needle)
True iff haystack ends with needle.
Definition: string_util.cc:299
std::pair< std::vector< int >, std::vector< std::string > > get_resnum_and_segid(std::string const &s, bool &string_is_ok)
converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and chains...
Definition: string_util.cc:652
std::vector with L-based indexing
Definition: vectorL.fwd.hh:42
std::string Real2string(platform::Real num, std::size_t const decimal_places)
convert a Real to string at a number of decimal places, optionally pad left.
Definition: string_util.cc:251
std::set< std::string > split_to_set(const std::string &s)
split given std::string to a set using ' ' symbol.
Definition: string_util.cc:140
char & lowercase(char &c)
Lowercase a Character.
utility::vector1< std::string > string_split(std::string const &in, char splitchar)
Definition: string_util.cc:160
utility::vector1< std::string > split_whitespace(const std::string &s)
split given std::string using whitespace as a separator. Unlike string_split_multi_delim(), any group of mixed whitespace counts only as a single seperator.
Definition: string_util.cc:58
bool get_resnum_and_segid_from_one_tag(std::string const &tag, std::vector< int > &resnum, std::vector< std::string > &segids)
helper function for get_resnum_and_chain
Definition: string_util.cc:734
std::string replace_spaces(std::string const &string_w_spaces, std::string const &replacement)
replace space separations in a string with a connector such as '_'
Definition: string_util.cc:113
utility::vector1< std::string > split(const std::string &s)
split given std::string using ' ' symbol.
Definition: string_util.cc:38
std::string file_basename(const std::string &full_path)
Definition: string_util.cc:428
platform::Real string2Real(std::string st)
convert a string to a Real, returns numeric::get_undefined_real() on failure
Definition: string_util.cc:241
std::string join(utility::vector1< std::string > const &s, std::string const &connector)
combine strings with anything
Definition: string_util.cc:93
T const from_string(std::string const &s, T)
Definition: string_util.hh:61
std::string string_to_sha1(std::string const &input_string)
Definition: string_util.cc:472
bool trimmed_compare(std::string const &s1, std::string const &s2)
compares two strings ignoring leading and trailing spaces
Definition: string_util.cc:278
std::list< std::string > split_to_list(const std::string &s)
split given std::string using ' ' symbol.
Definition: string_util.cc:120
member1 value
Definition: Tag.cc:296
std::string pad_left(std::string s, platform::Size const newlen, char pad_with)
Add char to the left of the string.
Definition: string_util.cc:331
Program exit functions and macros.
std::vector< std::string > split_by_newlines(std::string const &s)
Definition: string_util.cc:71
std::string make_segtag_with_dashes(utility::vector1< int > res_vector, utility::vector1< std::string > segid_vector, char const delimiter)
Definition: string_util.cc:566
std::string pad_right(std::string s, platform::Size const newlen, char pad_with)
Add char to the right of a string.
Definition: string_util.cc:340
std::string make_tag_with_dashes(utility::vector1< int > res_vector, char const delimiter)
Compactifies vectors of ints: 1 2 3 9 10 11 to "1-3 9-11".
Definition: string_util.cc:508
bool is_false_string(std::string const &value_str)
String accepted as a false value?
Definition: string_util.hh:345
bool get_resnum_and_chain_from_one_tag(std::string const &tag, std::vector< int > &resnum, std::vector< char > &chains)
helper function for get_resnum_and_chain
Definition: string_util.cc:695
std::string fmt_real(platform::Real num, platform::Size pad_left_n, std::size_t const decimal_places)
convert a Real to a string, padding left with spaces until total number of char on left is equal to p...
Definition: string_util.cc:259
bool is_string_numeric(std::string const &input)
Definition: string_util.cc:390
std::string replace_in(std::string const &name_in, std::string const &find_string, std::string const &replace_string)
Generate new string from 'source' by replacing all occurrences of 'from' to 'to' string.
Definition: string_util.cc:769
std::string filename(const std::string &path)
Definition: string_util.cc:432
super::const_iterator const_iterator
Definition: vector1.hh:62
list resnum
if line_edit[13:14]=='P': #Nucleic acid? Skip.
utility::vector1< std::string > string_split_simple(std::string const &in, char splitchar)
split to vector1< std::string > using arbitrary split character, but no empty strings (closer to pyth...
Definition: string_util.cc:175
int string2int(std::string st)
convert a string to an int, returns -1 on failure
Definition: string_util.cc:219
std::string pathname(const std::string &path)
Definition: string_util.cc:437
std::string replace_environment_variables(std::string input)
find all environment variables with the form ${VARIABLE} and replace with the contents of that enviro...
Definition: string_util.cc:441
vector1: std::vector with 1-based indexing
bool startswith(std::string const &haystack, std::string const &needle)
True iff haystack starts with needle.
Definition: string_util.cc:293
platform::Size get_num_digits(platform::Size value)
Definition: string_util.cc:764
utility::keys::lookup::begin< KeyType > const begin
std::string file_contents(std::string const &file_name)
Read the entire contents of a file into a string. All end-of-line characters are replaced by "\n"...
Definition: string_util.cc:405
rule< Scanner, string_closure::context_t > name
Definition: Tag.cc:376
std::string make_tag(utility::vector1< int > res_vector)
Definition: string_util.cc:611
bool is_true_string(std::string const &value_str)
String accepted as a true value?
Definition: string_util.hh:324
std::string to_string(const T &t)
Definition: string_util.hh:52
std::pair< std::vector< int >, std::vector< char > > get_resnum_and_chain(std::string const &s, bool &string_is_ok)
converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and chains...
Definition: string_util.cc:630
rule< Scanner, tag_closure::context_t > tag
Definition: Tag.cc:373
std::size_t Size
Definition: types.hh:37
Implemention of ostream operator << for various common types.
char uppercased(char const c)
Uppercased Copy of a Character.
char & uppercase(char &c)
Uppercase a Character.
def y
std::set< char > chains
std::string strip(std::string const &s, std::string const &drop)
Return a copy of the string with leading and trailing characters removed Any charachters in drop will...
Definition: string_util.cc:323