Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
string_util.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file utility/string_util.hh
11 ///
12 /// @brief Some std::string helper functions.
13 /// @author Sergey Lyskov
14 #ifndef INCLUDED_utility_string_util_hh
15 #define INCLUDED_utility_string_util_hh
16 
17 // Utility headers
18 #include <utility/vector1.hh>
19 #include <utility/exit.hh>
20 
21 // ObjexxFCL headers
23 
24 #include <utility/stream_util.hh>
25 
26 // Boost headers
27 #include <boost/algorithm/string/erase.hpp>
28 
29 // C++ headers
30 #include <list>
31 #include <set>
32 #include <sstream>
33 #include <string>
34 #include <vector>
35 #include <typeinfo>
36 
37 namespace utility {
38 
39 //These are useful string utilities from the ObjexxFCL namespace - transclude them here so you have a one-stop shop for string functions.
40 
47 
48 
49 /// @brief Reads the contents of <filename> into <contents>, preserving newline
50 /// characters. Aborts if an error is encountered.
51 void ReadFromFileOrDie(const std::string& filename, std::string* contents);
52 
53 /// @brief split given std::string using ' ' symbol.
54 utility::vector1< std::string > split(const std::string &s);
55 
56 /// @brief split given std::string using whitespace as a separator.
57 /// Unlike string_split_multi_delim(), any group of mixed whitespace counts only as a single separator.
59 
60 /// @brief combine strings with anything
61 std::string join(utility::vector1<std::string> const & s, std::string const & connector);
62 
63 /// @brief combine vector with anything
64 template<class T>
65 std::string join(utility::vector1<T> const & vector, std::string const & connector)
66 {
67  std::ostringstream os;
68  typename utility::vector1<T>::const_iterator begin= vector.begin();
69  os << *begin++;
70  for ( ; begin != vector.end(); ++begin ) {
71  os<< connector<< *begin;
72  }
73  return os.str();
74 }
75 
76 /// @brief combine strings with anything
77 std::string join(std::vector<std::string> const & s, std::string const & connector);
78 
79 /// @brief Join vector of strings in to single string
80 template< platform::SSize L>
81 std::string join(vectorL<L, std::string> const & s, std::string const & connector){
82  std::ostringstream os;
84  os << *begin++;
85  for ( ; begin != s.end(); ++begin ) {
86  os<< connector<< *begin;
87  }
88  return os.str();
89 }
90 
91 
92 /// @brief replace space separations in a string with a connector such as '_'
93 std::string replace_spaces(std::string const & string_w_spaces, std::string const & replacement);
94 
95 /// @brief split given std::string using ' ' symbol.
96 std::list< std::string > split_to_list(const std::string &s);
97 
98 /// @brief split given std::string to a set using ' ' symbol.
99 std::set< std::string > split_to_set(std::string const & s);
100 
101 /// @details split to vector1< std::string > using arbitrary split character
103 string_split( std::string const & in, char splitchar = ' ' );
104 
105 /// @brief split to vector1< std::string > using arbitrary split character, but no empty strings (closer to python string::split)
107 string_split_simple( std::string const & in, char splitchar = ' ' );
108 
109 /// @details split to vector< std::string > using any of arbitrary split characters
111 string_split_multi_delim( std::string const & in, std::string splitchars = " \t" );
112 
113 /// @brief convert a string to a float, returns -1 on failure
114 float string2float( std::string st );
115 
116 /// @brief convert a string to an int, returns -1 on failure
117 int string2int( std::string st );
118 
119 /// @brief convert a string to a Size, returns numeric::get_undefined_size() on failure
120 platform::Size string2Size( std::string st );
121 
122 /// @brief convert a string to a Real, returns numeric::get_undefined_real() on failure
123 platform::Real string2Real( std::string st );
124 
125 // @brief Reads an unsigned int from string <x>, writing the result
126 // to output parameter <y>, which must be non-NULL. If the read was not
127 // successful, this function call has no effect on the value of <y> that
128 // was present prior to invocation.
129 void string2uint(const std::string& x, unsigned int *y);
130 
131 /// @brief True iff haystack starts with needle
132 bool startswith(std::string const & haystack, std::string const & needle);
133 
134 /// @brief True iff haystack ends with needle
135 bool endswith(std::string const & haystack, std::string const & needle);
136 
137 void slurp(std::istream & in, std::string & out);
138 
139 void trim( std::string & s, const std::string & drop = " " );
140 
141 /// @brief create a new string that drops all the unwanted substrings of
142 /// the original string.
143 std::string
144 trim( std::string const & s, std::string const & drop = " " );
145 
146 /// @brief compares two strings ignoring leading and trailing spaces
147 bool trimmed_compare( std::string const & s1, std::string const & s2 );
148 
149 /// @brief adds spaces to a left aligned string until a given length is reached
150 void add_spaces_left_align( std::string & st, std::size_t const newlen );
151 
152 /// @brief adds spaces to a right aligned string until a given length is reached
153 void add_spaces_right_align( std::string & st, std::size_t const newlen );
154 
155 // @brief return true if the string has only [0-9], ,'+','-','.' or '[Ee]'
156 bool is_string_numeric(std::string const & input);
157 
158 /// @brief Read the entire contents of a file into a string. All end-of-line characters are replaced
159 /// by "\n". Throws a utility::excn::EXCN_msg_exception if the file cannot be opened.
160 std::string file_contents( std::string const & file_name );
161 
162 std::string file_basename( std::string const & full_path );
163 
164 // "/foo/bar/baz" => "baz"
165 // "/foo/bar/baz.cc" => "baz.cc"
166 std::string filename(const std::string& path);
167 
168 // "/foo/bar/baz" => "/foo/bar/"
169 std::string pathname(const std::string& path);
170 
171 
172 /// @brief find all environment variables with the form ${VARIABLE}
173 /// and replace with the contents of that environment variable.
174 /// if the environment variable does not exist, return string::npos
175 std::string replace_environment_variables(std::string input);
176 
177 /// @brief Compares two strings, ignoring spaces. Useful for comparing atom
178 /// name strings which have pdb-alignment built into them. Slightly dangerous
179 /// if you consider the fact that atom names in the PDB are different for
180 /// different indentation rules: ' CA ' is c-alpha. 'CA ' is calcium.
181 inline
182 bool same_ignoring_spaces( std::string const & s1, std::string const & s2 ) {
183  std::string t1 = boost::algorithm::erase_all_copy(s1, " ");
184  std::string t2 = boost::algorithm::erase_all_copy(s2, " ");
185  return t1 == t2;
186 }
187 
188 //@brief compute the sha1 hash of a string and return it as a string in hexadecimal form
189 std::string string_to_sha1(std::string const & input_string);
190 
/// @brief Replace, in place, every occurrence of the character @p from in
/// @p s with the C-string @p to.
/// @param s     string that is modified in place
/// @param from  character to search for
/// @param to    null-terminated replacement text; may be empty, must not be NULL
/// @details Scanning resumes immediately after the inserted text. A
/// replacement that itself contains @p from is therefore not expanded again
/// (which previously never terminated), and an empty replacement no longer
/// causes the character following a match to be skipped.
inline
void replace_in( std::string & s, const char from, const char *to )
{
	std::string const replacement( to );
	std::string::size_type pos = s.find( from );
	while ( pos != std::string::npos ) {
		s.replace( pos, 1, replacement );
		// Continue after the text just inserted, not inside it.
		pos = s.find( from, pos + replacement.size() );
	}
}
199 
200 /// @brief Generate new string from 'source' by replacing all occurrences of 'from' to 'to' string.
201 std::string replace_in( std::string const & source, std::string const & from, std::string const & to );
202 
203 
/// @brief Render a value as text by streaming it with operator<<.
/// @param t  value to format; T must be streamable to a std::ostream
/// @return the characters written by operator<< for @p t
template <class T>
inline std::string to_string (const T & t)
{
	std::ostringstream formatted;
	formatted << t;
	return formatted.str();
}
211 
212 template <class T>
213 inline T const from_string (std::string const & s, T )
214 {
215  T t;
216  std::istringstream ss(s);
217  ss >> t;
218  if ( ss.fail() ) {
219  const char* type = typeid(T).name();
220  utility_exit_with_message("cannot convert string "+s+" to type "+type);
221  }
222 
223  return t;
224 }
225 
226 template <class T>
227 inline utility::vector1<T> const string_split (std::string const &in,char splitchar,T)
228 {
229  utility::vector1<T> parts;
230  if ( in.size()==0 ) {
231  return parts;
232  }
233 
234  size_t i(0), j(0);
235  while ( j != std::string::npos ) {
236  j = in.find( splitchar, i );
237  std::string item = in.substr(i,j-i);
238  T t;
239  std::istringstream ss(item);
240  ss >> t;
241  if ( ss.fail() ) {
242  const char* type = typeid(T).name();
243  utility_exit_with_message("cannot convert string '"+item+"' to type "+type);
244  }
245 
246  parts.push_back( t );
247  i = j+1;
248  }
249  return parts;
250 }
251 
/// @brief Is the string one of the spellings accepted as a true value?
/// @details Accepted spellings are exactly: true/True/TRUE, t/T, 1,
/// on/On/ON, y/Y and yes/Yes/YES. The comparison is an exact match against
/// this list; any other text (including other capitalisations) is rejected.
/// @return true when @p value_str equals one of the listed spellings
bool inline
is_true_string( std::string const & value_str )
{
	static char const * const accepted[] = {
		"true", "True", "TRUE",
		"t", "T",
		"1",
		"on", "On", "ON",
		"y", "Y",
		"yes", "Yes", "YES"
	};
	std::size_t const n_accepted = sizeof( accepted ) / sizeof( accepted[0] );

	for ( std::size_t i = 0; i < n_accepted; ++i ) {
		if ( value_str == accepted[i] ) return true;
	}
	return false;
}
272 
/// @brief Is the string one of the spellings accepted as a false value?
/// @details Accepted spellings are exactly: false/False/FALSE, f/F, 0,
/// off/Off/OFF, n/N and no/No/NO. The comparison is an exact match against
/// this list; any other text (including other capitalisations) is rejected.
/// @return true when @p value_str equals one of the listed spellings
bool inline
is_false_string( std::string const & value_str )
{
	static char const * const accepted[] = {
		"false", "False", "FALSE",
		"f", "F",
		"0",
		"off", "Off", "OFF",
		"n", "N",
		"no", "No", "NO"
	};
	std::size_t const n_accepted = sizeof( accepted ) / sizeof( accepted[0] );

	for ( std::size_t i = 0; i < n_accepted; ++i ) {
		if ( value_str == accepted[i] ) return true;
	}
	return false;
}
293 
294 /// @brief Compactifies vectors of ints: 1 2 3 9 10 11 to "1-3 9-11"
295 std::string
297  char const delimiter = ' ' );
298 
299 // Compactifies vectors of ints and chars (resnum and chain): 1A 2A 3A 9B 10B 11B to "A:1-3 B:9-11"
300 std::string
302  utility::vector1< char > chain_vector,
303  char const delimiter = ' ' );
304 
305 std::string
307  utility::vector1< std::string > segid_vector,
308  char const delimiter = ' ');
309 
310 std::string
311 make_tag( utility::vector1< int > res_vector );
312 
313 /// @brief converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and chains.
314 std::pair< std::vector< int >, std::vector< char > >
315 get_resnum_and_chain( std::string const & s, bool & string_is_ok );
316 
317 /// @brief converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and chains.
318 std::pair< std::vector< int >, std::vector< char > >
319 get_resnum_and_chain( std::string const & s );
320 
321 /// @brief helper function for get_resnum_and_chain
322 bool
323 get_resnum_and_chain_from_one_tag( std::string const & tag,
324  std::vector< int > & resnum,
325  std::vector< char > & chains );
326 
327 /// @brief converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and segids.
328 std::pair< std::vector< int >, std::vector< std::string > >
329 get_resnum_and_segid( std::string const & s, bool & string_is_ok );
330 
331 /// @brief helper function for get_resnum_and_segid
332 bool
333 get_resnum_and_segid_from_one_tag( std::string const & tag,
334  std::vector< int > & resnum,
335  std::vector< std::string > & chains );
336 
339 
340 
341 /// @brief Return a copy of the string with leading and trailing characters removed
342 std::string strip(std::string const & source, char c=' ');
343 
344 } // namespace utility
345 
346 #endif // INCLUDED_utility_string_util_hh
super::const_iterator const_iterator
Definition: vectorL.hh:74
#define utility_exit_with_message(m)
Exit with file + line + message.
Definition: exit.hh:47
def vector
Definition: Equations.py:5
char lowercased(char const c)
Lowercased Copy of a Character.
void slurp(std::istream &in, std::string &out)
Definition: string_util.cc:286
platform::Size string2Size(std::string st)
convert a string to a Size, returns numeric::get_undefined_size() on failure
Definition: string_util.cc:228
std::string stripped_whitespace(std::string const &s)
Whitespace Stripped from a string's Tails Copy of a string.
float string2float(std::string st)
convert a string to a float, returns -1 on failure
Definition: string_util.cc:206
void string2uint(const std::string &x, unsigned int *y)
Definition: string_util.cc:252
std::string & strip_whitespace(std::string &s)
Strip Whitespace from a string's Tails.
def x
void trim(std::string &s, const std::string &drop)
Definition: string_util.cc:296
bool same_ignoring_spaces(std::string const &s1, std::string const &s2)
Compares two strings, ignoring spaces. Useful for comparing atom name strings which have pdb-alignmen...
Definition: string_util.hh:182
utility::vector1< std::string > string_split_multi_delim(std::string const &in, std::string splitchars)
Definition: string_util.cc:191
bool endswith(std::string const &haystack, std::string const &needle)
True iff haystack ends with needle.
Definition: string_util.cc:280
std::pair< std::vector< int >, std::vector< std::string > > get_resnum_and_segid(std::string const &s, bool &string_is_ok)
converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and chains...
Definition: string_util.cc:593
std::vector with L-based indexing
Definition: vectorL.fwd.hh:42
std::string strip(std::string const &source, char c)
Return a copy of the string with leading and trailing characters removed.
Definition: string_util.cc:729
std::set< std::string > split_to_set(const std::string &s)
split given std::string to a set using ' ' symbol.
Definition: string_util.cc:138
char & lowercase(char &c)
Lowercase a Character.
utility::vector1< std::string > string_split(std::string const &in, char splitchar)
Definition: string_util.cc:158
utility::vector1< std::string > split_whitespace(const std::string &s)
split given std::string using whitespace as a separator. Unlike string_split_multi_delim(), any group of mixed whitespace counts only as a single seperator.
Definition: string_util.cc:79
bool get_resnum_and_segid_from_one_tag(std::string const &tag, std::vector< int > &resnum, std::vector< std::string > &segids)
helper function for get_resnum_and_chain
Definition: string_util.cc:675
std::string replace_spaces(std::string const &string_w_spaces, std::string const &replacement)
replace space separations in a string with a connector such as '_'
Definition: string_util.cc:111
utility::vector1< std::string > split(const std::string &s)
split given std::string using ' ' symbol.
Definition: string_util.cc:59
std::string file_basename(const std::string &full_path)
Definition: string_util.cc:369
platform::Real string2Real(std::string st)
convert a string to a Real, returns numeric::get_undefined_real() on failure
Definition: string_util.cc:239
std::string join(utility::vector1< std::string > const &s, std::string const &connector)
combine strings with anything
Definition: string_util.cc:91
T const from_string(std::string const &s, T)
Definition: string_util.hh:213
std::string string_to_sha1(std::string const &input_string)
Definition: string_util.cc:413
bool trimmed_compare(std::string const &s1, std::string const &s2)
compares two strings ignoring leading and trailing spaces
Definition: string_util.cc:259
std::list< std::string > split_to_list(const std::string &s)
split given std::string using ' ' symbol.
Definition: string_util.cc:118
member1 value
Definition: Tag.cc:296
Program exit functions and macros.
void add_spaces_left_align(std::string &st, std::size_t const newlen)
adds spaces to a left aligned string until a given length is reached
Definition: string_util.cc:311
std::string make_segtag_with_dashes(utility::vector1< int > res_vector, utility::vector1< std::string > segid_vector, char const delimiter)
Definition: string_util.cc:507
std::string make_tag_with_dashes(utility::vector1< int > res_vector, char const delimiter)
Compactifies vectors of ints: 1 2 3 9 10 11 to "1-3 9-11".
Definition: string_util.cc:449
bool is_false_string(std::string const &value_str)
String accepted as a false value?
Definition: string_util.hh:275
bool get_resnum_and_chain_from_one_tag(std::string const &tag, std::vector< int > &resnum, std::vector< char > &chains)
helper function for get_resnum_and_chain
Definition: string_util.cc:636
bool is_string_numeric(std::string const &input)
Definition: string_util.cc:331
std::string replace_in(std::string const &name_in, std::string const &find_string, std::string const &replace_string)
Generate new string from 'source' by replacing all occurrences of 'from' to 'to' string.
Definition: string_util.cc:710
std::string filename(const std::string &path)
Definition: string_util.cc:373
super::const_iterator const_iterator
Definition: vector1.hh:62
list resnum
if line_edit[13:14]=='P': #Nucleic acid? Skip.
void ReadFromFileOrDie(const std::string &filename, std::string *contents)
Reads the contents of <filename> into <contents>, preserving newline characters. Aborts if an error i...
Definition: string_util.cc:36
utility::vector1< std::string > string_split_simple(std::string const &in, char splitchar)
split to vector1< std::string > using arbitrary split character, but no empty strings (closer to pyth...
Definition: string_util.cc:173
void add_spaces_right_align(std::string &st, std::size_t const newlen)
adds spaces to a right aligned string until a given length is reached
Definition: string_util.cc:321
int string2int(std::string st)
convert a string to an int, returns -1 on failure
Definition: string_util.cc:217
std::string pathname(const std::string &path)
Definition: string_util.cc:378
std::string replace_environment_variables(std::string input)
find all environment variables with the form ${VARIABLE} and replace with the contents of that enviro...
Definition: string_util.cc:382
vector1: std::vector with 1-based indexing
bool startswith(std::string const &haystack, std::string const &needle)
True iff haystack starts with needle.
Definition: string_util.cc:274
platform::Size get_num_digits(platform::Size value)
Definition: string_util.cc:705
utility::keys::lookup::begin< KeyType > const begin
std::string file_contents(std::string const &file_name)
Read the entire contents of a file into a string. All end-of-line characters are replaced by "\n"...
Definition: string_util.cc:346
rule< Scanner, string_closure::context_t > name
Definition: Tag.cc:376
std::string make_tag(utility::vector1< int > res_vector)
Definition: string_util.cc:552
bool is_true_string(std::string const &value_str)
String accepted as a true value?
Definition: string_util.hh:254
std::string to_string(const T &t)
Definition: string_util.hh:205
std::pair< std::vector< int >, std::vector< char > > get_resnum_and_chain(std::string const &s, bool &string_is_ok)
converts string like "1-3 20-22" or "A:1-5 B:20-22" to vectors containing resnums and chains...
Definition: string_util.cc:571
rule< Scanner, tag_closure::context_t > tag
Definition: Tag.cc:373
std::size_t Size
Definition: types.hh:37
Implementation of ostream operator << for various common types.
char uppercased(char const c)
Uppercased Copy of a Character.
char & uppercase(char &c)
Uppercase a Character.
def y
std::set< char > chains