Gemmi C++ API
Loading...
Searching...
No Matches
util.hpp
Go to the documentation of this file.
1// Copyright 2017 Global Phasing Ltd.
2//
3// Utilities. Mostly for working with strings and vectors.
4
5#ifndef GEMMI_UTIL_HPP_
6#define GEMMI_UTIL_HPP_
7
8#include <cassert>
9#include <cctype> // for isspace
10#include <cstring> // for strncmp
11#include <algorithm> // for equal, find, remove_if
12#include <iterator> // for begin, end, make_move_iterator
13#include <string>
14#include <vector>
15
16namespace gemmi {
17
18// ##### string helpers #####
19
// append_to_str(): appends one value to `out`.
// Integers (int, size_t) are appended in decimal form.
inline void append_to_str(std::string& out, int v) {
  out.append(std::to_string(v));
}
inline void append_to_str(std::string& out, size_t v) {
  out.append(std::to_string(v));
}
// Fallback for everything std::string::operator+= accepts
// (char, C string, std::string).
template<typename T>
void append_to_str(std::string& out, const T& v) {
  out += v;
}

// cat_to(): appends all the arguments, left to right, to `out`.
// The overload without values ends the recursion.
inline void cat_to(std::string&) {}
template <typename T, typename... Args>
void cat_to(std::string& out, const T& value, Args const&... args) {
  // handle the first value, then recurse on the remaining ones
  append_to_str(out, value);
  cat_to(out, args...);
}

// cat(): returns a new string that is the concatenation of all arguments.
template <class... Args>
std::string cat(Args const&... args) {
  std::string joined;
  cat_to(joined, args...);
  return joined;
}
37
// Returns true if `str` begins with `prefix` (case-sensitive).
// An empty prefix matches any string.
inline bool starts_with(const std::string& str, const std::string& prefix) {
  if (prefix.length() > str.length())
    return false;
  return std::equal(prefix.begin(), prefix.end(), str.begin());
}
42
// Returns true if the NUL-terminated string `a` begins with `b`.
// `b` is a char array (typically a string literal); its last element,
// assumed to be the terminating NUL, is not compared.
template<size_t N> bool starts_with(const char* a, const char (&b)[N]) {
  const size_t prefix_len = N - 1;
  return 0 == std::strncmp(a, b, prefix_len);
}
46
// Returns true if `str` ends with `suffix` (case-sensitive).
// An empty suffix matches any string.
inline bool ends_with(const std::string& str, const std::string& suffix) {
  if (suffix.length() > str.length())
    return false;
  // compare from the back: the suffix must match the tail of str
  return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
}
51
// ASCII-only lowercase conversion: A-Z are mapped to a-z, any other
// character is returned unchanged.
// Can be faster than std::tolower() because it takes char, not int.
inline char lower(char c) {
  const bool is_upper = ('A' <= c && c <= 'Z');
  return is_upper ? static_cast<char>(c | 0x20) : c;
}
58
// Uppercases a letter by clearing bit 0x20.
// Works as expected only for a-zA-Z; other characters that have
// this bit set are changed as well.
inline char alpha_up(char c) {
  return static_cast<char>(c & ~0x20);
}
61
// Returns a copy of `str` with ASCII letters A-Z converted to lowercase.
// All other bytes are left untouched.
inline std::string to_lower(std::string str) {
  std::transform(str.begin(), str.end(), str.begin(), [](char ch) -> char {
    return (ch >= 'A' && ch <= 'Z') ? static_cast<char>(ch | 0x20) : ch;
  });
  return str;
}
68
// Returns a copy of `str` with ASCII letters a-z converted to uppercase.
// All other bytes are left untouched.
inline std::string to_upper(std::string str) {
  std::transform(str.begin(), str.end(), str.begin(), [](char ch) -> char {
    return (ch >= 'a' && ch <= 'z') ? static_cast<char>(ch & ~0x20) : ch;
  });
  return str;
}
75
// Case-insensitive comparison of two characters (ASCII letters only).
inline bool isame(char a, char b) {
  if (a == b)
    return true;
  // an upper/lower pair differs in exactly the 0x20 bit...
  if ((a ^ b) != 0x20)
    return false;
  // ...and it must be a letter, not just any pair differing in that bit
  const int folded = a | 0x20;
  return folded >= 'a' && folded <= 'z';
}
80
81// Case-insensitive comparisons. The second arg must be lowercase.
82
83inline bool iequal_from(const std::string& str, size_t offset, const std::string& low) {
84 return str.length() == low.length() + offset &&
85 std::equal(std::begin(low), std::end(low), str.begin() + offset,
86 [](char c1, char c2) { return c1 == lower(c2); });
87}
88
89inline bool iequal(const std::string& str, const std::string& low) {
90 return iequal_from(str, 0, low);
91}
92
93inline bool istarts_with(const std::string& str, const std::string& prefix) {
94 return str.length() >= prefix.length() &&
95 std::equal(std::begin(prefix), std::end(prefix), str.begin(),
96 [](char c1, char c2) { return c1 == lower(c2); });
97}
98inline bool iends_with(const std::string& str, const std::string& suffix) {
99 size_t sl = suffix.length();
100 return str.length() >= sl &&
101 std::equal(std::begin(suffix), std::end(suffix), str.end() - sl,
102 [](char c1, char c2) { return c1 == lower(c2); });
103}
104
105inline bool giends_with(const std::string& str, const std::string& suffix) {
106 return iends_with(str, suffix) || iends_with(str, suffix + ".gz");
107}
108
// Returns a copy of `str` without leading and trailing whitespace
// (space, CR, LF and tab). Returns an empty string if nothing else is left.
inline std::string trim_str(const std::string& str) {
  const char* const blanks = " \r\n\t";
  const std::string::size_type head = str.find_first_not_of(blanks);
  if (head == std::string::npos)
    return std::string();
  // at least one non-blank character exists, so tail is a valid index
  const std::string::size_type tail = str.find_last_not_of(blanks);
  return std::string(str, head, tail - head + 1);
}
117
// Returns a copy of `str` without trailing whitespace
// (space, CR, LF and tab). Leading whitespace is kept.
inline std::string rtrim_str(const std::string& str) {
  const std::string::size_type last_kept = str.find_last_not_of(" \r\n\t");
  if (last_kept == std::string::npos)
    return str.substr(0, 0);
  return str.substr(0, last_kept + 1);
}
122
// Finds the end of a C string after trimming trailing whitespace.
//
// start - beginning of the string; if null, nullptr is returned.
// end   - points after the last character of the string (typically at \0);
//         if null, the string is taken to be NUL-terminated and its end
//         is located with strlen().
// Returns a pointer just past the last non-whitespace character
// (equal to `start` if the string is empty or all whitespace).
inline const char* rtrim_cstr(const char* start, const char* end=nullptr) {
  if (!start)
    return nullptr;
  if (!end)
    end = start + std::strlen(start);
  // std::isspace() requires a value representable as unsigned char (or EOF);
  // passing a negative char (bytes >= 0x80 where char is signed) is undefined.
  while (end > start && std::isspace(static_cast<unsigned char>(end[-1])))
    --end;
  return end;
}
136
namespace impl {
// Length of a separator: 1 for a char, the string length otherwise
// (a C string is converted implicitly to std::string).
inline size_t length(char) { return 1; }
inline size_t length(const std::string& s) { return s.length(); }
}

// Splits `str` at each occurrence of `sep` and appends the fields to `result`.
// Takes a single separator (usually char or string); may return empty fields.
// An empty string separator never matches: the whole `str` is appended
// as one field.
template<typename S>
void split_str_into(const std::string& str, S sep,
                    std::vector<std::string>& result) {
  const std::size_t sep_len = impl::length(sep);
  std::size_t start = 0;
  // With a zero-length separator find() would match at `start` forever.
  if (sep_len != 0) {
    std::size_t end;
    while ((end = str.find(sep, start)) != std::string::npos) {
      result.emplace_back(str, start, end - start);
      start = end + sep_len;
    }
  }
  // the last field: everything after the last separator
  result.emplace_back(str, start);
}

// Same as split_str_into(), but returns the fields in a new vector.
template<typename S>
std::vector<std::string> split_str(const std::string& str, S sep) {
  std::vector<std::string> result;
  split_str_into(str, sep, result);
  return result;
}
161
// The _multi variants take multiple 1-char separators as a string
// and discard empty fields.

// Splits `str` at any character from `seps` and appends the non-empty
// fields to `result`.
inline void split_str_into_multi(const std::string& str, const char* seps,
                                 std::vector<std::string>& result) {
  std::size_t start = str.find_first_not_of(seps);
  while (start != std::string::npos) {
    // if no separator follows, end is npos and the field runs to the end
    std::size_t end = str.find_first_of(seps, start);
    result.emplace_back(str, start, end - start);
    start = str.find_first_not_of(seps, end);
  }
}

// Same as split_str_into_multi(), but returns the fields in a new vector.
// By default, splits at spaces and tabs.
inline std::vector<std::string> split_str_multi(const std::string& str,
                                                const char* seps=" \t") {
  std::vector<std::string> result;
  split_str_into_multi(str, seps, result);
  return result;
}
180
// Joins the items from the range [begin, end), putting `sep` between
// consecutive items. `getter` is called on each item and its result
// is appended to the output string.
template<typename T, typename S, typename F>
std::string join_str(T begin, T end, const S& sep, const F& getter) {
  std::string joined;
  if (begin != end) {
    T it = begin;
    joined += getter(*it);
    // every following item is preceded by the separator
    while (++it != end) {
      joined += sep;
      joined += getter(*it);
    }
  }
  return joined;
}

// Joins a range of strings.
template<typename T, typename S>
std::string join_str(T begin, T end, const S& sep) {
  const auto identity = [](const std::string& t) { return t; };
  return join_str(begin, end, sep, identity);
}

// Joins all items of a container, using `getter` to obtain the text
// for each item.
template<typename T, typename S, typename F>
std::string join_str(const T& iterable, const S& sep, const F& getter) {
  return join_str(iterable.begin(), iterable.end(), sep, getter);
}

// Joins all strings from a container.
template<typename T, typename S>
std::string join_str(const T& iterable, const S& sep) {
  return join_str(iterable.begin(), iterable.end(), sep);
}
208
// Appends `item` to `str`; if `str` is not empty, `sep` is inserted
// before the new item.
template<typename T, typename S>
void string_append_sep(std::string& str, S sep, const T& item) {
  const bool needs_sep = !str.empty();
  if (needs_sep)
    str += sep;
  str += item;
}
215
// Replaces, in place, every non-overlapping occurrence of `old` in `s`
// with `new_`. The text inserted as a replacement is not scanned again.
// If `old` is empty, `s` is left unchanged.
inline void replace_all(std::string &s,
                        const std::string &old, const std::string &new_) {
  // An empty pattern matches at every position, so the loop below
  // would never end.
  if (old.empty())
    return;
  std::string::size_type pos = 0;
  while ((pos = s.find(old, pos)) != std::string::npos) {
    s.replace(pos, old.size(), new_);
    // continue searching after the replacement
    pos += new_.size();
  }
}
224
// Checks if `name` is one of the items in `list`, which is a string
// with items separated by `sep` (by default: comma separated).
inline bool is_in_list(const std::string& name, const std::string& list,
                       char sep=',') {
  // a name that is not shorter than the list can only match the whole list
  if (name.length() >= list.length())
    return name == list;
  size_t start = 0;
  for (;;) {
    const size_t end = list.find(sep, start);
    // for the last item end is npos; compare() clamps the (huge) count
    // to the remaining length of the list
    if (list.compare(start, end - start, name) == 0)
      return true;
    if (end == std::string::npos)
      return false;
    start = end + 1;
  }
}
237
238// ##### vector helpers #####
239
// Returns true if the vector `v` contains an element equal to `x`.
template <class T>
bool in_vector(const T& x, const std::vector<T>& v) {
  for (const T& item : v)
    if (item == x)
      return true;
  return false;
}
244
// Returns true if the predicate `f` is true for at least one element of `v`.
template <typename F, typename T>
bool in_vector_f(F f, const std::vector<T>& v) {
  return std::any_of(v.begin(), v.end(), f);
}
249
// Returns a pointer just past the last element of the vector,
// i.e. data() advanced by size() elements.
template <class T>
T* vector_end_ptr(std::vector<T>& v) {
  T* const first = v.data();
  return first + v.size();
}
// The same for a const vector.
template <class T>
const T* vector_end_ptr(const std::vector<T>& v) {
  const T* const first = v.data();
  return first + v.size();
}
254
// Moves all the elements of `src` to the end of `dst`.
// If `dst` is empty, the whole vector is move-assigned instead.
template <class T>
void vector_move_extend(std::vector<T>& dst, std::vector<T>&& src) {
  if (!dst.empty()) {
    auto first = std::make_move_iterator(src.begin());
    auto last = std::make_move_iterator(src.end());
    dst.insert(dst.end(), first, last);
    return;
  }
  dst = std::move(src);
}
263
// Removes from `v` all the elements for which `condition` returns true.
// The relative order of the remaining elements is preserved.
// (A wrapper around the erase-remove idiom.)
template <class T, typename F>
void vector_remove_if(std::vector<T>& v, F&& condition) {
  const typename std::vector<T>::iterator new_end =
      std::remove_if(v.begin(), v.end(), condition);
  v.erase(new_end, v.end());
}
269
// Inserts `n` new columns into a table stored row by row in `data`.
//
// data      - table with `length` rows of `old_width` items each
// old_width - number of columns before the insertion
// length    - number of rows
// n         - number of columns to insert
// pos       - index of the first new column (0 <= pos <= old_width)
// new_value - value put into every new cell
template <class T>
void vector_insert_columns(std::vector<T>& data, size_t old_width,
                           size_t length, size_t n, size_t pos, T new_value) {
  assert(data.size() == old_width * length);
  assert(pos <= old_width);
  data.resize(data.size() + n * length);
  // The table is rearranged in place, from the last cell to the first one.
  // The write position never falls below the read position, so no cell
  // is overwritten before it has been read.
  size_t out = data.size();  // one past the next cell to be written
  for (size_t row = length; row != 0; ) {
    --row;
    const size_t row_start = row * old_width;
    // old columns that end up after the inserted ones
    for (size_t col = old_width; col != pos; ) {
      --col;
      --out;
      data[out] = data[row_start + col];
    }
    // the inserted columns
    for (size_t k = 0; k != n; ++k) {
      --out;
      data[out] = new_value;
    }
    // old columns that stay before the inserted ones
    for (size_t col = pos; col != 0; ) {
      --col;
      --out;
      data[out] = data[row_start + col];
    }
  }
  assert(out == 0);
}
// Removes one column from a table stored row by row in `data`.
//
// data      - table with rows of (new_width + 1) items each
// new_width - number of columns after the removal
// pos       - index of the column to remove (0 <= pos <= new_width)
//
// An empty table is left empty.
template <class T>
void vector_remove_column(std::vector<T>& data, size_t new_width, size_t pos) {
  assert(pos <= new_width);
  // Without this check the final resize(pos) would grow an empty vector
  // to `pos` default-constructed items.
  if (data.empty())
    return;
  // From here on `pos` is the write position. Each pass of the outer loop
  // skips one removed cell (source starts just after it) and then copies
  // the new_width cells up to the next removed cell.
  for (size_t source = pos + 1; source < data.size(); ++source)
    for (size_t i = 0; i < new_width && source < data.size(); ++i)
      data[pos++] = data[source++];
  data.resize(pos);
}
299
300
301// ##### other helpers #####
302
// Numeric ID used for case-insensitive comparison of 4 letters.
// The four chars are packed into one int (the first char in the highest
// byte) and bit 0x20 of each byte is cleared, which makes upper- and
// lowercase letters equal.
// s must have 4 chars or 3 chars + NUL; ' ' and NUL are equivalent in s.
constexpr int ialpha4_id(const char* s) {
  return ((s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]) & ~0x20202020;
}
// Numeric ID used for case-insensitive comparison of 3 letters.
// The three chars are packed into one int (the first char in the highest
// used byte) and bit 0x20 of each byte is cleared, which makes upper- and
// lowercase letters equal.
constexpr int ialpha3_id(const char* s) {
  return ((s[0] << 16) | (s[1] << 8) | s[2]) & ~0x20202020;
}
312
313} // namespace gemmi
314#endif
T * vector_end_ptr(std::vector< T > &v)
Definition util.hpp:251
void append_to_str(std::string &out, int v)
Definition util.hpp:20
bool iequal_from(const std::string &str, size_t offset, const std::string &low)
Definition util.hpp:83
char lower(char c)
Definition util.hpp:53
bool ends_with(const std::string &str, const std::string &suffix)
Definition util.hpp:47
void vector_remove_if(std::vector< T > &v, F &&condition)
Definition util.hpp:266
bool isame(char a, char b)
Definition util.hpp:77
const char * rtrim_cstr(const char *start, const char *end=nullptr)
Definition util.hpp:124
constexpr int ialpha3_id(const char *s)
Definition util.hpp:309
bool in_vector_f(F f, const std::vector< T > &v)
Definition util.hpp:246
bool istarts_with(const std::string &str, const std::string &prefix)
Definition util.hpp:93
std::string rtrim_str(const std::string &str)
Definition util.hpp:118
void vector_insert_columns(std::vector< T > &data, size_t old_width, size_t length, size_t n, size_t pos, T new_value)
Definition util.hpp:273
std::string join_str(T begin, T end, const S &sep, const F &getter)
Definition util.hpp:182
bool in_vector(const T &x, const std::vector< T > &v)
Definition util.hpp:241
bool iends_with(const std::string &str, const std::string &suffix)
Definition util.hpp:98
void split_str_into_multi(const std::string &str, const char *seps, std::vector< std::string > &result)
Definition util.hpp:164
std::string to_lower(std::string str)
Definition util.hpp:62
void string_append_sep(std::string &str, S sep, const T &item)
Definition util.hpp:210
std::string to_upper(std::string str)
Definition util.hpp:69
std::string cat(Args const &... args)
Definition util.hpp:32
bool is_in_list(const std::string &name, const std::string &list, char sep=',')
Definition util.hpp:226
bool giends_with(const std::string &str, const std::string &suffix)
Definition util.hpp:105
void cat_to(std::string &)
Definition util.hpp:25
bool starts_with(const std::string &str, const std::string &prefix)
Definition util.hpp:38
std::vector< std::string > split_str_multi(const std::string &str, const char *seps=" \t")
Definition util.hpp:174
std::vector< std::string > split_str(const std::string &str, S sep)
Definition util.hpp:156
void vector_remove_column(std::vector< T > &data, size_t new_width, size_t pos)
Definition util.hpp:292
void split_str_into(const std::string &str, S sep, std::vector< std::string > &result)
Definition util.hpp:145
bool iequal(const std::string &str, const std::string &low)
Definition util.hpp:89
std::string trim_str(const std::string &str)
Definition util.hpp:109
void replace_all(std::string &s, const std::string &old, const std::string &new_)
Definition util.hpp:216
constexpr int ialpha4_id(const char *s)
Definition util.hpp:305
void vector_move_extend(std::vector< T > &dst, std::vector< T > &&src)
Definition util.hpp:256
char alpha_up(char c)
Definition util.hpp:60