5#ifndef GEMMI_SELECT_HPP_
6#define GEMMI_SELECT_HPP_
35 std::string
str()
const {
41 bool has(
const std::string& name)
const {
51 bool has(
char flag)
const {
54 bool invert = (
pattern[0] ==
'!');
55 bool found = (
pattern.find(flag, invert ? 1 : 0) != std::string::npos);
56 return invert ? !found : found;
68 std::string
str()
const {
71 s = std::to_string(
seqnum);
136 std::string
cid =
"/";
138 cid += std::to_string(
mdl);
183 return mdl == 0 || std::to_string(
mdl) == model.
name;
210 return {*
this,
st.models};
213 return {*
this, model.
chains};
219 return {*
this, residue.
atoms};
226 for (
Residue& res : chain.residues) {
228 for (
Atom& atom : res.atoms) {
230 return {&chain, &res, &atom};
234 return {
nullptr,
nullptr,
nullptr};
238 for (
Model& model :
st.models) {
241 return {&model, cra};
243 return {
nullptr, {
nullptr,
nullptr,
nullptr}};
274 for (
auto&
child :
t.children())
278 [&](
typename T::child_type& c) { return c.children().empty(); });
290 vector_remove_if(
t.children(), [&](
typename T::child_type& c) { return !matches(c); });
291 for (
auto&
child :
t.children())
300inline GEMMI_COLD void wrong_syntax(
const std::string& cid,
size_t pos,
301 const char* info=
nullptr) {
302 std::string msg =
"Invalid selection syntax";
306 cat_to(msg,
" near \"", cid.substr(pos, 8),
'"');
311inline int determine_omitted_cid_fields(
const std::string& cid) {
314 if (std::isdigit(cid[0]) || cid[0] ==
'.' || cid[0] ==
'(' || cid[0] ==
'-')
316 size_t sep = cid.find_first_of(
"/([:;");
317 if (sep == std::string::npos || cid[sep] ==
'/' || cid[sep] ==
';')
324inline Selection::List make_cid_list(
const std::string& cid,
size_t pos,
size_t end,
325 const char* disallowed_chars=
"-[]()!/*.:;") {
326 Selection::List list;
327 list.
all = (cid[pos] ==
'*');
328 list.inverted = (cid[pos] ==
'!');
329 if (list.all || list.inverted)
331 list.list = cid.substr(pos, end - pos);
333 size_t idx = list.list.find_first_of(disallowed_chars);
334 if (idx != std::string::npos)
335 wrong_syntax(cid, pos + idx,
cat(
" ('", list.list[idx],
"' in a list)").c_str());
339inline void parse_cid_elements(
const std::string& cid,
size_t pos,
340 std::vector<char>& elements) {
344 bool inverted =
false;
345 if (cid[pos] ==
'!') {
349 elements.resize((
size_t)
El::END,
char(inverted));
351 size_t sep = cid.find_first_of(
",]", pos);
352 if (sep == pos || sep > pos + 2)
353 wrong_syntax(cid, 0,
"in [...]");
354 char elem_str[2] = {cid[pos], sep > pos+1 ? cid[pos+1] :
'\0'};
356 if (el ==
El::X && (
alpha_up(elem_str[0]) !=
'X' || elem_str[1] !=
'\0'))
357 wrong_syntax(cid, 0,
" (invalid element in [...])");
358 elements[el.ordinal()] = char(!inverted);
365inline Selection::SequenceId parse_cid_seqid(
const std::string& cid,
size_t& pos,
366 int default_seqnum) {
367 size_t initial_pos = pos;
368 int seqnum = default_seqnum;
370 if (cid[pos] ==
'*') {
373 }
else if (std::isdigit(cid[pos])) {
375 seqnum = std::strtol(&cid[pos], &endptr, 10);
376 pos = endptr - &cid[0];
380 if (initial_pos != pos && (std::isalpha(cid[pos]) || cid[pos] ==
'*'))
382 return {seqnum, icode};
385inline Selection::AtomInequality parse_atom_inequality(
const std::string& cid,
386 size_t pos,
size_t end) {
387 Selection::AtomInequality r;
388 if (cid[pos] !=
'q' && cid[pos] !=
'b')
389 wrong_syntax(cid, pos);
390 r.property = cid[pos];
392 while (cid[pos] ==
' ')
396 else if (cid[pos] ==
'>')
398 else if (cid[pos] ==
'=')
401 wrong_syntax(cid, pos);
404 if (result.ec != std::errc())
405 wrong_syntax(cid, pos,
" (expected number)");
406 pos = size_t(result.ptr - cid.c_str());
407 while (cid[pos] ==
' ')
410 wrong_syntax(cid, pos);
414inline bool has_inequality(
const std::string& cid,
size_t start,
size_t end) {
415 for (
size_t i = start; i < end; ++i)
416 if (cid[i] ==
'<' || cid[i] ==
'=' || cid[i] ==
'>')
421inline void parse_cid(
const std::string& cid, Selection& sel) {
422 if (cid.empty() || (cid.size() == 1 && cid[0] ==
'*'))
424 int omit = determine_omitted_cid_fields(cid);
426 size_t semi = cid.find(
';');
429 sep = std::min(cid.find(
'/', 1), semi);
430 if (sep != 1 && cid[1] !=
'*') {
432 sel.mdl = std::strtol(&cid[1], &endptr, 10);
433 size_t end_pos = endptr - &cid[0];
434 if (end_pos != sep && end_pos != cid.size())
435 wrong_syntax(cid, 0,
" (at model number)");
440 if (omit <= 1 && sep < semi) {
441 size_t pos = (sep == 0 ? 0 : sep + 1);
442 sep = std::min(cid.find(
'/', pos), semi);
445 const char* disallowed_chars =
"[]()!/*.:;";
446 sel.chain_ids = make_cid_list(cid, pos, sep, disallowed_chars);
453 if (omit <= 2 && sep < semi) {
454 size_t pos = (sep == 0 ? 0 : sep + 1);
456 sel.from_seqid = parse_cid_seqid(cid, pos, INT_MIN);
457 if (cid[pos] ==
'(') {
459 size_t right_br = cid.find(
')', pos);
460 sel.residue_names = make_cid_list(cid, pos, right_br);
468 if (cid[pos] ==
'-') {
470 sel.to_seqid = parse_cid_seqid(cid, pos, INT_MAX);
471 }
else if (sel.from_seqid.seqnum != INT_MIN) {
472 sel.to_seqid = sel.from_seqid;
475 if (cid[sep] !=
'/' && cid[sep] !=
';' && cid[sep] !=
'\0')
476 wrong_syntax(cid, 0);
480 if (sep < std::min(cid.size(), semi)) {
481 size_t pos = (sep == 0 ? 0 : sep + 1);
482 size_t end = cid.find_first_of(
"[:;", pos);
484 sel.atom_names = make_cid_list(cid, pos, end);
487 if (!sel.atom_names.inverted && sel.atom_names.list.empty())
488 sel.atom_names.all =
true;
489 if (end == std::string::npos)
492 if (cid[end] ==
'[') {
494 end = cid.find(
']', pos);
495 if (end == std::string::npos)
496 wrong_syntax(cid, 0,
" (no matching ']')");
497 parse_cid_elements(cid, pos, sel.elements);
500 if (cid[end] ==
':') {
502 sel.altlocs = make_cid_list(cid, pos, semi);
507 while (semi < cid.size()) {
508 size_t pos = semi + 1;
509 while (cid[pos] ==
' ')
511 semi = std::min(cid.find(
';', pos), cid.size());
513 while (end > pos && cid[end-1] ==
' ')
515 if (has_inequality(cid, pos, end)) {
516 sel.atom_inequalities.push_back(parse_atom_inequality(cid, pos, end));
518 sel.entity_types = make_cid_list(cid, pos, end);
519 bool inv = sel.entity_types.inverted;
520 std::fill(sel.et_flags.begin(), sel.et_flags.end(),
char(inv));
521 for (
const std::string& item :
split_str(sel.entity_types.list,
',')) {
523 if (item ==
"polymer")
525 else if (item ==
"solvent")
528 wrong_syntax(cid, 0, (
" at " + item).c_str());
529 sel.et_flags[(int)et] =
char(!inv);
539 impl::parse_cid(
cid, *
this);
546 for (
const auto&
child :
obj.children())
551 return (!
sel ||
sel->matches(atom)) ? 1 : 0;
557 for (
const auto&
child :
obj.children())
562 return (!
sel ||
sel->matches(atom)) ? atom.
occ : 0;
void vector_remove_if(std::vector< T > &v, F &&condition)
El find_element(const char *symbol)
from_chars_result fast_from_chars(const char *start, const char *end, double &d)
double count_occupancies(const T &obj, const Selection *sel=nullptr)
const char * element_name(El el)
std::string cat(Args const &... args)
bool is_in_list(const std::string &name, const std::string &list, char sep=',')
void cat_to(std::string &)
GEMMI_DLL int GEMMI_DLL int std::string to_str(double d)
std::vector< std::string > split_str(const std::string &str, S sep)
size_t count_atom_sites(const T &obj, const Selection *sel=nullptr)
Represents atom site in macromolecular structure (~100 bytes).
std::vector< Residue > residues
std::vector< Chain > chains
std::vector< Atom > atoms
bool matches(const Atom &a) const
bool has(char flag) const
bool has(const std::string &name) const
int compare(const SeqId &seqid) const
Selection & set_atom_flags(const std::string &pattern)
std::vector< AtomInequality > atom_inequalities
FilterProxy< Selection, Atom > atoms(Residue &residue) const
bool matches(const Residue &res) const
T copy_selection(const T &orig) const
bool matches(const Chain &chain) const
bool matches(const Structure &) const
bool matches(const Model &model) const
CRA first_in_model(Model &model) const
FilterProxy< Selection, Residue > residues(Chain &chain) const
void remove_not_selected(T &t) const
std::pair< Model *, CRA > first(Structure &st) const
void add_matching_children(const T &orig, T &target) const
FilterProxy< Selection, Model > models(Structure &st) const
void add_matching_children(const Atom &, Atom &) const
Selection & set_residue_flags(const std::string &pattern)
std::vector< char > elements
void remove_not_selected(Atom &) const
bool matches(const Atom &a) const
FilterProxy< Selection, Chain > chains(Model &model) const
void remove_selected(T &t) const
bool matches(const CRA &cra) const
std::array< char, 6 > et_flags
void remove_selected(Residue &res) const