Gemmi C++ API
Loading...
Searching...
No Matches
polyheur.hpp
Go to the documentation of this file.
1// Copyright 2017-2018 Global Phasing Ltd.
2//
3// Heuristic methods for working with chains and polymers.
4// Includes also a few well-defined functions, such as removal of waters.
5
6#ifndef GEMMI_POLYHEUR_HPP_
7#define GEMMI_POLYHEUR_HPP_
8
9#include <vector>
10#include "model.hpp"
11#include "resinfo.hpp" // for find_tabulated_residue
12#include "util.hpp" // for vector_remove_if
13
14namespace gemmi {
15
16// A simplistic classification. It may change in the future.
17// It returns PolymerType which corresponds to _entity_poly.type,
18// but here we use only PeptideL, Rna, Dna, DnaRnaHybrid and Unknown.
20 bool ignore_entity_type=false);
21
23 const ConstResidueSpan& polymer) {
24 if (ent && ent->polymer_type != PolymerType::Unknown)
25 return ent->polymer_type;
26 return check_polymer_type(polymer);
27}
28
29struct AtomNameElement { std::string atom_name; El el; };
30
31inline std::vector<AtomNameElement> get_mainchain_atoms(PolymerType ptype) {
33 return {{"P", El::P}, {"O5'", El::O}, {"C5'", El::C},
34 {"C4'", El::C}, {"O4'", El::O}, {"C3'", El::C}, {"O3'", El::O},
35 {"C2'", El::C}, {"O2'", El::O}, {"C1'", El::C}};
36 return {{"N", El::N}, {"CA", El::C}, {"C", El::C}, {"O", El::O}};
37}
38
40inline bool in_peptide_bond_distance(const Atom* a1, const Atom* a2) {
41 return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(1.341 * 1.5);
42}
43inline bool have_peptide_bond(const Residue& r1, const Residue& r2) {
44 return in_peptide_bond_distance(r1.get_c(), r2.get_n());
45}
46
48inline bool in_nucleotide_bond_distance(const Atom* a1, const Atom* a2) {
49 return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(1.6 * 1.5);
50}
51inline bool have_nucleotide_bond(const Residue& r1, const Residue& r2) {
52 return in_nucleotide_bond_distance(r1.get_o3prim(), r2.get_p());
53}
54
56inline bool are_connected(const Residue& r1, const Residue& r2, PolymerType ptype) {
58 return have_peptide_bond(r1, r2);
60 return have_nucleotide_bond(r1, r2);
61 return false;
62}
63
65inline bool are_connected2(const Residue& r1, const Residue& r2, PolymerType ptype) {
66 auto this_or_first = [](const Atom* a, const Residue& r, El el) -> const Atom* {
67 if (a || r.atoms.empty())
68 return a;
69 if (const Atom* b = r.find_by_element(el))
70 return b;
71 return &r.atoms.front();
72 };
73 if (is_polypeptide(ptype)) {
74 const Atom* a1 = this_or_first(r1.get_ca(), r1, El::C);
75 const Atom* a2 = this_or_first(r2.get_ca(), r2, El::C);
76 return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(5.0);
77 }
79 const Atom* a1 = this_or_first(r1.get_p(), r1, El::P);
80 const Atom* a2 = this_or_first(r2.get_p(), r2, El::P);
81 return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(7.5);
82 }
83 return false;
84}
85
87inline bool are_connected3(const Residue& r1, const Residue& r2, PolymerType ptype) {
88 if (is_polypeptide(ptype)) {
89 if (const Atom* a1 = r1.get_c())
90 if (const Atom* a2 = r2.get_n())
91 return a1->pos.dist_sq(a2->pos) < sq(1.341 * 1.5);
92 if (const Atom* a1 = r1.get_ca())
93 if (const Atom* a2 = r2.get_ca())
94 return a1->pos.dist_sq(a2->pos) < sq(5.0);
95 } else if (is_polynucleotide(ptype)) {
96 if (const Atom* a1 = r1.get_o3prim())
97 if (const Atom* a2 = r2.get_p())
98 return a1->pos.dist_sq(a2->pos) < sq(1.6 * 1.5);
99 if (const Atom* a1 = r1.get_p())
100 if (const Atom* a2 = r2.get_p())
101 return a1->pos.dist_sq(a2->pos) < sq(7.5);
102 }
103 return false;
104}
105
106inline std::string make_one_letter_sequence(const ConstResidueSpan& polymer) {
107 std::string seq;
108 const Residue* prev = nullptr;
110 for (const Residue& residue : polymer.first_conformer()) {
111 ResidueInfo info = find_tabulated_residue(residue.name);
112 if (prev && !are_connected2(*prev, residue, ptype))
113 seq += '-';
114 seq += (info.one_letter_code != ' ' ? info.one_letter_code : 'X');
115 prev = &residue;
116 }
117 return seq;
118}
119
127
130
133
146
148
150
152
154 add_entity_types(st, /*overwrite=*/false);
155 assign_subchains(st, /*force=*/false);
158}
159
160// Remove waters. It may leave empty chains.
161template<class T> void remove_waters(T& obj) {
162 for (auto& child : obj.children())
164}
165template<> inline void remove_waters(Chain& ch) {
166 vector_remove_if(ch.residues,
167 [](const Residue& res) { return res.is_water(); });
168}
169
170// Remove ligands and waters. It may leave empty chains.
171template<class T> void remove_ligands_and_waters(T& obj) {
172 for (auto& child : obj.children())
174}
175template<> inline void remove_ligands_and_waters(Chain& ch) {
176 vector_remove_if(ch.residues, [&](const Residue& res) {
177 if (res.entity_type == EntityType::Unknown)
178 fail("remove_ligands_and_waters(): missing entity_type in chain ", ch.name);
179 return res.entity_type != EntityType::Polymer;
180 });
181}
182
183// Trim to alanine. Returns true if trimmed, false if it's (likely) not AA.
184inline bool trim_to_alanine(Residue& res) {
185 static const std::pair<std::string, El> ala_atoms[6] = {
186 {"N", El::N}, {"CA", El::C}, {"C", El::C}, {"O", El::O}, {"CB", El::C},
187 {"OXT", El::O}
188 };
189 if (res.get_ca() == nullptr)
190 return false;
191 vector_remove_if(res.atoms, [](const Atom& a) {
192 for (const auto& name_el : ala_atoms)
193 if (a.name == name_el.first && a.element == name_el.second)
194 return false;
195 return true;
196 });
197 return true;
198}
199
200inline void trim_to_alanine(Chain& chain) {
201 for (Residue& res : chain.residues)
202 trim_to_alanine(res);
203}
204
205// Functions for switching between long (>3 chars) residue names (CCD codes)
206// and shortened ones that are compatible with the PDB format.
208void change_ccd_code(Structure& st, const std::string& old, const std::string& new_);
209
211
213 for (const OldToNew& item : st.shortened_ccd_codes)
214 change_ccd_code(st, item.new_, item.old);
215 st.shortened_ccd_codes.clear();
216}
217
218} // namespace gemmi
219#endif
#define GEMMI_DLL
Definition fail.hpp:53
GEMMI_DLL void add_entity_types(Chain &chain, bool overwrite)
Assigns entity_type=Polymer|NonPolymer|Water for each Residue (only for residues with entity_type==Un...
GEMMI_DLL void assign_subchain_names(Chain &chain, int &nonpolymer_counter)
The subchain field in the residue is where we store_atom_site.label_asym_id from mmCIF files.
void vector_remove_if(std::vector< T > &v, F &&condition)
Definition util.hpp:266
bool have_nucleotide_bond(const Residue &r1, const Residue &r2)
Definition polyheur.hpp:51
std::string make_one_letter_sequence(const ConstResidueSpan &polymer)
Definition polyheur.hpp:106
bool are_connected(const Residue &r1, const Residue &r2, PolymerType ptype)
check C-N or O3'-P distance
Definition polyheur.hpp:56
std::vector< AtomNameElement > get_mainchain_atoms(PolymerType ptype)
Definition polyheur.hpp:31
void remove_ligands_and_waters(T &obj)
Definition polyheur.hpp:171
bool in_nucleotide_bond_distance(const Atom *a1, const Atom *a2)
distance-based check for phosphodiester bond between nucleotide
Definition polyheur.hpp:48
bool is_polypeptide(PolymerType pt)
Definition metadata.hpp:211
GEMMI_DLL PolymerType check_polymer_type(const ConstResidueSpan &span, bool ignore_entity_type=false)
void remove_waters(T &obj)
Definition polyheur.hpp:161
GEMMI_DLL void add_entity_ids(Structure &st, bool overwrite)
Assigns Residue::entity_id based on Residue::subchain and Entity::subchains.
PolymerType get_or_check_polymer_type(const Entity *ent, const ConstResidueSpan &polymer)
Definition polyheur.hpp:22
GEMMI_DLL void shorten_ccd_codes(Structure &st)
GEMMI_DLL void assign_subchains(Structure &st, bool force, bool fail_if_unknown=true)
bool in_peptide_bond_distance(const Atom *a1, const Atom *a2)
distance-based check for peptide bond
Definition polyheur.hpp:40
bool are_connected2(const Residue &r1, const Residue &r2, PolymerType ptype)
are_connected2() is less exact, but requires only CA (or P) atoms.
Definition polyheur.hpp:65
GEMMI_DLL void remove_entity_types(Structure &st)
Assigns entity_type=Unknown for all residues.
bool is_polynucleotide(PolymerType pt)
Definition metadata.hpp:215
constexpr float sq(float x)
Definition math.hpp:34
bool trim_to_alanine(Residue &res)
Definition polyheur.hpp:184
bool have_peptide_bond(const Residue &r1, const Residue &r2)
Definition polyheur.hpp:43
bool are_connected3(const Residue &r1, const Residue &r2, PolymerType ptype)
are_connected3() = are_connected() + fallback to are_connected2()
Definition polyheur.hpp:87
GEMMI_DLL void change_ccd_code(Structure &st, const std::string &old, const std::string &new_)
GEMMI_DLL ResidueInfo find_tabulated_residue(const std::string &name)
GEMMI_DLL void deduplicate_entities(Structure &st)
GEMMI_DLL void ensure_entities(Structure &st)
void setup_entities(Structure &st)
Definition polyheur.hpp:153
void restore_full_ccd_codes(Structure &st)
Definition polyheur.hpp:212
std::string atom_name
Definition polyheur.hpp:29
Represents atom site in macromolecular structure (~100 bytes).
Definition model.hpp:112
std::vector< Residue > residues
Definition model.hpp:465
ConstUniqProxy< Residue, ConstResidueSpan > first_conformer() const
Definition model.hpp:304
helper type used for Structure::shortened_ccd_codes
Definition model.hpp:91
const Atom * get_ca() const
Definition model.hpp:247
std::vector< Atom > atoms
Definition model.hpp:188