Gemmi C++ API
Loading...
Searching...
No Matches
polyheur.hpp
Go to the documentation of this file.
1// Copyright 2017-2018 Global Phasing Ltd.
2//
3// Heuristic methods for working with chains and polymers.
4// Also includes a few well-defined functions, such as removal of waters.
5
6#ifndef GEMMI_POLYHEUR_HPP_
7#define GEMMI_POLYHEUR_HPP_
8
9#include <vector>
10#include "model.hpp"
11#include "util.hpp" // for vector_remove_if
12
13namespace gemmi {
14
15// A simplistic classification. It may change in the future.
16// It returns PolymerType which corresponds to _entity_poly.type,
17// but here we use only PeptideL, Rna, Dna, DnaRnaHybrid and Unknown.
19 bool ignore_entity_type=false);
20
22 const ConstResidueSpan& polymer) {
23 if (ent && ent->polymer_type != PolymerType::Unknown)
24 return ent->polymer_type;
25 return check_polymer_type(polymer);
26}
27
28struct AtomNameElement { std::string atom_name; El el; };
29
30inline std::vector<AtomNameElement> get_mainchain_atoms(PolymerType ptype) {
32 return {{"P", El::P}, {"O5'", El::O}, {"C5'", El::C},
33 {"C4'", El::C}, {"O4'", El::O}, {"C3'", El::C}, {"O3'", El::O},
34 {"C2'", El::C}, {"O2'", El::O}, {"C1'", El::C}};
35 return {{"N", El::N}, {"CA", El::C}, {"C", El::C}, {"O", El::O}};
36}
37
39inline bool in_peptide_bond_distance(const Atom* a1, const Atom* a2) {
40 return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(1.341 * 1.5);
41}
42inline bool have_peptide_bond(const Residue& r1, const Residue& r2) {
43 return in_peptide_bond_distance(r1.get_c(), r2.get_n());
44}
45
47inline bool in_nucleotide_bond_distance(const Atom* a1, const Atom* a2) {
48 return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(1.6 * 1.5);
49}
50inline bool have_nucleotide_bond(const Residue& r1, const Residue& r2) {
51 return in_nucleotide_bond_distance(r1.get_o3prim(), r2.get_p());
52}
53
55inline bool are_connected(const Residue& r1, const Residue& r2, PolymerType ptype) {
57 return have_peptide_bond(r1, r2);
59 return have_nucleotide_bond(r1, r2);
60 return false;
61}
62
64inline bool are_connected2(const Residue& r1, const Residue& r2, PolymerType ptype) {
65 auto this_or_first = [](const Atom* a, const Residue& r, El el) -> const Atom* {
66 if (a || r.atoms.empty())
67 return a;
68 if (const Atom* b = r.find_by_element(el))
69 return b;
70 return &r.atoms.front();
71 };
72 if (is_polypeptide(ptype)) {
73 const Atom* a1 = this_or_first(r1.get_ca(), r1, El::C);
74 const Atom* a2 = this_or_first(r2.get_ca(), r2, El::C);
75 return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(5.0);
76 }
78 const Atom* a1 = this_or_first(r1.get_p(), r1, El::P);
79 const Atom* a2 = this_or_first(r2.get_p(), r2, El::P);
80 return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(7.5);
81 }
82 return false;
83}
84
86inline bool are_connected3(const Residue& r1, const Residue& r2, PolymerType ptype) {
87 if (is_polypeptide(ptype)) {
88 if (const Atom* a1 = r1.get_c())
89 if (const Atom* a2 = r2.get_n())
90 return a1->pos.dist_sq(a2->pos) < sq(1.341 * 1.5);
91 if (const Atom* a1 = r1.get_ca())
92 if (const Atom* a2 = r2.get_ca())
93 return a1->pos.dist_sq(a2->pos) < sq(5.0);
94 } else if (is_polynucleotide(ptype)) {
95 if (const Atom* a1 = r1.get_o3prim())
96 if (const Atom* a2 = r2.get_p())
97 return a1->pos.dist_sq(a2->pos) < sq(1.6 * 1.5);
98 if (const Atom* a1 = r1.get_p())
99 if (const Atom* a2 = r2.get_p())
100 return a1->pos.dist_sq(a2->pos) < sq(7.5);
101 }
102 return false;
103}
104
106
114
117
120
133
135
137
139
141 add_entity_types(st, /*overwrite=*/false);
142 assign_subchains(st, /*force=*/false);
145}
146
150template<class T> void assign_het_flags(T& obj, char flag='R') {
151 for (auto& child : obj.children())
152 assign_het_flags(child, flag);
153}
154template<> inline void assign_het_flags(Residue& res, char flag) {
155 flag &= ~0x20; // uppercase letters, ' ' -> \0
156 if (flag != 'R' && flag != '\0' && flag != 'A' && flag != 'H')
157 fail("assign_het_flags(): the only allowed values are A, H, ' ' and R");
158 res.het_flag = flag == 'R' ? recommended_het_flag(res) : flag;
159}
160
161// Remove waters. It may leave empty chains.
162template<class T> void remove_waters(T& obj) {
163 for (auto& child : obj.children())
165}
166template<> inline void remove_waters(Chain& ch) {
167 vector_remove_if(ch.residues,
168 [](const Residue& res) { return res.is_water(); });
169}
170
171// Remove ligands and waters. It may leave empty chains.
172template<class T> void remove_ligands_and_waters(T& obj) {
173 for (auto& child : obj.children())
175}
176template<> inline void remove_ligands_and_waters(Chain& ch) {
177 vector_remove_if(ch.residues, [&](const Residue& res) {
178 if (res.entity_type == EntityType::Unknown)
179 fail("remove_ligands_and_waters(): missing entity_type in chain ", ch.name);
180 return res.entity_type != EntityType::Polymer;
181 });
182}
183
184// Trim to alanine. Returns true if trimmed, false if it's (likely) not AA.
186
187inline void trim_to_alanine(Chain& chain) {
188 for (Residue& res : chain.residues)
189 trim_to_alanine(res);
190}
191
192// Functions for switching between long (>3 chars) residue names (CCD codes)
193// and shortened ones that are compatible with the PDB format.
196
199
201
202} // namespace gemmi
203#endif
#define GEMMI_DLL
Definition fail.hpp:53
Data structures to store macromolecular structure models.
GEMMI_DLL bool trim_to_alanine(Residue &res)
GEMMI_DLL void add_microhetero_to_sequences(Structure &st, bool overwrite=false)
Modifies Entity::full_sequence. Uses only the first chain for each Entity.
GEMMI_DLL void add_entity_types(Chain &chain, bool overwrite)
Assigns entity_type=Polymer|NonPolymer|Water for each Residue (only for residues with entity_type==Un...
GEMMI_DLL void assign_subchain_names(Chain &chain, int &nonpolymer_counter)
The subchain field in the residue is where we store_atom_site.label_asym_id from mmCIF files.
void vector_remove_if(std::vector< T > &v, F &&condition)
Definition util.hpp:267
bool have_nucleotide_bond(const Residue &r1, const Residue &r2)
Definition polyheur.hpp:50
bool are_connected(const Residue &r1, const Residue &r2, PolymerType ptype)
check C-N or O3'-P distance
Definition polyheur.hpp:55
std::vector< AtomNameElement > get_mainchain_atoms(PolymerType ptype)
Definition polyheur.hpp:30
GEMMI_DLL std::string make_one_letter_sequence(const ConstResidueSpan &polymer)
void remove_ligands_and_waters(T &obj)
Definition polyheur.hpp:172
bool in_nucleotide_bond_distance(const Atom *a1, const Atom *a2)
distance-based check for phosphodiester bond between nucleotide
Definition polyheur.hpp:47
bool is_polypeptide(PolymerType pt)
Definition metadata.hpp:231
GEMMI_DLL PolymerType check_polymer_type(const ConstResidueSpan &span, bool ignore_entity_type=false)
void remove_waters(T &obj)
Definition polyheur.hpp:162
GEMMI_DLL void add_entity_ids(Structure &st, bool overwrite)
Assigns Residue::entity_id based on Residue::subchain and Entity::subchains.
PolymerType get_or_check_polymer_type(const Entity *ent, const ConstResidueSpan &polymer)
Definition polyheur.hpp:21
GEMMI_DLL void shorten_ccd_codes(Structure &st)
GEMMI_DLL void assign_subchains(Structure &st, bool force, bool fail_if_unknown=true)
bool in_peptide_bond_distance(const Atom *a1, const Atom *a2)
distance-based check for peptide bond
Definition polyheur.hpp:39
GEMMI_DLL void add_tls_group_ids(Structure &st)
bool are_connected2(const Residue &r1, const Residue &r2, PolymerType ptype)
are_connected2() is less exact, but requires only CA (or P) atoms.
Definition polyheur.hpp:64
GEMMI_DLL void remove_entity_types(Structure &st)
Assigns entity_type=Unknown for all residues.
bool is_polynucleotide(PolymerType pt)
Definition metadata.hpp:235
constexpr float sq(float x)
Definition math.hpp:34
bool have_peptide_bond(const Residue &r1, const Residue &r2)
Definition polyheur.hpp:42
bool are_connected3(const Residue &r1, const Residue &r2, PolymerType ptype)
are_connected3() = are_connected() + fallback to are_connected2()
Definition polyheur.hpp:86
GEMMI_DLL char recommended_het_flag(const Residue &res)
Determine ATOM/HETATM record type, based on Residue::entity_type.
void fail(const std::string &msg)
Definition fail.hpp:59
void assign_het_flags(T &obj, char flag='R')
R = recommended_het_flag(), other valid values are A, H and '\0'.
Definition polyheur.hpp:150
GEMMI_DLL void deduplicate_entities(Structure &st)
GEMMI_DLL void ensure_entities(Structure &st)
void setup_entities(Structure &st)
Definition polyheur.hpp:140
GEMMI_DLL void restore_full_ccd_codes(Structure &st)
std::string atom_name
Definition polyheur.hpp:28
Represents atom site in macromolecular structure (~100 bytes).
Definition model.hpp:120
std::vector< Residue > residues
Definition model.hpp:486
Utilities. Mostly for working with strings and vectors.