Gemmi C++ API
Loading...
Searching...
No Matches
neighbor.hpp
Go to the documentation of this file.
1// Copyright 2018 Global Phasing Ltd.
2//
3// Cell-linked lists method for atom searching (a.k.a. grid search, binning,
4// bucketing, cell technique for neighbor search, etc).
5
6#ifndef GEMMI_NEIGHBOR_HPP_
7#define GEMMI_NEIGHBOR_HPP_
8
9#include <vector>
10#include <cmath> // for INFINITY, sqrt
11
12#include "fail.hpp" // for fail
13#include "grid.hpp"
14#include "model.hpp"
15#include "small.hpp"
16
17namespace gemmi {
18
20
21 struct Mark {
23 char altloc;
25 short image_idx;
29
30 Mark(const Position& p, char alt, El el, short im, int ch, int res, int atom)
31 : pos(p), altloc(alt), element(el),
32 image_idx(im), chain_idx(ch), residue_idx(res), atom_idx(atom) {}
33
34 CRA to_cra(Model& mdl) const {
35 Chain& c = mdl.chains.at(chain_idx);
37 Atom& a = r.atoms.at(atom_idx);
38 return {&c, &r, &a};
39 }
40 const_CRA to_cra(const Model& mdl) const {
41 const Chain& c = mdl.chains.at(chain_idx);
42 const Residue& r = c.residues.at(residue_idx);
43 const Atom& a = r.atoms.at(atom_idx);
44 return {&c, &r, &a};
45 }
50 return small_st.sites.at(atom_idx);
51 }
52 };
53
55 double radius_specified = 0.;
56 Model* model = nullptr;
58 bool use_pbc = true;
59 bool include_h = true;
60
61 NeighborSearch() = default;
62 // Model is not const so it can be modified in for_each_contact()
63 NeighborSearch(Model& model_, const UnitCell& cell, double radius) {
64 model = &model_;
66 set_bounding_cell(cell);
67 set_grid_size();
68 }
72 grid.unit_cell = small_st.cell;
73 set_grid_size();
74 }
75
77 void add_chain(const Chain& chain, bool include_h_=true);
78 void add_chain_n(const Chain& chain, int n_ch);
79 void add_atom(const Atom& atom, int n_ch, int n_res, int n_atom);
80 void add_site(const SmallStructure::Site& site, int n);
81
82 // assumes data in [0, 1), but uses index_n to account for numerical errors
83 std::vector<Mark>& get_subcell(const Fractional& fr) {
84 return grid.data[grid.index_n(int(fr.x * grid.nu),
85 int(fr.y * grid.nv),
86 int(fr.z * grid.nw))];
87 }
88
89 template<typename Func>
90 void for_each_cell(const Position& pos, const Func& func, int k=1);
91
92 template<typename Func>
93 void for_each(const Position& pos, char alt, double radius, const Func& func, int k=1) {
94 if (radius <= 0)
95 return;
96 for_each_cell(pos, [&](std::vector<Mark>& marks, const Fractional& fr) {
97 Position p = use_pbc ? grid.unit_cell.orthogonalize(fr) : pos;
98 for (Mark& m : marks) {
99 double dist_sq = m.pos.dist_sq(p);
100 if (dist_sq < sq(radius) && is_same_conformer(alt, m.altloc))
101 func(m, dist_sq);
102 }
103 }, k);
104 }
105
106 int sufficient_k(double r) const {
107 // .00001 is added to account for possible numeric error in r
108 return r <= radius_specified ? 1 : int(r / radius_specified + 1.00001);
109 }
110
111 // with radius==0 it uses radius_specified
112 std::vector<Mark*> find_atoms(const Position& pos, char alt,
113 double min_dist, double radius) {
114 int k = sufficient_k(radius);
115 if (radius == 0)
117 std::vector<Mark*> out;
118 for_each(pos, alt, radius, [&](Mark& a, double dist_sq) {
119 if (dist_sq >= sq(min_dist))
120 out.push_back(&a);
121 }, k);
122 return out;
123 }
124
125 std::vector<Mark*> find_neighbors(const Atom& atom, double min_dist, double max_dist) {
126 return find_atoms(atom.pos, atom.altloc, min_dist, max_dist);
127 }
129 double min_dist, double max_dist) {
130 Position pos = grid.unit_cell.orthogonalize(site.fract);
131 return find_atoms(pos, '\0', min_dist, max_dist);
132 }
133
134 std::pair<Mark*, double>
135 find_nearest_atom_within_k(const Position& pos, int k, double radius) {
136 Mark* mark = nullptr;
137 double nearest_dist_sq = radius * radius;
138 for_each_cell(pos, [&](std::vector<Mark>& marks, const Fractional& fr) {
139 Position p = use_pbc ? grid.unit_cell.orthogonalize(fr) : pos;
140 for (Mark& m : marks) {
141 double dist_sq = m.pos.dist_sq(p);
142 if (dist_sq < nearest_dist_sq) {
143 mark = &m;
145 }
146 }
147 }, k);
148 return {mark, nearest_dist_sq};
149 }
150
151 // it would be good to return also NearestImage
153 double r_spec = radius_specified;
154 if (radius == 0.f)
155 radius = r_spec;
156 int max_k = std::max(std::max(std::max(grid.nu, grid.nv), grid.nw), 2);
157 for (int k = 1; k < max_k; k *= 2) {
159 // if Mark was not found, result.second is set to radius^2.
160 if (result.second < sq(k * r_spec))
161 return result.first;
162 if (result.first != nullptr) {
163 // We found an atom, but because it was further away than k*r_spec,
164 // so now it's sufficient to find the nearest atom in dist:
165 double dist = std::sqrt(result.second);
167 }
168 }
169 if (!use_pbc)
170 // pos can be outside of bounding box. In such case, although it's slow,
171 // search in all cells. Using large number that will be clipped.
172 return find_nearest_atom_within_k(pos, INT_MAX/4, radius).first;
173 return nullptr;
174 }
175
176 double dist_sq(const Position& pos1, const Position& pos2) const {
177 return grid.unit_cell.distance_sq(pos1, pos2);
178 }
179 double dist(const Position& pos1, const Position& pos2) const {
180 return std::sqrt(dist_sq(pos1, pos2));
181 }
182
184 // 0 is for identity, other indices are shifted by one.
185 if (image_idx == 0)
186 return Transform{};
187 if ((size_t)image_idx <= grid.unit_cell.images.size())
188 return grid.unit_cell.images[image_idx-1];
189 fail("No such image index: " + std::to_string(image_idx));
190 }
191
192private:
193 void set_grid_size() {
194 // We don't use set_size_from_spacing() etc because we don't need
195 // FFT-friendly size nor symmetry.
196 double inv_radius = 1 / radius_specified;
197 const UnitCell& uc = grid.unit_cell;
198 grid.set_size_without_checking(std::max(int(inv_radius / uc.ar), 1),
199 std::max(int(inv_radius / uc.br), 1),
200 std::max(int(inv_radius / uc.cr), 1));
201 }
202
203 void set_bounding_cell(const UnitCell& cell) {
204 use_pbc = cell.is_crystal();
205 if (use_pbc) {
206 grid.unit_cell = cell;
207 } else {
208 // cf. calculate_box()
209 Box<Position> box;
210 for (CRA cra : model->all())
211 box.extend(cra.atom->pos);
212 // The box needs to include all NCS images (strict NCS from MTRIXn).
213 // To avoid additional function parameter that would pass Structure::ncs,
214 // here we obtain NCS transformations from UnitCell::images.
215 std::vector<FTransform> ncs = cell.get_ncs_transforms();
216 if (!ncs.empty()) {
217 for (CRA cra : model->all())
218 // images store fractional transforms, but for non-crystal
219 // it should be the same as Cartesian transform.
220 for (const Transform& tr : ncs)
221 box.extend(Position(tr.apply(cra.atom->pos)));
222 }
223 box.add_margin(0.01);
224 Position size = box.get_size();
225 grid.unit_cell.set(size.x, size.y, size.z, 90, 90, 90);
226 grid.unit_cell.frac.vec -= grid.unit_cell.fractionalize(box.minimum);
227 grid.unit_cell.orth.vec += box.minimum;
228 for (const Transform& tr : ncs) {
229 UnitCell& c = grid.unit_cell;
230 // cf. add_ncs_images_to_cs_images()
231 c.images.push_back(c.frac.combine(tr.combine(c.orth)));
232 }
233 }
234 }
235};
236
239 if (model) {
240 for (int n_ch = 0; n_ch != (int) model->chains.size(); ++n_ch)
242 } else if (small_structure) {
243 for (int n = 0; n != (int) small_structure->sites.size(); ++n) {
245 if (include_h || !site.element.is_hydrogen())
246 add_site(site, n);
247 }
248 } else {
249 fail("NeighborSearch not initialized");
250 }
251 return *this;
252}
253
254inline void NeighborSearch::add_chain(const Chain& chain, bool include_h_) {
255 if (!model)
256 fail("NeighborSearch.add_chain(): model not initialized yet");
257 // to be safe avoid (&chain - model.chains[0]) which could be UB
258 for (int n_ch = 0; n_ch != (int) model->chains.size(); ++n_ch)
259 if (&model->chains[n_ch] == &chain) {
261 add_chain_n(chain, n_ch);
262 return;
263 }
264 fail("NeighborSearch.add_chain(): chain not in this model");
265}
266
267inline void NeighborSearch::add_chain_n(const Chain& chain, int n_ch) {
268 for (int n_res = 0; n_res != (int) chain.residues.size(); ++n_res) {
269 const Residue& res = chain.residues[n_res];
270 for (int n_atom = 0; n_atom != (int) res.atoms.size(); ++n_atom) {
271 const Atom& atom = res.atoms[n_atom];
272 if (include_h || !atom.is_hydrogen())
273 add_atom(atom, n_ch, n_res, n_atom);
274 }
275 }
276}
277
278inline void NeighborSearch::add_atom(const Atom& atom,
279 int n_ch, int n_res, int n_atom) {
280 const UnitCell& gcell = grid.unit_cell;
281 Fractional frac0 = gcell.fractionalize(atom.pos);
282 {
283 Fractional frac = frac0.wrap_to_unit();
284 // for non-crystals, frac==frac0 => pos = atom.pos
285 Position pos = use_pbc ? gcell.orthogonalize(frac) : atom.pos;
286 get_subcell(frac).emplace_back(pos, atom.altloc, atom.element.elem,
287 0, n_ch, n_res, n_atom);
288 }
289 for (int n_im = 0; n_im != (int) gcell.images.size(); ++n_im) {
290 Fractional frac = gcell.images[n_im].apply(frac0).wrap_to_unit();
291 Position pos = gcell.orthogonalize(frac);
292 get_subcell(frac).emplace_back(pos, atom.altloc, atom.element.elem,
293 short(n_im + 1), n_ch, n_res, n_atom);
294 }
295}
296
297// We exclude special position images of atoms here, but not in add_atom.
298// This choice is somewhat arbitrary, but it also reflects the fact that
299// in MX files occupances of atoms on special positions are (almost always)
300// fractional and all images are to be taken into account.
302 const double SPECIAL_POS_TOL = 0.4;
303 const UnitCell& gcell = grid.unit_cell;
304 std::vector<Fractional> others;
305 others.reserve(gcell.images.size());
306 Fractional frac0 = site.fract.wrap_to_unit();
307 {
308 Position pos = gcell.orthogonalize(frac0);
309 get_subcell(frac0).emplace_back(pos, '\0', site.element.elem, 0, -1, -1, n);
310 }
311 for (int n_im = 0; n_im != (int) gcell.images.size(); ++n_im) {
312 Fractional frac = gcell.images[n_im].apply(site.fract).wrap_to_unit();
313 if (gcell.distance_sq(frac, frac0) < sq(SPECIAL_POS_TOL) ||
314 std::any_of(others.begin(), others.end(), [&](const Fractional& f) {
315 return gcell.distance_sq(frac, f) < sq(SPECIAL_POS_TOL);
316 }))
317 continue;
318 Position pos = gcell.orthogonalize(frac);
319 get_subcell(frac).emplace_back(pos, '\0', site.element.elem,
320 short(n_im + 1), -1, -1, n);
321 others.push_back(frac);
322 }
323}
324
325template<typename Func>
326void NeighborSearch::for_each_cell(const Position& pos, const Func& func, int k) {
327 Fractional fr = grid.unit_cell.fractionalize(pos);
328 if (use_pbc)
329 fr = fr.wrap_to_unit();
330 int u0 = int(fr.x * grid.nu) - k;
331 int v0 = int(fr.y * grid.nv) - k;
332 int w0 = int(fr.z * grid.nw) - k;
333 int uend = u0 + 2 * k + 1;
334 int vend = v0 + 2 * k + 1;
335 int wend = w0 + 2 * k + 1;
336 if (use_pbc) {
337 auto shift = [](int j, int n) {
338 if (j < 0)
339 return (j + 1) / n - 1;
340 if (j >= n)
341 return j / n;
342 return 0;
343 };
344 for (int w = w0; w < wend; ++w) {
345 int dw = shift(w, grid.nw);
346 for (int v = v0; v < vend; ++v) {
347 int dv = shift(v, grid.nv);
348 size_t idx0 = grid.index_q(0, v - dv * grid.nv, w - dw * grid.nw);
349 for (int u = u0; u < uend; ++u) {
350 int du = shift(u, grid.nu);
351 size_t idx = idx0 + (u - du * grid.nu);
352 func(grid.data[idx], Fractional(fr.x - du, fr.y - dv, fr.z - dw));
353 }
354 }
355 }
356 } else {
357 u0 = std::max(0, u0);
358 v0 = std::max(0, v0);
359 w0 = std::max(0, w0);
360 uend = std::min(uend, grid.nu);
361 vend = std::min(vend, grid.nv);
362 wend = std::min(wend, grid.nw);
363 for (int w = w0; w < wend; ++w)
364 for (int v = v0; v < vend; ++v)
365 for (int u = u0; u < uend; ++u) {
366 size_t idx = grid.index_q(u, v, w);
367 func(grid.data[idx], fr);
368 }
369 }
370}
371
372} // namespace gemmi
373#endif
bool is_same_conformer(char altloc1, char altloc2)
Definition model.hpp:107
constexpr float sq(float x)
Definition math.hpp:34
void fail(const std::string &msg)
Definition fail.hpp:59
Represents atom site in macromolecular structure (~100 bytes).
Definition model.hpp:112
char altloc
Definition model.hpp:115
bool is_hydrogen() const
Definition model.hpp:140
Element element
Definition model.hpp:117
Position pos
Definition model.hpp:123
std::vector< Residue > residues
Definition model.hpp:465
Like Transform, but apply() arg is Fractional (not Vec3 - for type safety).
Definition unitcell.hpp:112
Fractional coordinates.
Definition unitcell.hpp:50
std::vector< Chain > chains
Definition model.hpp:700
const_CRA to_cra(const Model &mdl) const
Definition neighbor.hpp:40
CRA to_cra(Model &mdl) const
Definition neighbor.hpp:34
SmallStructure::Site & to_site(SmallStructure &small_st) const
Definition neighbor.hpp:46
const SmallStructure::Site & to_site(const SmallStructure &small_st) const
Definition neighbor.hpp:49
Mark(const Position &p, char alt, El el, short im, int ch, int res, int atom)
Definition neighbor.hpp:30
int sufficient_k(double r) const
Definition neighbor.hpp:106
void add_atom(const Atom &atom, int n_ch, int n_res, int n_atom)
Definition neighbor.hpp:278
void add_site(const SmallStructure::Site &site, int n)
Definition neighbor.hpp:301
void add_chain(const Chain &chain, bool include_h_=true)
Definition neighbor.hpp:254
NeighborSearch & populate(bool include_h_=true)
Definition neighbor.hpp:237
void for_each(const Position &pos, char alt, double radius, const Func &func, int k=1)
Definition neighbor.hpp:93
NeighborSearch(Model &model_, const UnitCell &cell, double radius)
Definition neighbor.hpp:63
Grid< std::vector< Mark > > grid
Definition neighbor.hpp:54
NeighborSearch(SmallStructure &small_st, double radius)
Definition neighbor.hpp:69
SmallStructure * small_structure
Definition neighbor.hpp:57
std::pair< Mark *, double > find_nearest_atom_within_k(const Position &pos, int k, double radius)
Definition neighbor.hpp:135
void for_each_cell(const Position &pos, const Func &func, int k=1)
Definition neighbor.hpp:326
double dist_sq(const Position &pos1, const Position &pos2) const
Definition neighbor.hpp:176
void add_chain_n(const Chain &chain, int n_ch)
Definition neighbor.hpp:267
std::vector< Mark * > find_site_neighbors(const SmallStructure::Site &site, double min_dist, double max_dist)
Definition neighbor.hpp:128
std::vector< Mark * > find_neighbors(const Atom &atom, double min_dist, double max_dist)
Definition neighbor.hpp:125
std::vector< Mark * > find_atoms(const Position &pos, char alt, double min_dist, double radius)
Definition neighbor.hpp:112
FTransform get_image_transformation(int image_idx) const
Definition neighbor.hpp:183
Mark * find_nearest_atom(const Position &pos, double radius=INFINITY)
Definition neighbor.hpp:152
std::vector< Mark > & get_subcell(const Fractional &fr)
Definition neighbor.hpp:83
double dist(const Position &pos1, const Position &pos2) const
Definition neighbor.hpp:179
Coordinates in Angstroms - orthogonal (Cartesian) coordinates.
Definition unitcell.hpp:32
std::vector< Atom > atoms
Definition model.hpp:188
std::vector< Site > sites
Definition small.hpp:59
Unit cell.
Definition unitcell.hpp:139