Gemmi C++ API
Loading...
Searching...
No Matches
neighbor.hpp
Go to the documentation of this file.
1// Copyright 2018 Global Phasing Ltd.
2//
3// Cell-linked lists method for atom searching (a.k.a. grid search, binning,
4// bucketing, cell technique for neighbor search, etc).
5
6#ifndef GEMMI_NEIGHBOR_HPP_
7#define GEMMI_NEIGHBOR_HPP_
8
9#include <vector>
10#include <cmath> // for INFINITY, sqrt
11
12#include "fail.hpp" // for fail
13#include "grid.hpp"
14#include "model.hpp"
15#include "small.hpp"
16
17namespace gemmi {
18
20
21 struct Mark {
23 char altloc;
25 short image_idx;
29
30 Mark(const Position& p, char alt, El el, short im, int ch, int res, int atom)
31 : pos(p), altloc(alt), element(el),
32 image_idx(im), chain_idx(ch), residue_idx(res), atom_idx(atom) {}
33
34 CRA to_cra(Model& mdl) const {
35 Chain& c = mdl.chains.at(chain_idx);
37 Atom& a = r.atoms.at(atom_idx);
38 return {&c, &r, &a};
39 }
40 const_CRA to_cra(const Model& mdl) const {
41 const Chain& c = mdl.chains.at(chain_idx);
42 const Residue& r = c.residues.at(residue_idx);
43 const Atom& a = r.atoms.at(atom_idx);
44 return {&c, &r, &a};
45 }
50 return small_st.sites.at(atom_idx);
51 }
52 };
53
55 double radius_specified = 0.;
56 Model* model = nullptr;
58 bool use_pbc = true;
59 bool include_h = true;
60
61 NeighborSearch() = default;
62 // Model is not const so it can be modified in for_each_contact()
63 NeighborSearch(Model& model_, const UnitCell& cell, double radius) {
64 model = &model_;
66 set_bounding_cell(cell);
67 set_grid_size();
68 }
72 grid.unit_cell = small_st.cell;
73 set_grid_size();
74 }
75
77 void add_chain(const Chain& chain, bool include_h_=true);
78 void add_chain_n(const Chain& chain, int n_ch);
79 void add_atom(const Atom& atom, int n_ch, int n_res, int n_atom);
80 void add_site(const SmallStructure::Site& site, int n);
81
82 // assumes data in [0, 1), but uses index_n to account for numerical errors
83 std::vector<Mark>& get_subcell(const Fractional& fr) {
84 size_t idx = grid.index_n(int(fr.x * grid.nu),
85 int(fr.y * grid.nv),
86 int(fr.z * grid.nw));
87 if (idx >= grid.data.size())
88 fail("NeighborSearch error, probably due to NaN in coordinates");
89 return grid.data[idx];
90 }
91
92 template<typename Func>
93 void for_each_cell(const Position& pos, const Func& func, int k=1);
94
95 template<typename Func>
96 void for_each(const Position& pos, char alt, double radius, const Func& func, int k=1) {
97 if (radius <= 0)
98 return;
99 for_each_cell(pos, [&](std::vector<Mark>& marks, const Fractional& fr) {
100 Position p = use_pbc ? grid.unit_cell.orthogonalize(fr) : pos;
101 for (Mark& m : marks) {
102 double dist_sq = m.pos.dist_sq(p);
103 if (dist_sq < sq(radius) && is_same_conformer(alt, m.altloc))
104 func(m, dist_sq);
105 }
106 }, k);
107 }
108
109 int sufficient_k(double r) const {
110 // .00001 is added to account for possible numeric error in r
111 return r <= radius_specified ? 1 : int(r / radius_specified + 1.00001);
112 }
113
114 // with radius==0 it uses radius_specified
115 std::vector<Mark*> find_atoms(const Position& pos, char alt,
116 double min_dist, double radius) {
117 int k = sufficient_k(radius);
118 if (radius == 0)
120 std::vector<Mark*> out;
121 for_each(pos, alt, radius, [&](Mark& a, double dist_sq) {
122 if (dist_sq >= sq(min_dist))
123 out.push_back(&a);
124 }, k);
125 return out;
126 }
127
128 std::vector<Mark*> find_neighbors(const Atom& atom, double min_dist, double max_dist) {
129 return find_atoms(atom.pos, atom.altloc, min_dist, max_dist);
130 }
132 double min_dist, double max_dist) {
133 Position pos = grid.unit_cell.orthogonalize(site.fract);
134 return find_atoms(pos, '\0', min_dist, max_dist);
135 }
136
137 std::pair<Mark*, double>
138 find_nearest_atom_within_k(const Position& pos, int k, double radius) {
139 Mark* mark = nullptr;
140 double nearest_dist_sq = radius * radius;
141 for_each_cell(pos, [&](std::vector<Mark>& marks, const Fractional& fr) {
142 Position p = use_pbc ? grid.unit_cell.orthogonalize(fr) : pos;
143 for (Mark& m : marks) {
144 double dist_sq = m.pos.dist_sq(p);
145 if (dist_sq < nearest_dist_sq) {
146 mark = &m;
148 }
149 }
150 }, k);
151 return {mark, nearest_dist_sq};
152 }
153
154 // it would be good to return also NearestImage
156 double r_spec = radius_specified;
157 if (radius == 0.f)
158 radius = r_spec;
159 int max_k = std::max(std::max(std::max(grid.nu, grid.nv), grid.nw), 2);
160 for (int k = 1; k < max_k; k *= 2) {
162 // if Mark was not found, result.second is set to radius^2.
163 if (result.second < sq(k * r_spec))
164 return result.first;
165 if (result.first != nullptr) {
166 // We found an atom, but because it was further away than k*r_spec,
167 // so now it's sufficient to find the nearest atom in dist:
168 double dist = std::sqrt(result.second);
170 }
171 }
172 if (!use_pbc)
173 // pos can be outside of bounding box. In such case, although it's slow,
174 // search in all cells. Using large number that will be clipped.
175 return find_nearest_atom_within_k(pos, INT_MAX/4, radius).first;
176 return nullptr;
177 }
178
179 double dist_sq(const Position& pos1, const Position& pos2) const {
180 return grid.unit_cell.distance_sq(pos1, pos2);
181 }
182 double dist(const Position& pos1, const Position& pos2) const {
183 return std::sqrt(dist_sq(pos1, pos2));
184 }
185
187 // 0 is for identity, other indices are shifted by one.
188 if (image_idx == 0)
189 return Transform{};
190 if ((size_t)image_idx <= grid.unit_cell.images.size())
191 return grid.unit_cell.images[image_idx-1];
192 fail("No such image index: " + std::to_string(image_idx));
193 }
194
195private:
196 void set_grid_size() {
197 // We don't use set_size_from_spacing() etc because we don't need
198 // FFT-friendly size nor symmetry.
199 double inv_radius = 1 / radius_specified;
200 const UnitCell& uc = grid.unit_cell;
201 grid.set_size_without_checking(std::max(int(inv_radius / uc.ar), 1),
202 std::max(int(inv_radius / uc.br), 1),
203 std::max(int(inv_radius / uc.cr), 1));
204 }
205
206 void set_bounding_cell(const UnitCell& cell) {
207 use_pbc = cell.is_crystal();
208 if (use_pbc) {
209 grid.unit_cell = cell;
210 } else {
211 // cf. calculate_box()
212 Box<Position> box;
213 for (CRA cra : model->all())
214 box.extend(cra.atom->pos);
215 // The box needs to include all NCS images (strict NCS from MTRIXn).
216 // To avoid additional function parameter that would pass Structure::ncs,
217 // here we obtain NCS transformations from UnitCell::images.
218 std::vector<FTransform> ncs = cell.get_ncs_transforms();
219 if (!ncs.empty()) {
220 for (CRA cra : model->all())
221 // images store fractional transforms, but for non-crystal
222 // it should be the same as Cartesian transform.
223 for (const Transform& tr : ncs)
224 box.extend(Position(tr.apply(cra.atom->pos)));
225 }
226 box.add_margin(0.01);
227 Position size = box.get_size();
228 grid.unit_cell.set(size.x, size.y, size.z, 90, 90, 90);
229 grid.unit_cell.frac.vec -= grid.unit_cell.fractionalize(box.minimum);
230 grid.unit_cell.orth.vec += box.minimum;
231 for (const Transform& tr : ncs) {
232 UnitCell& c = grid.unit_cell;
233 // cf. add_ncs_images_to_cs_images()
234 c.images.push_back(c.frac.combine(tr.combine(c.orth)));
235 }
236 }
237 }
238};
239
242 if (model) {
243 for (int n_ch = 0; n_ch != (int) model->chains.size(); ++n_ch)
245 } else if (small_structure) {
246 for (int n = 0; n != (int) small_structure->sites.size(); ++n) {
248 if (include_h || !site.element.is_hydrogen())
249 add_site(site, n);
250 }
251 } else {
252 fail("NeighborSearch not initialized");
253 }
254 return *this;
255}
256
257inline void NeighborSearch::add_chain(const Chain& chain, bool include_h_) {
258 if (!model)
259 fail("NeighborSearch.add_chain(): model not initialized yet");
260 // to be safe avoid (&chain - model.chains[0]) which could be UB
261 for (int n_ch = 0; n_ch != (int) model->chains.size(); ++n_ch)
262 if (&model->chains[n_ch] == &chain) {
264 add_chain_n(chain, n_ch);
265 return;
266 }
267 fail("NeighborSearch.add_chain(): chain not in this model");
268}
269
270inline void NeighborSearch::add_chain_n(const Chain& chain, int n_ch) {
271 for (int n_res = 0; n_res != (int) chain.residues.size(); ++n_res) {
272 const Residue& res = chain.residues[n_res];
273 for (int n_atom = 0; n_atom != (int) res.atoms.size(); ++n_atom) {
274 const Atom& atom = res.atoms[n_atom];
275 if (include_h || !atom.is_hydrogen())
276 add_atom(atom, n_ch, n_res, n_atom);
277 }
278 }
279}
280
281inline void NeighborSearch::add_atom(const Atom& atom,
282 int n_ch, int n_res, int n_atom) {
283 const UnitCell& gcell = grid.unit_cell;
284 Fractional frac0 = gcell.fractionalize(atom.pos);
285 {
286 Fractional frac = frac0.wrap_to_unit();
287 // for non-crystals, frac==frac0 => pos = atom.pos
288 Position pos = use_pbc ? gcell.orthogonalize(frac) : atom.pos;
289 get_subcell(frac).emplace_back(pos, atom.altloc, atom.element.elem,
290 0, n_ch, n_res, n_atom);
291 }
292 for (int n_im = 0; n_im != (int) gcell.images.size(); ++n_im) {
293 Fractional frac = gcell.images[n_im].apply(frac0).wrap_to_unit();
294 Position pos = gcell.orthogonalize(frac);
295 get_subcell(frac).emplace_back(pos, atom.altloc, atom.element.elem,
296 short(n_im + 1), n_ch, n_res, n_atom);
297 }
298}
299
300// We exclude special position images of atoms here, but not in add_atom.
301// This choice is somewhat arbitrary, but it also reflects the fact that
302// in MX files occupances of atoms on special positions are (almost always)
303// fractional and all images are to be taken into account.
305 const double SPECIAL_POS_TOL = 0.4;
306 const UnitCell& gcell = grid.unit_cell;
307 std::vector<Fractional> others;
308 others.reserve(gcell.images.size());
309 Fractional frac0 = site.fract.wrap_to_unit();
310 {
311 Position pos = gcell.orthogonalize(frac0);
312 get_subcell(frac0).emplace_back(pos, '\0', site.element.elem, 0, -1, -1, n);
313 }
314 for (int n_im = 0; n_im != (int) gcell.images.size(); ++n_im) {
315 Fractional frac = gcell.images[n_im].apply(site.fract).wrap_to_unit();
316 if (gcell.distance_sq(frac, frac0) < sq(SPECIAL_POS_TOL) ||
317 std::any_of(others.begin(), others.end(), [&](const Fractional& f) {
318 return gcell.distance_sq(frac, f) < sq(SPECIAL_POS_TOL);
319 }))
320 continue;
321 Position pos = gcell.orthogonalize(frac);
322 get_subcell(frac).emplace_back(pos, '\0', site.element.elem,
323 short(n_im + 1), -1, -1, n);
324 others.push_back(frac);
325 }
326}
327
328template<typename Func>
329void NeighborSearch::for_each_cell(const Position& pos, const Func& func, int k) {
330 Fractional fr = grid.unit_cell.fractionalize(pos);
331 if (use_pbc)
332 fr = fr.wrap_to_unit();
333 int u0 = int(fr.x * grid.nu) - k;
334 int v0 = int(fr.y * grid.nv) - k;
335 int w0 = int(fr.z * grid.nw) - k;
336 int uend = u0 + 2 * k + 1;
337 int vend = v0 + 2 * k + 1;
338 int wend = w0 + 2 * k + 1;
339 if (use_pbc) {
340 auto shift = [](int j, int n) {
341 if (j < 0)
342 return (j + 1) / n - 1;
343 if (j >= n)
344 return j / n;
345 return 0;
346 };
347 for (int w = w0; w < wend; ++w) {
348 int dw = shift(w, grid.nw);
349 for (int v = v0; v < vend; ++v) {
350 int dv = shift(v, grid.nv);
351 size_t idx0 = grid.index_q(0, v - dv * grid.nv, w - dw * grid.nw);
352 for (int u = u0; u < uend; ++u) {
353 int du = shift(u, grid.nu);
354 size_t idx = idx0 + (u - du * grid.nu);
355 func(grid.data[idx], Fractional(fr.x - du, fr.y - dv, fr.z - dw));
356 }
357 }
358 }
359 } else {
360 u0 = std::max(0, u0);
361 v0 = std::max(0, v0);
362 w0 = std::max(0, w0);
363 uend = std::min(uend, grid.nu);
364 vend = std::min(vend, grid.nv);
365 wend = std::min(wend, grid.nw);
366 for (int w = w0; w < wend; ++w)
367 for (int v = v0; v < vend; ++v)
368 for (int u = u0; u < uend; ++u) {
369 size_t idx = grid.index_q(u, v, w);
370 func(grid.data[idx], fr);
371 }
372 }
373}
374
375} // namespace gemmi
376#endif
fail(), unreachable() and __declspec/__attribute__ macros
3d grids used by CCP4 maps, cell-method search and hkl data.
Data structures to store macromolecular structure models.
bool is_same_conformer(char altloc1, char altloc2)
Definition model.hpp:115
constexpr float sq(float x)
Definition math.hpp:34
void fail(const std::string &msg)
Definition fail.hpp:59
Representation of a small molecule or inorganic crystal. Flat list of atom sites. Minimal functionali...
Represents atom site in macromolecular structure (~100 bytes).
Definition model.hpp:120
char altloc
Definition model.hpp:123
bool is_hydrogen() const
Definition model.hpp:148
Element element
Definition model.hpp:125
Position pos
Definition model.hpp:131
std::vector< Residue > residues
Definition model.hpp:486
Like Transform, but apply() arg is Fractional (not Vec3 - for type safety).
Definition unitcell.hpp:112
Fractional coordinates.
Definition unitcell.hpp:50
std::vector< Chain > chains
Definition model.hpp:722
const_CRA to_cra(const Model &mdl) const
Definition neighbor.hpp:40
CRA to_cra(Model &mdl) const
Definition neighbor.hpp:34
SmallStructure::Site & to_site(SmallStructure &small_st) const
Definition neighbor.hpp:46
const SmallStructure::Site & to_site(const SmallStructure &small_st) const
Definition neighbor.hpp:49
Mark(const Position &p, char alt, El el, short im, int ch, int res, int atom)
Definition neighbor.hpp:30
int sufficient_k(double r) const
Definition neighbor.hpp:109
void add_atom(const Atom &atom, int n_ch, int n_res, int n_atom)
Definition neighbor.hpp:281
void add_site(const SmallStructure::Site &site, int n)
Definition neighbor.hpp:304
void add_chain(const Chain &chain, bool include_h_=true)
Definition neighbor.hpp:257
NeighborSearch & populate(bool include_h_=true)
Definition neighbor.hpp:240
void for_each(const Position &pos, char alt, double radius, const Func &func, int k=1)
Definition neighbor.hpp:96
NeighborSearch(Model &model_, const UnitCell &cell, double radius)
Definition neighbor.hpp:63
Grid< std::vector< Mark > > grid
Definition neighbor.hpp:54
NeighborSearch(SmallStructure &small_st, double radius)
Definition neighbor.hpp:69
SmallStructure * small_structure
Definition neighbor.hpp:57
std::pair< Mark *, double > find_nearest_atom_within_k(const Position &pos, int k, double radius)
Definition neighbor.hpp:138
void for_each_cell(const Position &pos, const Func &func, int k=1)
Definition neighbor.hpp:329
double dist_sq(const Position &pos1, const Position &pos2) const
Definition neighbor.hpp:179
void add_chain_n(const Chain &chain, int n_ch)
Definition neighbor.hpp:270
std::vector< Mark * > find_site_neighbors(const SmallStructure::Site &site, double min_dist, double max_dist)
Definition neighbor.hpp:131
std::vector< Mark * > find_neighbors(const Atom &atom, double min_dist, double max_dist)
Definition neighbor.hpp:128
std::vector< Mark * > find_atoms(const Position &pos, char alt, double min_dist, double radius)
Definition neighbor.hpp:115
FTransform get_image_transformation(int image_idx) const
Definition neighbor.hpp:186
Mark * find_nearest_atom(const Position &pos, double radius=INFINITY)
Definition neighbor.hpp:155
std::vector< Mark > & get_subcell(const Fractional &fr)
Definition neighbor.hpp:83
double dist(const Position &pos1, const Position &pos2) const
Definition neighbor.hpp:182
Coordinates in Angstroms - orthogonal (Cartesian) coordinates.
Definition unitcell.hpp:32
std::vector< Atom > atoms
Definition model.hpp:196
std::vector< Site > sites
Definition small.hpp:79
Unit cell.
Definition unitcell.hpp:165