Gemmi C++ API
Loading...
Searching...
No Matches
binner.hpp
Go to the documentation of this file.
1// Copyright 2022 Global Phasing Ltd.
2//
3// Binning - resolution shells for reflections.
4
5#ifndef GEMMI_BINNER_HPP_
6#define GEMMI_BINNER_HPP_
7
8#include <vector>
9#include <limits> // for numeric_limits
10#include <unordered_map> // for unordered_map
11#include "unitcell.hpp" // for UnitCell
12#include "stats.hpp" // for Correlation
13
14namespace gemmi {
15
16struct Binner {
17 enum class Method {
19 Dstar,
20 Dstar2,
21 Dstar3,
22 };
23
24 void setup_from_1_d2(int nbins, Method method, std::vector<double>&& inv_d2,
25 const UnitCell* cell_) {
26 if (nbins < 1)
27 fail("Binner: nbins argument must be positive");
28 if (inv_d2.empty())
29 fail("Binner: no data");
30 if (cell_)
31 cell = *cell_;
32 if (!cell.is_crystal())
33 fail("Binner: unknown unit cell");
34 limits.resize(nbins);
35 if (method == Method::EqualCount) {
36 std::sort(inv_d2.begin(), inv_d2.end());
37 min_1_d2 = inv_d2.front();
38 max_1_d2 = inv_d2.back();
39 } else {
40 min_1_d2 = max_1_d2 = inv_d2.front();
41 for (double x : inv_d2) {
42 if (x < min_1_d2)
43 min_1_d2 = x;
44 if (x > max_1_d2)
45 max_1_d2 = x;
46 }
47 }
48 switch (method) {
49 case Method::EqualCount: {
50 double avg_count = double(inv_d2.size()) / nbins;
51 for (int i = 1; i < nbins; ++i)
52 limits[i-1] = inv_d2[int(avg_count * i)];
53 break;
54 }
55 case Method::Dstar2: {
56 double step = (max_1_d2 - min_1_d2) / nbins;
57 for (int i = 1; i < nbins; ++i)
58 limits[i-1] = min_1_d2 + i * step;
59 break;
60 }
61 case Method::Dstar: {
62 double min_1_d = std::sqrt(min_1_d2);
63 double max_1_d = std::sqrt(max_1_d2);
64 double step = (max_1_d - min_1_d) / nbins;
65 for (int i = 1; i < nbins; ++i)
66 limits[i-1] = sq(min_1_d + i * step);
67 break;
68 }
69 case Method::Dstar3: {
70 double min_1_d3 = min_1_d2 * std::sqrt(min_1_d2);
71 double max_1_d3 = max_1_d2 * std::sqrt(max_1_d2);
72 double step = (max_1_d3 - min_1_d3) / nbins;
73 for (int i = 1; i < nbins; ++i)
74 limits[i-1] = sq(std::cbrt(min_1_d3 + i * step));
75 break;
76 }
77 }
78 limits.back() = std::numeric_limits<double>::infinity();
79 }
80
81 template<typename DataProxy>
82 void setup(int nbins, Method method, const DataProxy& proxy,
83 const UnitCell* cell_=nullptr, bool with_mids=false, size_t col_idx=0) {
84 if (col_idx >= proxy.stride())
85 fail("wrong col_idx in Binner::setup()");
86 cell = cell_ ? *cell_ : proxy.unit_cell();
87 std::vector<double> inv_d2;
88 inv_d2.reserve(proxy.size() / proxy.stride());
89 for (size_t offset = 0; offset < proxy.size(); offset += proxy.stride())
90 if (col_idx == 0 || !std::isnan(proxy.get_num(offset + col_idx)))
91 inv_d2.push_back(cell.calculate_1_d2(proxy.get_hkl(offset)));
93 method, std::move(inv_d2), nullptr);
94 if (with_mids) {
95 mids.resize(nbins);
96 for (int i = 0; i < nbins; ++i) {
97 mids[i] = limits[2*i];
98 limits[i] = limits[2*i+1];
99 }
100 limits.resize(nbins);
101 }
102 }
103
105 if (limits.empty())
106 fail("Binner not set up");
107 }
108
109 // Generic. Method-specific versions could be faster.
112 auto it = std::lower_bound(limits.begin(), limits.end(), inv_d2);
113 // it can't be limits.end() b/c limits.back() is +inf
114 return int(it - limits.begin());
115 }
116
117 int get_bin(const Miller& hkl) {
118 double inv_d2 = cell.calculate_1_d2(hkl);
120 }
121
122 // We assume that bins are seeked mostly for sorted reflections,
123 // so it's usually either the same bin as previously, or the next one.
124 int get_bin_from_1_d2_hinted(double inv_d2, int& hint) const {
125 if (inv_d2 <= limits[hint]) {
126 while (hint != 0 && limits[hint-1] > inv_d2)
127 --hint;
128 } else {
129 // limits.back() is +inf, so we won't overrun
130 while (limits[hint] < inv_d2)
131 ++hint;
132 }
133 return hint;
134 }
135
136 int get_bin_hinted(const Miller& hkl, int& hint) const {
137 double inv_d2 = cell.calculate_1_d2(hkl);
139 }
140
141 template<typename DataProxy>
142 std::vector<int> get_bins(const DataProxy& proxy) const {
144 int hint = 0;
145 std::vector<int> nums(proxy.size() / proxy.stride());
146 for (size_t i = 0, offset = 0; i < nums.size(); ++i, offset += proxy.stride())
147 nums[i] = get_bin_hinted(proxy.get_hkl(offset), hint);
148 return nums;
149 }
150
151 std::vector<int> get_bins_from_1_d2(const std::vector<double>& inv_d2) const {
153 int hint = 0;
154 std::vector<int> nums(inv_d2.size());
155 for (size_t i = 0; i < inv_d2.size(); ++i)
157 return nums;
158 }
159
160 double dmin_of_bin(int n) const {
161 return 1. / std::sqrt(limits.at(n));
162 }
163 double dmax_of_bin(int n) const {
164 return 1. / std::sqrt(n == 0 ? min_1_d2 : limits.at(n-1));
165 }
166
167 size_t size() const { return limits.size(); }
168
170 double min_1_d2;
171 double max_1_d2;
172 std::vector<double> limits; // upper limit of each bin
173 std::vector<double> mids; // the middle of each bin
174};
175
178 r.n = a.n + b.n;
179 r.mean_x = (a.n * a.mean_x + b.n * b.mean_x) / r.n;
180 r.mean_y = (a.n * a.mean_y + b.n * b.mean_y) / r.n;
181 r.sum_xx = a.sum_xx + a.n * sq(a.mean_x - r.mean_x)
182 + b.sum_xx + b.n * sq(b.mean_x - r.mean_x);
183 r.sum_yy = a.sum_yy + a.n * sq(a.mean_y - r.mean_y)
184 + b.sum_yy + b.n * sq(b.mean_y - r.mean_y);
185 r.sum_xy = a.sum_xy + a.n * (a.mean_x - r.mean_x) * (a.mean_y - r.mean_y)
186 + b.sum_xy + b.n * (b.mean_x - r.mean_x) * (b.mean_y - r.mean_y);
187 return r;
188}
189
190inline Correlation combine_correlations(const std::vector<Correlation>& cors) {
192 for (const Correlation& cor : cors)
194 return result;
195}
196
197struct HklMatch {
198 std::vector<int> pos;
199 size_t hkl_size;
200
201 HklMatch(const Miller* hkl, size_t hkl_size_, const Miller* ref, size_t ref_size)
202 : pos(ref_size, -1), hkl_size(hkl_size_) {
203 // Usually, both datasets are sorted. This make things faster.
204 if (std::is_sorted(hkl, hkl + hkl_size) &&
205 std::is_sorted(ref, ref + ref_size)) {
206 // cf. for_matching_reflections()
207 auto a = hkl;
208 auto b = ref;
209 while (a != hkl + hkl_size && b != ref + ref_size) {
210 if (*a == *b)
211 pos[b++ - ref] = static_cast<int>(a++ - hkl);
212 else if (*a < *b)
213 ++a;
214 else
215 ++b;
216 }
217 } else {
218 std::unordered_map<Miller, int, MillerHash> hkl_index;
219 for (int i = 0; i != (int)hkl_size; ++i)
220 hkl_index.emplace(hkl[i], i);
221 for (size_t i = 0; i != ref_size; ++i) {
222 auto it = hkl_index.find(ref[i]);
223 if (it != hkl_index.end())
224 pos[i] = it->second;
225 }
226 }
227 }
228
229 HklMatch(const std::vector<Miller>& hkl, const std::vector<Miller>& ref)
230 : HklMatch(hkl.data(), hkl.size(), ref.data(), ref.size()) {}
231
232 template <typename T> std::vector<T> aligned(const std::vector<T>& v, T nan) {
233 if (v.size() != hkl_size)
234 fail("HklMatch.aligned(): wrong data, size differs");
235 std::vector<T> result(pos.size());
236 for (size_t i = 0; i != pos.size(); ++i)
237 result[i] = pos[i] >= 0 ? v[pos[i]] : nan;
238 return result;
239 }
240};
241
242} // namespace gemmi
243#endif
Correlation combine_correlations(const std::vector< Correlation > &cors)
Definition binner.hpp:190
std::array< int, 3 > Miller
A synonym for convenient passing of hkl.
Definition unitcell.hpp:128
Correlation combine_two_correlations(const Correlation &a, const Correlation &b)
Definition binner.hpp:176
constexpr float sq(float x)
Definition math.hpp:34
void fail(const std::string &msg)
Definition fail.hpp:59
int get_bin(const Miller &hkl)
Definition binner.hpp:117
std::vector< double > mids
Definition binner.hpp:173
UnitCell cell
Definition binner.hpp:169
std::vector< double > limits
Definition binner.hpp:172
double dmin_of_bin(int n) const
Definition binner.hpp:160
void ensure_limits_are_set() const
Definition binner.hpp:104
double max_1_d2
Definition binner.hpp:171
size_t size() const
Definition binner.hpp:167
void setup_from_1_d2(int nbins, Method method, std::vector< double > &&inv_d2, const UnitCell *cell_)
Definition binner.hpp:24
int get_bin_hinted(const Miller &hkl, int &hint) const
Definition binner.hpp:136
std::vector< int > get_bins(const DataProxy &proxy) const
Definition binner.hpp:142
int get_bin_from_1_d2_hinted(double inv_d2, int &hint) const
Definition binner.hpp:124
std::vector< int > get_bins_from_1_d2(const std::vector< double > &inv_d2) const
Definition binner.hpp:151
void setup(int nbins, Method method, const DataProxy &proxy, const UnitCell *cell_=nullptr, bool with_mids=false, size_t col_idx=0)
Definition binner.hpp:82
int get_bin_from_1_d2(double inv_d2)
Definition binner.hpp:110
double dmax_of_bin(int n) const
Definition binner.hpp:163
double min_1_d2
Definition binner.hpp:170
std::vector< T > aligned(const std::vector< T > &v, T nan)
Definition binner.hpp:232
HklMatch(const Miller *hkl, size_t hkl_size_, const Miller *ref, size_t ref_size)
Definition binner.hpp:201
HklMatch(const std::vector< Miller > &hkl, const std::vector< Miller > &ref)
Definition binner.hpp:229
std::vector< int > pos
Definition binner.hpp:198
size_t hkl_size
Definition binner.hpp:199
Unit cell.
Definition unitcell.hpp:139
bool is_crystal() const
Definition unitcell.hpp:164
double calculate_1_d2(const Miller &hkl) const
Definition unitcell.hpp:529