Gemmi C++ API
Loading...
Searching...
No Matches
scaling.hpp
Go to the documentation of this file.
1// Copyright 2020 Global Phasing Ltd.
2//
3// Anisotropic scaling of data (includes scaling of bulk solvent parameters).
4
5#ifndef GEMMI_SCALING_HPP_
6#define GEMMI_SCALING_HPP_
7
8#include "asudata.hpp"
9#include "levmar.hpp"
10#if WITH_NLOPT
11# include <nlopt.h>
12#endif
13
14namespace gemmi {
15
16using Vec6 = std::array<double, 6>;
17
18inline double vec6_dot(const Vec6& a, const SMat33<double>& s) {
19 return a[0] * s.u11 + a[1] * s.u22 + a[2] * s.u33
20 + a[3] * s.u12 + a[4] * s.u13 + a[5] * s.u23;
21}
22
27inline std::vector<Vec6> adp_symmetry_constraints(const SpaceGroup* sg) {
28 auto constraints = [](const std::initializer_list<int>& l) {
29 constexpr double K2 = 0.70710678118654752440; // sqrt(1/2)
30 constexpr double K3 = 0.57735026918962576451; // sqrt(1/3)
31 static const Vec6 vv[10] = {
32 {1, 0, 0, 0, 0, 0}, // 0
33 {0, 1, 0, 0, 0, 0}, // 1
34 {0, 0, 1, 0, 0, 0}, // 2
35 {0, 0, 0, 1, 0, 0}, // 3
36 {0, 0, 0, 0, 1, 0}, // 4
37 {0, 0, 0, 0, 0, 1}, // 5
38 {K2, K2, 0, 0, 0, 0}, // 6
39 {2/3., 2/3., 0, 1/3., 0, 0}, // 7
40 {K3, K3, K3, 0, 0, 0}, // 8
41 {0, 0, 0, K3, K3, K3} // 9
42 };
43 std::vector<Vec6> c;
44 c.reserve(l.size());
45 for (int i : l)
46 c.push_back(vv[i]);
47 return c;
48 };
50 switch (cr_system) {
52 return constraints({0, 1, 2, 3, 4, 5});
54 // the last index is: a->5, b->4, c->3
55 return constraints({0, 1, 2, 3 + 'c' - sg->monoclinic_unique_axis()});
57 return constraints({0, 1, 2});
59 return constraints({6, 2});
62 if (sg->ext == 'R')
63 return constraints({8, 9});
64 return constraints({7, 2});
66 return constraints({8});
67 }
69}
70
71template<typename Real>
72struct Scaling {
73 struct Point {
75 double stol2;
76 std::complex<Real> fcmol, fmask;
78
79 Miller get_x() const { return hkl; }
80 double get_y() const { return fobs; }
81 double get_weight() const { return 1.0 /* / sigma*/; }
82 };
83
85 // model parameters
86 double k_overall = 1.;
87 // b_star = F B_cart F^T, where F - fractionalization matrix
88 SMat33<double> b_star{0, 0, 0, 0, 0, 0};
89 std::vector<Vec6> constraint_matrix;
90 bool use_solvent = false;
91 bool fix_k_sol = false;
92 bool fix_b_sol = false;
93 // initialize with average values (Fokine & Urzhumtsev, 2002)
94 double k_sol = 0.35;
95 double b_sol = 46.0;
96 std::vector<Point> points;
97
100
101 // B_{overall} is stored as B* not B_{cartesian}.
102 // Use getter and setter to convert from/to B_{cartesian}.
104 b_star = b_overall.transformed_by(cell.frac.mat);
105 }
109
110 // Scale data, optionally adding bulk solvent correction.
111 void scale_data(AsuData<std::complex<Real>>& asu_data,
112 const AsuData<std::complex<Real>>* mask_data) const {
113 if (use_solvent && !(mask_data && mask_data->size() == asu_data.size()))
114 fail("scale_data(): mask data not prepared");
115 bool use_scaling = (k_overall != 1 || !b_star.all_zero());
116 for (size_t i = 0; i != asu_data.v.size(); ++i) {
118 if (use_solvent) {
119 if (hv.hkl != mask_data->v[i].hkl)
120 fail("scale_data(): data arrays don't match");
121 double stol2 = cell.calculate_stol_sq(hv.hkl);
122 hv.value += (Real)get_solvent_scale(stol2) * mask_data->v[i].value;
123 }
124 if (use_scaling)
126 }
127 }
128
129 std::complex<Real> scale_value(const Miller& hkl, std::complex<Real> f_value,
130 std::complex<Real> mask_value) {
131 if (use_solvent) {
132 double stol2 = cell.calculate_stol_sq(hkl);
134 }
135 return f_value * (Real) get_overall_scale_factor(hkl);
136 }
137
138 std::vector<double> get_parameters() const {
139 std::vector<double> ret;
140 ret.push_back(k_overall);
141 if (use_solvent) {
142 if (!fix_k_sol)
143 ret.push_back(k_sol);
144 if (!fix_b_sol)
145 ret.push_back(b_sol);
146 }
147 for (const Vec6& v : constraint_matrix)
148 ret.push_back(vec6_dot(v, b_star));
149 return ret;
150 }
151
153 void set_parameters(const double* p) {
154 k_overall = p[0];
155 int n = 0;
156 if (use_solvent) {
157 if (!fix_k_sol)
158 k_sol = p[++n];
159 if (!fix_b_sol)
160 b_sol = p[++n];
161 }
162 b_star = {0, 0, 0, 0, 0, 0};
163 for (const Vec6& row : constraint_matrix) {
164 double d = p[++n];
165 b_star.u11 += row[0] * d;
166 b_star.u22 += row[1] * d;
167 b_star.u33 += row[2] * d;
168 b_star.u12 += row[3] * d;
169 b_star.u13 += row[4] * d;
170 b_star.u23 += row[5] * d;
171 }
172 }
173
174 void set_parameters(const std::vector<double>& p) {
175 set_parameters(p.data());
176 }
177
178 // pre: all AsuData args are sorted
179 void prepare_points(const AsuData<std::complex<Real>>& calc,
181 const AsuData<std::complex<Real>>* mask_data) {
182 if (use_solvent && !(mask_data && mask_data->size() == calc.size()))
183 fail("prepare_points(): mask data not prepared");
184 std::complex<Real> fmask;
185 points.reserve(std::min(calc.size(), obs.size()));
186 auto c = calc.v.begin();
187 for (const HklValue<ValueSigma<Real>>& o : obs.v) {
188 if (c->hkl != o.hkl) {
189 while (*c < o.hkl) {
190 ++c;
191 if (c == calc.v.end())
192 return;
193 }
194 if (c->hkl != o.hkl)
195 continue;
196 }
197 if (use_solvent) {
198 const HklValue<std::complex<Real>>& m = mask_data->v[c - calc.v.begin()];
199 if (m.hkl != c->hkl)
200 fail("prepare_points(): unexpected data");
201 fmask = m.value;
202 }
203 double stol2 = cell.calculate_stol_sq(o.hkl);
204 if (!std::isnan(o.value.value) && !std::isnan(o.value.sigma))
205 points.push_back({o.hkl, stol2, c->value, fmask, o.value.value, o.value.sigma});
206 ++c;
207 if (c == calc.v.end())
208 break;
209 }
210 }
211
212
213 double get_solvent_scale(double stol2) const {
214 return k_sol * std::exp(-b_sol * stol2);
215 }
216
217 double get_overall_scale_factor(const Miller& hkl) const {
218 return k_overall * std::exp(-0.25 * b_star.r_u_r(hkl));
219 }
220
221 std::complex<Real> get_fcalc(const Point& p) const {
222 if (!use_solvent)
223 return p.fcmol;
224 return p.fcmol + (Real)get_solvent_scale(p.stol2) * p.fmask;
225 }
226
227 // quick linear fit (ignoring sigma) to get initial k_overall and isotropic B
229 double sx = 0, sy = 0, sxx = 0, sxy = 0;
230 int n = 0;
231 for (const Point& p : points) {
232 if (p.fobs < 1 || p.fobs < p.sigma) // skip weak reflections
233 continue;
234 double fcalc = std::abs(get_fcalc(p));
235 double x = p.stol2;
236 double y = std::log(static_cast<float>(p.fobs / fcalc));
237 sx += x;
238 sy += y;
239 sxx += x * x;
240 sxy += x * y;
241 n += 1;
242 }
243 if (n <= 5) // this is not expected to happen
244 return;
245 double slope = (n * sxy - sx * sy) / (n * sxx - sx * sx);
246 double intercept = (sy - slope * sx) / n;
247 double b_iso = -slope;
248 k_overall = std::exp(intercept);
249 set_b_overall({b_iso, b_iso, b_iso, 0, 0, 0});
250 }
251
252 // least-squares fitting of k_overall only
253 double lsq_k_overall() const {
254 double sxx = 0, sxy = 0;
255 for (const Point& p : points) {
256 if (p.fobs < 1 || p.fobs < p.sigma) // skip weak reflections
257 continue;
258 double x = std::abs(get_fcalc(p));
259 double y = p.fobs;
260 sxx += x * x;
261 sxy += x * y;
262 }
263 return sxx != 0. ? sxy / sxx : 1.;
264 }
265
266 // For testing only, don't use it.
267 // Estimates anisotropic b_star using other parameters (incl. isotropic B),
268 // following P. Afonine et al, doi:10.1107/S0907444913000462 sec. 2.1.
269 // The symmetry constraints are not implemented - don't use it!
271 double b_iso = 1/3. * get_b_overall().trace();
272 //size_t nc = constraint_matrix.size();
273 double M[36] = {};
274 double b[6] = {};
275 //std::vector<double> Vc(nc);
276 for (const Point& p : points) {
277 double fcalc = std::abs(get_fcalc(p));
278 // the factor 1 / 2 pi^2 will be taken into account later
279 double Z = std::log(p.fobs / (k_overall * fcalc)) + b_iso * p.stol2;
280 Vec3 h(p.hkl);
281 double V[6] = {h.x * h.x, h.y * h.y, h.z * h.z,
282 2 * h.x * h.y, 2 * h.x * h.z, 2 * h.y * h.z};
283 //for (size_t i = 0; i < nc; ++i)
284 // Vc[i] = vec6_dot(constraint_matrix[i], V);
285 for (size_t i = 0; i < 6; ++i) {
286 for (size_t j = 0; j < 6; ++j)
287 M[6*i+j] += V[i] * V[j];
288 b[i] -= Z * V[i];
289 }
290 }
291 jordan_solve(M, b, 6);
292 double b_star_iso = 1/3. * b_star.trace();
293 SMat33<double> u_star{b[0], b[1], b[2], b[3], b[4], b[5]};
294 // u_to_b() / (2 pi^2) = 8 pi^2 / 2 pi^2 = 4
295 b_star = u_star.scaled(4.0).added_kI(b_star_iso);
296 //auto e = get_b_overall().elements_pdb();
297 //printf("fitted B = {%g %g %g %g %g %g}\n", e[0], e[1], e[2], e[3], e[4], e[5]);
298 }
299
300 double fit_parameters() {
302 return levmar.fit(*this);
303 }
304
305 double calculate_r_factor() const {
306 double abs_diff_sum = 0;
307 double denom = 0;
308 for (const Point& p : points) {
309 abs_diff_sum += std::fabs(p.fobs - compute_value(p));
310 denom += p.fobs;
311 }
312 return abs_diff_sum / denom;
313 }
314
315 // interface for fitting
316 double compute_value(const Point& p) const {
317 return std::abs(get_fcalc(p)) * (Real) get_overall_scale_factor(p.hkl);
318 }
319
320 double compute_value_and_derivatives(const Point& p, std::vector<double>& dy_da) const {
321 Vec3 h(p.hkl);
322 double kaniso = std::exp(-0.25 * b_star.r_u_r(h));
323 double fcalc_abs;
324 int n = 1;
325 if (use_solvent) {
326 double solv_b = std::exp(-b_sol * p.stol2);
327 double solv_scale = k_sol * solv_b;
328 auto fcalc = p.fcmol + (Real)solv_scale * p.fmask;
329 fcalc_abs = std::abs(fcalc);
330 double dy_dsol = (fcalc.real() * p.fmask.real() +
331 fcalc.imag() * p.fmask.imag()) / fcalc_abs * k_overall * kaniso;
332 if (!fix_k_sol)
333 dy_da[n++] = solv_b * dy_dsol;
334 if (!fix_b_sol)
335 dy_da[n++] = -p.stol2 * solv_scale * dy_dsol;
336 } else {
337 fcalc_abs = std::abs(p.fcmol);
338 }
339 double fe = fcalc_abs * kaniso;
340 double y = k_overall * fe;
341 dy_da[0] = fe; // dy/d k_overall
343 -0.25 * y * (h.x * h.x),
344 -0.25 * y * (h.y * h.y),
345 -0.25 * y * (h.z * h.z),
346 -0.5 * y * (h.x * h.y),
347 -0.5 * y * (h.x * h.z),
348 -0.5 * y * (h.y * h.z),
349 };
350 for (size_t j = 0; j < constraint_matrix.size(); ++j)
352 return y;
353 }
354};
355
356// only for testing and evaluation - scaling with NLOpt
357#if WITH_NLOPT
358namespace impl {
359
360template<typename Real>
361double calculate_for_nlopt(unsigned n, const double* x, double* grad, void* data) {
362 auto scaling = static_cast<Scaling<Real>*>(data);
363 scaling->set_parameters(x);
364 if (grad)
365 return compute_gradients(*scaling, n, grad);
366 else
367 return compute_wssr(*scaling);
368}
369
370inline const char* nlresult_to_string(nlopt_result r) {
371 switch (r) {
372 case NLOPT_FAILURE: return "failure";
373 case NLOPT_INVALID_ARGS: return "invalid arguments";
374 case NLOPT_OUT_OF_MEMORY: return "out of memory";
375 case NLOPT_ROUNDOFF_LIMITED: return "roundoff errors limit progress";
376 case NLOPT_FORCED_STOP: return "interrupted";
377 case NLOPT_SUCCESS: return "success";
378 case NLOPT_STOPVAL_REACHED: return "stop-value reached";
379 case NLOPT_FTOL_REACHED: return "ftol-value reached";
380 case NLOPT_XTOL_REACHED: return "xtol-value reached";
381 case NLOPT_MAXEVAL_REACHED: return "max. evaluation number reached";
382 case NLOPT_MAXTIME_REACHED: return "max. time reached";
383 default: return "<unknown result>";
384 }
385 unreachable();
386}
387
388} // namespace impl
389
390template<typename Real>
391double fit_parameters_with_nlopt(Scaling<Real>& scaling, const char* optimizer) {
392 std::vector<double> params = scaling.get_parameters();
393 nlopt_opt opt = nlopt_create(nlopt_algorithm_from_string(optimizer), params.size());
394 { // prepare bounds
395 std::vector<double> lb(params.size());
396 std::vector<double> ub(params.size());
397 lb[0] = 0.7 * params[0]; // k_ov
398 ub[0] = 1.2 * params[0];
399 size_t n = 1;
400 if (scaling.use_solvent) {
401 if (!scaling.fix_k_sol) {
402 lb[n] = 0.15;
403 ub[n] = 0.5;
404 ++n;
405 }
406 if (!scaling.fix_b_sol) {
407 lb[n] = 10;
408 ub[n] = 80;
409 ++n;
410 }
411 }
412 for (; n < params.size(); ++n) {
413 lb[n] = -0.01;
414 ub[n] = 0.01;
415 }
416 nlopt_set_lower_bounds(opt, lb.data());
417 nlopt_set_upper_bounds(opt, ub.data());
418 }
419 nlopt_set_min_objective(opt, impl::calculate_for_nlopt<Real>, &scaling);
420 nlopt_set_maxeval(opt, 100);
421 double minf = NAN;
422 nlopt_result r = nlopt_optimize(opt, &params[0], &minf);
423 (void) r;
424 //if (r < 0)
425 // printf("NLopt result: %s\n", impl::nlresult_to_string(r));
426 //else
427 // printf("NLopt minimum value: %g\n", minf);
428 nlopt_destroy(opt);
429 scaling.set_parameters(params);
430 return minf;
431}
432#endif // WITH_NLOPT
433
434} // namespace gemmi
435#endif
AsuData for storing reflection data.
Least-squares fitting - Levenberg-Marquardt method.
double vec6_dot(const Vec6 &a, const SMat33< double > &s)
Definition scaling.hpp:18
double compute_gradients(const Target &target, unsigned n, double *grad)
Definition levmar.hpp:93
std::array< double, 6 > Vec6
Definition scaling.hpp:16
std::array< int, 3 > Miller
A synonym for convenient passing of hkl.
Definition unitcell.hpp:129
double compute_wssr(const Target &target)
Definition levmar.hpp:85
Vec3_< double > Vec3
Definition math.hpp:113
std::vector< Vec6 > adp_symmetry_constraints(const SpaceGroup *sg)
Symmetry constraints of ADP.
Definition scaling.hpp:27
void fail(const std::string &msg)
Definition fail.hpp:59
void jordan_solve(double *a, double *b, int n)
This function solves a set of linear algebraic equations using Gauss-Jordan elimination with partial ...
Definition levmar.hpp:29
void unreachable()
Definition fail.hpp:80
CrystalSystem
Definition symmetry.hpp:572
T trace() const
Definition math.hpp:291
SMat33< Real > transformed_by(const Mat33 &m) const
Definition math.hpp:338
bool all_zero() const
Definition math.hpp:294
auto r_u_r(const Vec3_< VT > &r) const -> decltype(r.x+u11)
Definition math.hpp:314
double get_y() const
Definition scaling.hpp:80
std::complex< Real > fmask
Definition scaling.hpp:76
double get_weight() const
Definition scaling.hpp:81
Miller get_x() const
Definition scaling.hpp:79
std::complex< Real > fcmol
Definition scaling.hpp:76
double compute_value_and_derivatives(const Point &p, std::vector< double > &dy_da) const
Definition scaling.hpp:320
Scaling(const UnitCell &cell_, const SpaceGroup *sg)
Definition scaling.hpp:98
void set_b_overall(const SMat33< double > &b_overall)
Definition scaling.hpp:103
void set_parameters(const double *p)
set k_overall, k_sol, b_sol, b_star
Definition scaling.hpp:153
void fit_isotropic_b_approximately()
Definition scaling.hpp:228
double get_overall_scale_factor(const Miller &hkl) const
Definition scaling.hpp:217
SMat33< double > get_b_overall() const
Definition scaling.hpp:106
std::complex< Real > get_fcalc(const Point &p) const
Definition scaling.hpp:221
double calculate_r_factor() const
Definition scaling.hpp:305
UnitCell cell
Definition scaling.hpp:84
std::complex< Real > scale_value(const Miller &hkl, std::complex< Real > f_value, std::complex< Real > mask_value)
Definition scaling.hpp:129
double k_overall
Definition scaling.hpp:86
void scale_data(AsuData< std::complex< Real > > &asu_data, const AsuData< std::complex< Real > > *mask_data) const
Definition scaling.hpp:111
double fit_parameters()
Definition scaling.hpp:300
SMat33< double > b_star
Definition scaling.hpp:88
void fit_b_star_approximately()
Definition scaling.hpp:270
double compute_value(const Point &p) const
Definition scaling.hpp:316
std::vector< double > get_parameters() const
Definition scaling.hpp:138
double lsq_k_overall() const
Definition scaling.hpp:253
void prepare_points(const AsuData< std::complex< Real > > &calc, const AsuData< ValueSigma< Real > > &obs, const AsuData< std::complex< Real > > *mask_data)
Definition scaling.hpp:179
std::vector< Vec6 > constraint_matrix
Definition scaling.hpp:89
double get_solvent_scale(double stol2) const
Definition scaling.hpp:213
std::vector< Point > points
Definition scaling.hpp:96
void set_parameters(const std::vector< double > &p)
Definition scaling.hpp:174
Unit cell.
Definition unitcell.hpp:165
Transform frac
Definition unitcell.hpp:174
double calculate_stol_sq(const Miller &hkl) const
Calculate (sin(theta)/lambda)^2 = d*^2/4.
Definition unitcell.hpp:584
Transform orth
Definition unitcell.hpp:173