Gemmi C++ API
Loading...
Searching...
No Matches
merge.hpp
Go to the documentation of this file.
1// Copyright 2020 Global Phasing Ltd.
2//
3// Class Intensities that reads multi-record data from MTZ, mmCIF or XDS_ASCII
4// and merges it into mean or anomalous intensities.
5// It can also read merged data.
6
7#ifndef GEMMI_MERGE_HPP_
8#define GEMMI_MERGE_HPP_
9
10#include <cassert>
11#include "atof.hpp" // for fast_from_chars
12#include "symmetry.hpp"
13#include "unitcell.hpp"
14#include "util.hpp" // for vector_remove_if
15#include "mtz.hpp" // for Mtz
16#include "refln.hpp" // for ReflnBlock
17#include "stats.hpp" // for Correlation
18#include "xds_ascii.hpp" // for XdsAscii
19
20namespace gemmi {
21
22inline std::string miller_str(const Miller& hkl) {
23 std::string s;
24 for (int i = 0; i < 3; ++i) {
25 s += (i == 0 ? '(' : ' ');
26 s += std::to_string(hkl[i]);
27 }
28 s += ')';
29 return s;
30}
31
32inline bool parse_voigt_notation(const char* start, const char* end, SMat33<double>& b) {
33 bool first = true;
34 for (double* u : {&b.u11, &b.u22, &b.u33, &b.u23, &b.u13, &b.u12}) {
35 if (*start != (first ? '(' : ','))
36 return false;
37 first = false;
38 auto result = fast_from_chars(++start, end, *u);
39 if (result.ec != std::errc())
40 return false;
41 start = skip_blank(result.ptr);
42 }
43 return *start == ')';
44}
45
47 // read what is written by MtzToCif::write_staraniso_b()
48 cif::Table table = const_cast<cif::Block*>(&block)->find(
49 "_reflns.pdbx_aniso_B_tensor_eigen",
50 {"value_1", "value_2", "value_3",
51 "vector_1_ortho[1]", "vector_1_ortho[2]", "vector_1_ortho[3]",
52 "vector_2_ortho[1]", "vector_2_ortho[2]", "vector_2_ortho[3]",
53 "vector_3_ortho[1]", "vector_3_ortho[2]", "vector_3_ortho[3]"});
54 if (!table.ok())
55 return false;
56 cif::Table::Row row = table.one();
57 using cif::as_number;
58 double eigval[3] = {as_number(row[0]), as_number(row[1]), as_number(row[2])};
59 double min_val = std::min(std::min(eigval[0], eigval[1]), eigval[2]);
60 Mat33 mat(as_number(row[3]), as_number(row[6]), as_number(row[9]),
61 as_number(row[4]), as_number(row[7]), as_number(row[10]),
62 as_number(row[5]), as_number(row[8]), as_number(row[11]));
64 // If the columns of mat are an orthonomal basis, mat^−1==mat^T.
65 // But just in case it isn't, we use mat^-1 here.
67 // t is a symmetric tensor, so we return only 6 numbers
68 output = {t[0][0], t[1][1], t[2][2], t[0][1], t[0][2], t[1][2]};
69 return true;
70}
71
72// returns STARANISO version or empty string
74 std::string version;
75 size_t hlen = mtz.history.size();
76 for (size_t i = 0; i != hlen; ++i)
77 if (mtz.history[i].find("STARANISO") != std::string::npos) {
78 size_t version_pos = mtz.history[i].find("version:");
79 if (version_pos != std::string::npos)
80 version = read_word(mtz.history[i].c_str() + version_pos + 8);
81 else
82 version = "?";
83 // StarAniso 2.3.74 (24-Apr-2021) and later write B tensor in history
84 for (size_t j = i+1; j < std::min(i+4, hlen); ++j) {
85 const std::string& line = mtz.history[j];
86 if (starts_with(line, "B=(")) {
87 if (!parse_voigt_notation(line.c_str() + 2,
88 line.c_str() + line.size(),
89 output))
90 fail("failed to parse tensor Voigt notation: " + line);
91 break;
92 }
93 }
94 break;
95 }
96 return version;
97}
98
99
101 struct Refl {
103 short isign; // 1 for I(+), -1 for I(-), 0 for mean
104 short nobs;
105 double value;
106 double sigma;
107
108 bool operator<(const Refl& o) const {
109 return std::tie(hkl[0], hkl[1], hkl[2], isign) <
110 std::tie(o.hkl[0], o.hkl[1], o.hkl[2], o.isign);
111 }
112 const char* intensity_label() const {
113 if (isign == 0) return "<I>";
114 if (isign > 0) return "I(+)";
115 return "I(-)";
116 }
117
118 std::string hkl_label() const {
119 std::string s = intensity_label();
120 s += ' ';
121 s += miller_str(hkl);
122 return s;
123 };
124 };
125
127 SMat33<double> b = {0., 0., 0., 0., 0., 0.};
128
129 bool ok() const { return !b.all_zero(); }
130 double scale(const Miller& hkl, const UnitCell& cell) const {
131 Vec3 s = cell.frac.mat.left_multiply(Vec3(hkl[0], hkl[1], hkl[2]));
132 return std::exp(0.5 * b.r_u_r(s));
133 }
134 };
135
136 std::vector<Refl> data;
137 const SpaceGroup* spacegroup = nullptr;
139 double unit_cell_rmsd[6] = {0., 0., 0., 0., 0., 0.};
143
144
145 static const char* type_str(DataType data_type) {
146 switch (data_type) {
147 case DataType::Unknown: return "n/a";
148 case DataType::Unmerged: return "I";
149 case DataType::Mean: return "<I>";
150 case DataType::Anomalous: return "I+/I-";
151 }
152 unreachable();
153 }
154
155 const char* type_str() const { return Intensities::type_str(type); }
156
157 std::string spacegroup_str() const { return spacegroup ? spacegroup->xhm() : "none"; }
158
159 std::array<double,2> resolution_range() const {
160 double min_1_d2 = INFINITY;
161 double max_1_d2 = 0;
162 for (const Refl& x : data) {
163 double a_1_d2 = unit_cell.calculate_1_d2(x.hkl);
164 if (a_1_d2 < min_1_d2)
165 min_1_d2 = a_1_d2;
166 if (a_1_d2 > max_1_d2)
167 max_1_d2 = a_1_d2;
168 }
169 return {{ 1 / std::sqrt(min_1_d2), 1 / std::sqrt(max_1_d2) }};
170 }
171
172 // pre: both are sorted
173 // cf. calculate_hkl_value_correlation()
175 Correlation corr;
176 auto r1 = data.begin();
177 auto r2 = other.data.begin();
178 while (r1 != data.end() && r2 != other.data.end()) {
179 if (r1->hkl == r2->hkl && r1->isign == r2->isign) {
180 corr.add_point(r1->value, r2->value);
181 ++r1;
182 ++r2;
183 } else if (*r1 < *r2) {
184 ++r1;
185 } else {
186 ++r2;
187 }
188 }
189 return corr;
190 }
191
193 if (!spacegroup)
194 return;
195 GroupOps gops = spacegroup->operations();
196 vector_remove_if(data, [&](Refl& x) {
197 return gops.is_systematically_absent(x.hkl);
198 });
199 }
200
201 void sort() { std::sort(data.begin(), data.end()); }
202
204 type = data_type;
205 if (data.empty())
206 return;
208 // discard signs so that merging produces Imean
209 for (Refl& refl : data)
210 refl.isign = 0;
211 sort();
212 std::vector<Refl>::iterator out = data.begin();
213 double sum_wI = 0.;
214 double sum_w = 0.;
215 int nobs = 0;
216 for (auto in = data.begin(); in != data.end(); ++in) {
217 if (out->hkl != in->hkl || out->isign != in->isign) {
218 out->value = sum_wI / sum_w;
219 out->sigma = 1.0 / std::sqrt(sum_w);
220 out->nobs = nobs;
221 sum_wI = sum_w = 0.;
222 nobs = 0;
223 ++out;
224 out->hkl = in->hkl;
225 out->isign = in->isign;
226 }
227 double w = 1. / (in->sigma * in->sigma);
228 sum_wI += w * in->value;
229 sum_w += w;
230 ++nobs;
231 }
232 out->value = sum_wI / sum_w;
233 out->sigma = 1.0 / std::sqrt(sum_w);
234 out->nobs = nobs;
235 data.erase(++out, data.end());
236 }
237
238 // for unmerged centric reflections set isign=1.
239 void switch_to_asu_indices(bool merged=false) {
240 GroupOps gops = spacegroup->operations();
242 for (Refl& refl : data) {
243 if (asu.is_in(refl.hkl)) {
244 if (!merged) {
245 // isign is 0 for original hkl (e.g. from XDS file)
246 if (refl.isign == 0)
247 refl.isign = 1; // since it's in asu - I+ or centric
248 // when reading asu hkl from MTZ file - count centrics always as I+
249 else if (refl.isign == -1 && gops.is_reflection_centric(refl.hkl))
250 refl.isign = 1;
251 }
252 continue;
253 }
254 bool sign;
255 std::tie(refl.hkl, sign) = asu.to_asu_sign(refl.hkl, gops);
256 if (!merged)
257 refl.isign = sign ? 1 : -1;
258 }
259 }
260
262 if (mtz.batches.empty())
263 fail("expected unmerged file");
264 const Mtz::Column* isym_col = mtz.column_with_label("M/ISYM");
265 if (!isym_col || isym_col->idx != 3)
266 fail("unmerged file should have M/ISYM as 4th column");
267 const Mtz::Column& col = mtz.get_column_with_label("I");
268 size_t value_idx = col.idx;
269 size_t sigma_idx = mtz.get_column_with_label("SIGI").idx;
270 unit_cell = mtz.get_average_cell_from_batch_headers(unit_cell_rmsd);
271 spacegroup = mtz.spacegroup;
272 if (!spacegroup)
273 fail("unknown space group");
274 wavelength = mtz.dataset(col.dataset_id).wavelength;
275 for (size_t i = 0; i < mtz.data.size(); i += mtz.columns.size()) {
276 short isign = ((int)mtz.data[i + 3] % 2 == 0 ? -1 : 1);
277 add_if_valid(mtz.get_hkl(i), isign, mtz.data[i + value_idx], mtz.data[i + sigma_idx]);
278 }
279 // Aimless >=0.7.6 (from 2021) has an option to output unmerged file
280 // with original indices instead of reduced indices, with all ISYM = 1.
283 }
284
286 if (!mtz.batches.empty())
287 fail("expected merged file");
288 const Mtz::Column* col = mtz.imean_column();
289 if (!col)
290 fail("Mean intensities (IMEAN, I, IOBS or I-obs) not found");
291 size_t sigma_idx = mtz.get_column_with_label("SIG" + col->label).idx;
292 copy_metadata(mtz);
293 wavelength = mtz.dataset(col->dataset_id).wavelength;
294 read_data(MtzDataProxy{mtz}, col->idx, sigma_idx);
296 }
297
299 if (!mtz.batches.empty())
300 fail("expected merged file");
301 const Mtz::Column* colp = mtz.iplus_column();
302 const Mtz::Column* colm = mtz.iminus_column();
303 if (!colp || !colm)
304 fail("anomalous intensities not found");
305 size_t value_idx[2] = {colp->idx, colm->idx};
306 size_t sigma_idx[2] = {mtz.get_column_with_label("SIG" + colp->label).idx,
307 mtz.get_column_with_label("SIG" + colm->label).idx};
308 int mean_idx = -1;
309 if (check_complete)
310 if (const Mtz::Column* mean_col = mtz.imean_column())
311 mean_idx = (int) mean_col->idx;
312 copy_metadata(mtz);
313 wavelength = mtz.dataset(colp->dataset_id).wavelength;
314 read_anomalous_data(MtzDataProxy{mtz}, mean_idx, value_idx, sigma_idx);
316 }
317
319 if (mtz.column_with_label("I(+)"))
321 else
323 }
324
326 switch (data_type) {
329 break;
330 case DataType::Mean:
332 break;
335 break;
337 assert(0);
338 break;
339 }
340 }
341
343 size_t value_idx = rb.get_column_index("intensity_net");
344 size_t sigma_idx = rb.get_column_index("intensity_sigma");
345 copy_metadata(rb);
346 wavelength = rb.wavelength;
347 read_data(ReflnDataProxy(rb), value_idx, sigma_idx);
350 }
351
353 size_t value_idx = rb.get_column_index("intensity_meas");
354 size_t sigma_idx = rb.get_column_index("intensity_sigma");
355 copy_metadata(rb);
356 wavelength = rb.wavelength;
357 read_data(ReflnDataProxy(rb), value_idx, sigma_idx);
359 }
360
362 bool check_complete=false) {
363 size_t value_idx[2] = {rb.get_column_index("pdbx_I_plus"),
364 rb.get_column_index("pdbx_I_minus")};
365 size_t sigma_idx[2] = {rb.get_column_index("pdbx_I_plus_sigma"),
366 rb.get_column_index("pdbx_I_minus_sigma")};
367 int mean_idx = -1;
368 if (check_complete)
369 mean_idx = rb.find_column_index("intensity_meas");
370 copy_metadata(rb);
371 wavelength = rb.wavelength;
372 read_anomalous_data(ReflnDataProxy(rb), mean_idx, value_idx, sigma_idx);
374 }
375
377 if (rb.find_column_index("pdbx_I_plus") != -1)
379 else if (rb.find_column_index("intensity_meas") != -1)
381 else
382 fail("Intensities not found in the mmCIF file, block ", rb.block.name,
383 " has neither intensity_meas nor pdbx_I_plus/minus");
384 }
385
387 int value_idx = rb.find_column_index("F_meas");
388 if (value_idx == -1)
389 value_idx = rb.find_column_index("F_meas_au");
390 if (value_idx == -1)
391 fail("Column F_meas[_au] not found.");
392 int sigma_idx = rb.find_column_index("F_meas_sigma");
393 if (sigma_idx == -1)
394 sigma_idx = rb.find_column_index("F_meas_sigma_au");
395 if (sigma_idx == -1)
396 fail("Column F_meas_sigma[_au] not found.");
397 copy_metadata(rb);
398 wavelength = rb.wavelength;
399 read_data(ReflnDataProxy(rb), value_idx, sigma_idx);
400 for (Refl& r : data) {
401 r.value *= r.value;
402 r.sigma *= 2 * r.value;
403 }
405 }
406
408 switch (data_type) {
411 break;
412 case DataType::Mean:
414 break;
417 break;
419 break;
420 }
421 }
422
424 unit_cell = xds.unit_cell;
425 spacegroup = find_spacegroup_by_number(xds.spacegroup_number);
426 wavelength = xds.wavelength;
427 data.reserve(xds.data.size());
428 for (const XdsAscii::Refl& in : xds.data)
429 add_if_valid(in.hkl, 0, in.iobs, in.sigma);
432 }
433
434 // returns STARANISO version or empty string
435 std::string take_staraniso_b_from_mtz(const Mtz& mtz) {
437 }
438
442
443private:
444 template<typename Source>
445 void copy_metadata(const Source& source) {
446 unit_cell = source.cell;
447 spacegroup = source.spacegroup;
448 if (!spacegroup)
449 fail("unknown space group");
450 }
451
452 void add_if_valid(const Miller& hkl, short isign, double value, double sigma) {
453 // XDS marks rejected reflections with negative sigma.
454 // Sigma 0.0 is also problematic - it rarely happens (e.g. 5tkn).
455 if (!std::isnan(value) && sigma > 0)
456 data.push_back({hkl, isign, /*nobs=*/0, value, sigma});
457 }
458
459 template<typename DataProxy>
460 void read_data(const DataProxy& proxy, size_t value_idx, size_t sigma_idx) {
461 for (size_t i = 0; i < proxy.size(); i += proxy.stride())
462 add_if_valid(proxy.get_hkl(i), 0,
463 proxy.get_num(i + value_idx),
464 proxy.get_num(i + sigma_idx));
465 }
466
467 template<typename DataProxy>
468 void read_anomalous_data(const DataProxy& proxy, int mean_idx,
469 size_t (&value_idx)[2], size_t (&sigma_idx)[2]) {
470 GroupOps gops = spacegroup->operations();
471 for (size_t i = 0; i < proxy.size(); i += proxy.stride()) {
472 Miller hkl = proxy.get_hkl(i);
473 bool centric = gops.is_reflection_centric(hkl);
474
475 // sanity check
476 if (mean_idx >= 0 && !std::isnan(proxy.get_num(i + mean_idx)) && !centric) {
477 if (std::isnan(proxy.get_num(i + value_idx[0])) &&
478 std::isnan(proxy.get_num(i + value_idx[1])))
479 fail(miller_str(hkl), " has <I>, but I(+) and I(-) are both null");
480 }
481
482 add_if_valid(hkl, 1, proxy.get_num(i + value_idx[0]),
483 proxy.get_num(i + sigma_idx[0]));
484 if (!centric) // ignore I(-) of centric reflections
485 add_if_valid(hkl, -1, proxy.get_num(i + value_idx[1]),
486 proxy.get_num(i + sigma_idx[1]));
487 }
488 }
489};
490
491} // namespace gemmi
492#endif
double as_number(const std::string &s, double nan=NAN)
Definition numb.hpp:19
DataType
Definition refln.hpp:18
bool parse_voigt_notation(const char *start, const char *end, SMat33< double > &b)
Definition merge.hpp:32
void vector_remove_if(std::vector< T > &v, F &&condition)
Definition util.hpp:266
std::string read_staraniso_b_from_mtz(const Mtz &mtz, SMat33< double > &output)
Definition merge.hpp:73
bool read_staraniso_b_from_mmcif(const cif::Block &block, SMat33< double > &output)
Definition merge.hpp:46
std::array< int, 3 > Miller
A synonym for convenient passing of hkl.
Definition unitcell.hpp:128
const SpaceGroup * find_spacegroup_by_number(int ccp4) noexcept
from_chars_result fast_from_chars(const char *start, const char *end, double &d)
Definition atof.hpp:16
std::string read_word(const char *line)
Definition atox.hpp:61
std::string miller_str(const Miller &hkl)
Definition merge.hpp:22
Vec3_< double > Vec3
Definition math.hpp:101
bool starts_with(const std::string &str, const std::string &prefix)
Definition util.hpp:38
void fail(const std::string &msg)
Definition fail.hpp:59
void unreachable()
Definition fail.hpp:80
const char * skip_blank(const char *p)
Definition atox.hpp:47
void add_point(double x, double y)
Definition stats.hpp:53
bool is_systematically_absent(const Op::Miller &hkl) const
Definition symmetry.hpp:515
bool is_reflection_centric(const Op::Miller &hkl) const
Definition symmetry.hpp:491
double scale(const Miller &hkl, const UnitCell &cell) const
Definition merge.hpp:130
std::string hkl_label() const
Definition merge.hpp:118
bool operator<(const Refl &o) const
Definition merge.hpp:108
const char * intensity_label() const
Definition merge.hpp:112
double unit_cell_rmsd[6]
Definition merge.hpp:139
static const char * type_str(DataType data_type)
Definition merge.hpp:145
void read_merged_intensities_from_mmcif(const ReflnBlock &rb)
Definition merge.hpp:376
bool take_staraniso_b_from_mmcif(const cif::Block &block)
Definition merge.hpp:439
void read_mean_intensities_from_mmcif(const ReflnBlock &rb)
Definition merge.hpp:352
void read_unmerged_intensities_from_xds(const XdsAscii &xds)
Definition merge.hpp:423
UnitCell unit_cell
Definition merge.hpp:138
void switch_to_asu_indices(bool merged=false)
Definition merge.hpp:239
std::string spacegroup_str() const
Definition merge.hpp:157
void read_anomalous_intensities_from_mtz(const Mtz &mtz, bool check_complete=false)
Definition merge.hpp:298
void merge_in_place(DataType data_type)
Definition merge.hpp:203
AnisoScaling staraniso_b
Definition merge.hpp:142
std::string take_staraniso_b_from_mtz(const Mtz &mtz)
Definition merge.hpp:435
void remove_systematic_absences()
Definition merge.hpp:192
void read_merged_intensities_from_mtz(const Mtz &mtz)
Definition merge.hpp:318
const char * type_str() const
Definition merge.hpp:155
void read_anomalous_intensities_from_mmcif(const ReflnBlock &rb, bool check_complete=false)
Definition merge.hpp:361
void read_mtz(const Mtz &mtz, DataType data_type)
Definition merge.hpp:325
std::array< double, 2 > resolution_range() const
Definition merge.hpp:159
void read_unmerged_intensities_from_mtz(const Mtz &mtz)
Definition merge.hpp:261
void read_unmerged_intensities_from_mmcif(const ReflnBlock &rb)
Definition merge.hpp:342
Correlation calculate_correlation(const Intensities &other) const
Definition merge.hpp:174
void read_mean_intensities_from_mtz(const Mtz &mtz)
Definition merge.hpp:285
void read_mmcif(const ReflnBlock &rb, DataType data_type)
Definition merge.hpp:407
void read_f_squared_from_mmcif(const ReflnBlock &rb)
Definition merge.hpp:386
std::vector< Refl > data
Definition merge.hpp:136
const SpaceGroup * spacegroup
Definition merge.hpp:137
Vec3 left_multiply(const Vec3 &p) const
Definition math.hpp:161
Mat33 multiply_by_diagonal(const Vec3 &p) const
Definition math.hpp:167
Mat33 inverse() const
Definition math.hpp:206
Vec3 multiply(const Vec3 &p) const
Definition math.hpp:156
std::size_t idx
Definition mtz.hpp:88
std::string label
Definition mtz.hpp:83
std::pair< Op::Miller, bool > to_asu_sign(const Op::Miller &hkl, const GroupOps &gops) const
Similar to to_asu(), but the second returned value is sign: true for + or centric.
bool is_in(const Op::Miller &hkl) const
bool all_zero() const
Definition math.hpp:275
auto r_u_r(const Vec3_< VT > &r) const -> decltype(r.x+u11)
Definition math.hpp:295
Unit cell.
Definition unitcell.hpp:139
Transform frac
Definition unitcell.hpp:151
double calculate_1_d2(const Miller &hkl) const
Definition unitcell.hpp:529
bool ok() const
Definition cifdoc.hpp:350