Gemmi C++ API
Loading...
Searching...
No Matches
xds_ascii.hpp
Go to the documentation of this file.
1// Copyright 2020 Global Phasing Ltd.
2//
3// Read unmerged XDS files: XDS_ASCII.HKL and INTEGRATE.HKL.
4
5#ifndef GEMMI_XDS_ASCII_HPP_
6#define GEMMI_XDS_ASCII_HPP_
7
8#include "atof.hpp" // for fast_from_chars
9#include "atox.hpp" // for is_space
10#include "input.hpp" // for copy_line_from_stream
11#include "fileutil.hpp" // for file_open
12#include "unitcell.hpp" // for UnitCell
13#include "util.hpp" // for starts_with
14
15namespace gemmi {
16
17// from Pointless docs: likely in-house source, in which case
18// the unpolarised value is left unchanged (recognised wavelengths
19// are CuKalpha 1.5418 +- 0.0019, Mo 0.7107 +- 0.0002, Cr 2.29 +- 0.01)
20inline bool likely_in_house_source(double wavelength) {
21 return std::fabs(wavelength - 1.5418) < 0.0019 ||
22 std::fabs(wavelength - 0.7107) < 0.0002 ||
23 std::fabs(wavelength - 2.29) < 0.01;
24}
25
27 struct Refl {
29 double iobs;
30 double sigma;
31 double xd;
32 double yd;
33 double zd;
34 double rlp;
35 double peak;
36 double corr; // is it always integer?
37 double maxc;
38 int iset = 1;
39
40 // ZD can be negative for a few reflections
41 int frame() const { return (int) std::floor(zd + 1); }
42 };
43 struct Iset {
44 int id;
45 std::string input_file;
46 double wavelength = 0.;
47 double cell_constants[6] = {0., 0., 0., 0., 0., 0.};
48 //statistics set by gather_iset_statistics()
49 int frame_number_min = -1;
50 int frame_number_max = -1;
51 int frame_count = -1;
52 int reflection_count = -1;
53
54 Iset(int id_) : id(id_) {}
55 };
56 std::string source_path;
57 int read_columns = 0; // doesn't include ITEM_ISET from XSCALE
58 int spacegroup_number = 0;
60 Mat33 cell_axes{0.};
61 double wavelength = 0.;
62 double incident_beam_dir[3] = {0., 0., 0.};
63 double oscillation_range = 0.;
64 double rotation_axis[3] = {0., 0., 0.};
65 double starting_angle = 0.;
66 double reflecting_range_esd = 0.;
67 int starting_frame = 1;
68 int nx = 0; // detector size - number of pixels
69 int ny = 0;
70 double qx = 0.; // pixel size in mm
71 double qy = 0.;
72 double orgx = 0.;
73 double orgy = 0.;
74 double detector_distance = 0.;
75 std::string generated_by;
76 std::string version_str;
77 std::vector<Iset> isets;
78 std::vector<Refl> data;
79
81 for (Iset& i : isets)
82 if (i.id == id)
83 return i;
84 isets.emplace_back(id);
85 return isets.back();
86 }
87 template<typename Stream>
88 void read_stream(Stream&& stream, const std::string& source);
89
90 template<typename T>
91 inline void read_input(T&& input) {
92 if (input.is_stdin()) {
93 read_stream(FileStream{stdin}, "stdin");
94 } else if (input.is_compressed()) {
95 read_stream(input.get_uncompressing_stream(), input.path());
96 } else {
97 auto f = file_open(input.path().c_str(), "r");
98 read_stream(FileStream{f.get()}, input.path());
99 }
100 }
101
103 for (Iset& iset : isets) {
104 iset.frame_number_min = INT_MAX;
105 iset.frame_number_max = 0;
106 for (const XdsAscii::Refl& refl : data)
107 if (refl.iset == iset.id) {
108 ++iset.reflection_count;
109 int frame = refl.frame();
110 iset.frame_number_min = std::min(iset.frame_number_min, frame);
111 iset.frame_number_max = std::max(iset.frame_number_max, frame);
112 }
113 if (iset.frame_number_min > iset.frame_number_max)
114 continue;
115 std::vector<uint8_t> frames(iset.frame_number_max - iset.frame_number_min + 1);
116 for (const XdsAscii::Refl& refl : data)
117 if (refl.iset == iset.id)
118 frames[refl.frame() - iset.frame_number_min] = 1;
119 iset.frame_count = 0;
120 for (uint8_t f : frames)
121 iset.frame_count += f;
122 }
123 }
124
125 double rot_angle(const Refl& refl) const {
126 double z = refl.zd - starting_frame + 1;
127 return starting_angle + oscillation_range * z;
128 }
129
130 static Vec3 get_normalized(const double (&arr)[3], const char* name) {
131 Vec3 vec(arr[0], arr[1], arr[2]);
132 double length = vec.length();
133 if (length == 0)
134 fail("unknown ", name);
135 return vec / length;
136 }
137
138 // it's already normalized, but just in case normalize it again
140 return get_normalized(rotation_axis, "rotation axis");
141 }
142
143 // I'm not sure if always |incident_beam_dir| == 1/wavelength
145 return get_normalized(incident_beam_dir, "incident beam direction");
146 }
147
148 bool has_cell_axes() const {
149 for (int i = 0; i < 3; ++i)
150 if (cell_axes[i][0] == 0 && cell_axes[i][1] == 0 && cell_axes[i][2] == 0)
151 return false;
152 return true;
153 }
154
158 // Cambridge z direction is along the principal rotation axis
159 Vec3 z = get_rotation_axis();
160 // Cambridge z direction is along beam
161 Vec3 x = get_s0_direction();
162 Vec3 y = z.cross(x).normalized();
163 // beam and rotation axis may not be orthogonal
164 x = y.cross(z).normalized();
165 return Mat33::from_columns(x, y, z);
166 }
167
169 if (!has_cell_axes())
170 fail("unknown unit cell axes");
171 Vec3 a = cell_axes.row_copy(0);
172 Vec3 b = cell_axes.row_copy(1);
173 Vec3 c = cell_axes.row_copy(2);
174 Vec3 ar = b.cross(c).normalized();
175 Vec3 br = c.cross(a);
176 Vec3 cr = ar.cross(br).normalized();
177 br = cr.cross(ar);
178 return Mat33::from_columns(ar, br, cr);
179 }
180
183
186 vector_remove_if(data, [&](Refl& r) { return r.maxc > overload; });
187 }
188
191 double minz = batchmin - 1;
192 vector_remove_if(data, [&](Refl& r) { return r.zd < minz; });
193 }
194};
195
196template<size_t N>
197bool starts_with_ptr(const char* a, const char (&b)[N], const char** endptr) {
198 if (std::strncmp(a, b, N-1) != 0)
199 return false;
200 *endptr = a + N - 1;
201 return true;
202}
203
204template<size_t N>
205bool starts_with_ptr_b(const char* a, const char (&b)[N], const char** endptr) {
206 return starts_with_ptr<N>(skip_blank(a), b, endptr);
207}
208
209inline const char* parse_number_into(const char* start, const char* end,
210 double& val, const char* line) {
211 auto result = fast_from_chars(start, end, val);
212 if (result.ec != std::errc())
213 fail("failed to parse number in:\n", line);
214 return result.ptr;
215}
216
217template<int N>
218void parse_numbers_into_array(const char* start, const char* end,
219 double (&arr)[N], const char* line) {
220 for (int i = 0; i < N; ++i) {
221 auto result = fast_from_chars(start, end, arr[i]);
222 if (result.ec != std::errc())
223 fail("failed to parse number #", i+1, " in:\n", line);
224 start = result.ptr;
225 }
226}
227
228template<typename Stream>
229void XdsAscii::read_stream(Stream&& stream, const std::string& source) {
230 source_path = source;
231 read_columns = 12;
232 char line[256];
233 size_t len0 = copy_line_from_stream(line, 255, stream);
234 int iset_col = 0;
235 if (len0 == 0 || !(starts_with(line, "!FORMAT=XDS_ASCII MERGE=FALSE") ||
236 (starts_with(line, "!OUTPUT_FILE=INTEGRATE.HKL"))))
237 fail("not an unmerged XDS_ASCII nor INTEGRATE.HKL file: " + source_path);
238 const char* rhs;
239 while (size_t len = copy_line_from_stream(line, 255, stream)) {
240 if (line[0] == '!') {
241 if (starts_with_ptr(line+1, "Generated by ", &rhs)) {
244 } else if (starts_with_ptr(line+1, "SPACE_GROUP_NUMBER=", &rhs)) {
246 } else if (starts_with_ptr(line+1, "UNIT_CELL_", &rhs)) {
247 if (starts_with_ptr(rhs, "CONSTANTS=", &rhs)) { // UNIT_CELL_CONSTANTS=
248 double par[6];
250 unit_cell.set(par[0], par[1], par[2], par[3], par[4], par[5]);
251 } else if (starts_with_ptr(rhs, "A-AXIS=", &rhs)) { // UNIT_CELL_A-AXIS=
253 } else if (starts_with_ptr(rhs, "B-AXIS=", &rhs)) { // UNIT_CELL_B-AXIS=
255 } else if (starts_with_ptr(rhs, "C-AXIS=", &rhs)) { // UNIT_CELL_C-AXIS=
257 }
258 } else if (starts_with_ptr(line+1, "REFLECTING_RANGE_E.S.D.=", &rhs)) {
260 if (result.ec != std::errc())
261 fail("failed to parse mosaicity:\n", line);
262 } else if (starts_with_ptr(line+1, "X-RAY_WAVELENGTH=", &rhs)) {
264 if (result.ec != std::errc())
265 fail("failed to parse wavelength:\n", line);
266 } else if (starts_with_ptr(line+1, "INCIDENT_BEAM_DIRECTION=", &rhs)) {
268 } else if (starts_with_ptr(line+1, "OSCILLATION_RANGE=", &rhs)) {
270 if (result.ec != std::errc())
271 fail("failed to parse:\n", line);
272 } else if (starts_with_ptr(line+1, "ROTATION_AXIS=", &rhs)) {
274 } else if (starts_with_ptr(line+1, "STARTING_ANGLE=", &rhs)) {
276 if (result.ec != std::errc())
277 fail("failed to parse:\n", line);
278 } else if (starts_with_ptr(line+1, "STARTING_FRAME=", &rhs)) {
280 } else if (starts_with_ptr(line+1, " ISET= ", &rhs)) {
281 const char* endptr;
282 int id = simple_atoi(rhs, &endptr);
285 if (starts_with_ptr(endptr, "INPUT_FILE=", &rhs)) {
286 iset.input_file = read_word(rhs);
287 } else if (starts_with_ptr(endptr, "X-RAY_WAVELENGTH=", &rhs)) {
288 double w;
289 auto result = fast_from_chars(rhs, line+len, w);
290 if (result.ec != std::errc())
291 fail("failed to parse iset wavelength:\n", line);
292 iset.wavelength = w;
293 } else if (starts_with_ptr(endptr, "UNIT_CELL_CONSTANTS=", &rhs)) {
295 }
296 } else if (starts_with_ptr(line+1, "NX=", &rhs)) {
297 const char* endptr;
299 if (starts_with_ptr_b(endptr, "NY=", &rhs))
301 if (starts_with_ptr_b(endptr, "QX=", &rhs))
303 if (starts_with_ptr_b(endptr, "QY=", &rhs))
305 } else if (starts_with_ptr(line+1, "ORGX=", &rhs)) {
306 const char* endptr = parse_number_into(rhs, line+len, orgx, line);
307 if (starts_with_ptr_b(endptr, "ORGY=", &rhs))
309 if (starts_with_ptr_b(endptr, "DETECTOR_DISTANCE=", &rhs))
311 } else if (starts_with_ptr(line+1, "NUMBER_OF_ITEMS_IN_EACH_DATA_RECORD=", &rhs)) {
312 int num = simple_atoi(rhs);
313 // INTEGRATE.HKL has read_columns=12, as set above
314 if (generated_by == "XSCALE")
315 read_columns = 8;
316 else if (generated_by == "CORRECT")
317 read_columns = 11;
318 // check if the columns are what they always are
319 if (num < read_columns)
320 fail("expected ", std::to_string(read_columns), "+ columns, got:\n", line);
321 if (generated_by == "INTEGRATE") {
322 copy_line_from_stream(line, 52, stream);
323 if (!starts_with(line, "!H,K,L,IOBS,SIGMA,XCAL,YCAL,ZCAL,RLP,PEAK,CORR,MAXC"))
324 fail("unexpected column order in INTEGRATE.HKL");
325 } else {
326 const char* expected_columns[12] = {
327 "H=1", "K=2", "L=3", "IOBS=4", "SIGMA(IOBS)=5",
328 "XD=6", "YD=7", "ZD=8", "RLP=9", "PEAK=10", "CORR=11", "MAXC=12"
329 };
330 for (int i = 0; i < read_columns; ++i) {
331 const char* col = expected_columns[i];
332 copy_line_from_stream(line, 42, stream);
333 if (std::strncmp(line, "!ITEM_", 6) != 0 ||
334 std::strncmp(line+6, col, std::strlen(col)) != 0)
335 fail("column !ITEM_" + std::string(col), " not found.");
336 }
337 }
338 } else if (starts_with_ptr(line+1, "ITEM_ISET=", &rhs)) {
340 } else if (starts_with(line+1, "END_OF_DATA")) {
341 if (isets.empty()) {
342 isets.emplace_back(1);
343 isets.back().wavelength = wavelength;
344 }
345 for (XdsAscii::Refl& refl : data)
346 if (size_t(refl.iset - 1) >= isets.size())
347 fail("unexpected ITEM_ISET " + std::to_string(refl.iset));
348 return;
349 }
350 } else {
351 data.emplace_back();
352 XdsAscii::Refl& r = data.back();
353 const char* p = line;
354 for (int i = 0; i < 3; ++i)
355 r.hkl[i] = simple_atoi(p, &p);
356 auto result = fast_from_chars(p, line+len, r.iobs); // 4
357 result = fast_from_chars(result.ptr, line+len, r.sigma); // 5
358 result = fast_from_chars(result.ptr, line+len, r.xd); // 6
359 result = fast_from_chars(result.ptr, line+len, r.yd); // 7
360 result = fast_from_chars(result.ptr, line+len, r.zd); // 8
361 if (read_columns >= 11) {
362 result = fast_from_chars(result.ptr, line+len, r.rlp); // 9
363 result = fast_from_chars(result.ptr, line+len, r.peak); // 10
364 result = fast_from_chars(result.ptr, line+len, r.corr); // 11
365 if (read_columns > 11) {
366 result = fast_from_chars(result.ptr, line+len, r.maxc); // 12
367 } else {
368 r.maxc = 0;
369 }
370 } else {
371 r.rlp = r.peak = r.corr = r.maxc = 0;
372 }
373 if (result.ec != std::errc())
374 fail("failed to parse data line:\n", line);
375 if (iset_col >= read_columns) {
376 const char* iset_ptr = result.ptr;
377 for (int j = read_columns+1; j < iset_col; ++j)
379 r.iset = simple_atoi(iset_ptr);
380 }
381 }
382 }
383 fail("incorrect or unfinished file: " + source_path);
384}
385
386inline XdsAscii read_xds_ascii_file(const std::string& path) {
387 auto f = file_open(path.c_str(), "r");
389 ret.read_stream(FileStream{f.get()}, path);
390 return ret;
391}
392
393} // namespace gemmi
394#endif
#define GEMMI_DLL
Definition fail.hpp:53
void parse_numbers_into_array(const char *start, const char *end, double(&arr)[N], const char *line)
void vector_remove_if(std::vector< T > &v, F &&condition)
Definition util.hpp:266
int simple_atoi(const char *p, const char **endptr=nullptr)
Definition atox.hpp:105
size_t copy_line_from_stream(char *line, int size, Input &&in)
Definition input.hpp:155
bool starts_with_ptr_b(const char *a, const char(&b)[N], const char **endptr)
std::array< int, 3 > Miller
A synonym for convenient passing of hkl.
Definition unitcell.hpp:128
from_chars_result fast_from_chars(const char *start, const char *end, double &d)
Definition atof.hpp:16
std::string read_word(const char *line)
Definition atox.hpp:61
fileptr_t file_open(const char *path, const char *mode)
Definition fileutil.hpp:39
bool starts_with_ptr(const char *a, const char(&b)[N], const char **endptr)
Vec3_< double > Vec3
Definition math.hpp:101
bool likely_in_house_source(double wavelength)
Definition xds_ascii.hpp:20
bool starts_with(const std::string &str, const std::string &prefix)
Definition util.hpp:38
void fail(const std::string &msg)
Definition fail.hpp:59
XdsAscii read_xds_ascii_file(const std::string &path)
const char * skip_word(const char *p)
Definition atox.hpp:54
std::string trim_str(const std::string &str)
Definition util.hpp:109
const char * parse_number_into(const char *start, const char *end, double &val, const char *line)
const char * skip_blank(const char *p)
Definition atox.hpp:47
double a[3][3]
Definition math.hpp:116
Unit cell.
Definition unitcell.hpp:139
void set(double a_, double b_, double c_, double alpha_, double beta_, double gamma_)
Definition unitcell.hpp:280
std::string input_file
Definition xds_ascii.hpp:45
double incident_beam_dir[3]
Definition xds_ascii.hpp:62
Vec3 get_rotation_axis() const
Vec3 get_s0_direction() const
std::string generated_by
Definition xds_ascii.hpp:75
double detector_distance
Definition xds_ascii.hpp:74
bool has_cell_axes() const
UnitCell unit_cell
Definition xds_ascii.hpp:59
Mat33 calculate_conversion_from_cambridge() const
Return transition matrix from "Cambridge" frame to XDS frame.
std::string source_path
Definition xds_ascii.hpp:56
double oscillation_range
Definition xds_ascii.hpp:63
double reflecting_range_esd
Definition xds_ascii.hpp:66
Mat33 get_orientation() const
void read_stream(Stream &&stream, const std::string &source)
double rot_angle(const Refl &refl) const
void gather_iset_statistics()
std::vector< Iset > isets
Definition xds_ascii.hpp:77
static Vec3 get_normalized(const double(&arr)[3], const char *name)
void eliminate_overloads(double overload)
std::vector< Refl > data
Definition xds_ascii.hpp:78
double starting_angle
Definition xds_ascii.hpp:65
void apply_polarization_correction(double p, Vec3 normal)
Iset & find_or_add_iset(int id)
Definition xds_ascii.hpp:80
double rotation_axis[3]
Definition xds_ascii.hpp:64
void eliminate_batchmin(int batchmin)
void read_input(T &&input)
Definition xds_ascii.hpp:91
std::string version_str
Definition xds_ascii.hpp:76