Gemmi C++ API
Loading...
Searching...
No Matches
remarks.hpp
Go to the documentation of this file.
1// Copyright 2019 Global Phasing Ltd.
2//
3// Function read_metadata_from_remarks() that interprets REMARK 3
4// and REMARK 200/230/240 filling in Metadata.
5
6#ifndef GEMMI_REMARKS_HPP_
7#define GEMMI_REMARKS_HPP_
8
9#include <cctype> // for isspace
10#include <cstdlib> // for atoi
11#include <cstring> // for memcpy, strstr, strchr, strcmp
12#include <stdexcept> // for invalid_argument
13#include "atof.hpp" // for fast_from_chars
14#include "atox.hpp" // for string_to_int
15#include "fail.hpp" // for fail
16#include "metadata.hpp" // for Metadata
17#include "model.hpp" // for Structure, impl::find_or_add
18#include "util.hpp" // for trim_str, alpha_up, istarts_with
19
20namespace gemmi {
21
22namespace pdb_impl {
23
24inline int read_int(const char* p, int field_length) {
25 return string_to_int(p, false, field_length);
26}
27
28inline double read_double(const char* p, int field_length) {
29 double d = 0.;
30 // we don't check for errors here
31 fast_from_chars(p, p + field_length, d);
32 return d;
33}
34
35inline std::string read_string(const char* p, int field_length) {
36 // left trim
37 while (field_length != 0 && is_space(*p)) {
38 ++p;
39 --field_length;
40 }
41 // EOL/EOF ends the string
42 for (int i = 0; i < field_length; ++i)
43 if (p[i] == '\n' || p[i] == '\r' || p[i] == '\0') {
44 field_length = i;
45 break;
46 }
47 // right trim
48 while (field_length != 0 && is_space(p[field_length-1]))
49 --field_length;
50 return std::string(p, field_length);
51}
52
// Exact comparison of a std::string with a string literal.
// The literal's length (N - 1, excluding the terminating NUL) is known
// at compile time, so strings of a different size are rejected before
// any characters are compared.
template<size_t N>
inline bool same_str(const std::string& s, const char (&literal)[N]) {
  if (s.size() != N - 1)
    return false;
  return std::strcmp(s.c_str(), literal) == 0;
}
57
58// "28-MAR-07" -> "2007-03-28"
59// (we also accept less standard format "28-Mar-2007" as used by BUSTER)
60// We do not check if the date is correct.
61// The returned value is one of:
62// DDDD-DD-DD - possibly correct date,
63// DDDD-xx-DD - unrecognized month,
64// empty string - the digits were not there.
65inline std::string pdb_date_format_to_iso(const std::string& date) {
66 const char months[] = "JAN01FEB02MAR03APR04MAY05JUN06"
67 "JUL07AUG08SEP09OCT10NOV11DEC122222";
68 if (date.size() < 9 || !is_digit(date[0]) || !is_digit(date[1]) ||
69 !is_digit(date[7]) || !is_digit(date[8]))
70 return std::string();
71 std::string iso = "xxxx-xx-xx";
72 if (date.size() >= 11 && is_digit(date[9]) && is_digit(date[10])) {
73 std::memcpy(&iso[0], &date[7], 4);
74 } else {
75 std::memcpy(&iso[0], (date[7] > '6' ? "19" : "20"), 2);
76 std::memcpy(&iso[2], &date[7], 2);
77 }
78 char month[4] = {alpha_up(date[3]), alpha_up(date[4]), alpha_up(date[5]), '\0'};
79 if (const char* m = std::strstr(months, month))
80 std::memcpy(&iso[5], m + 3, 2);
81 std::memcpy(&iso[8], &date[0], 2);
82 return iso;
83}
84
85inline bool is_double(const char* p) {
86 while (std::isspace(*p)) ++p;
87 if (*p == '-' || *p == '+') ++p;
88 while (is_digit(*p)) ++p;
89 if (*p == '.') {
90 ++p;
91 while (is_digit(*++p)) ++p;
92 }
93 while (std::isspace(*p)) ++p;
94 return *p == '\0';
95}
96
// Returns true if `key` names one element of a TLS tensor: exactly three
// characters, the first being 'T', 'L' or 'S' and the other two being
// indices in the range '1'..'3' (e.g. "T11", "L23", "S31").
inline bool is_tls_item(const std::string& key) {
  if (key.size() != 3)
    return false;
  const char tensor = key[0];
  if (tensor != 'T' && tensor != 'L' && tensor != 'S')
    return false;
  const auto is_index = [](char c) { return c >= '1' && c <= '3'; };
  return is_index(key[1]) && is_index(key[2]);
}
103
104// Usually we have one program per line:
105// XDS
106// XDS VERSION NOVEMBER 3, 2014
107// AIMLESS 0.5.17
108// but it can also be a list of programs:
109// autoPROC (Version 1.3.0), AIMLESS, STARANISO
110// autoPROC, XDS (VERSION Jan 26, 2018)
111// We assume that:
112// - the name has only one word (apologies to Queen of Spades,
113// Force Field X, APEX 2 and Insight II).
114// - comma not followed by a digit separates programs
115// - brackets and the word VERSION are to be removed from version
116// Additionally, if version has format: "something (DATE)" where
117// the DATE format is either 28-MAR-07 or 28-Mar-2007, then DATE
118// is put into _software.date.
119inline void add_software(Metadata& meta, SoftwareItem::Classification type,
120 const std::string& name) {
121 for (size_t start = 0, end = 0; end != std::string::npos; start = end + 1) {
122 end = name.find(',', start);
123 while (end != std::string::npos &&
124 name[end+1] == ' ' && is_digit(name[end+2]))
125 end = name.find(',', end + 1);
126 meta.software.emplace_back();
127 SoftwareItem& item = meta.software.back();
128 item.name = trim_str(name.substr(start, end - start));
129 size_t sep = item.name.find(' ');
130 if (sep != std::string::npos) {
131 size_t ver_start = item.name.find_first_not_of(" (", sep + 1);
132 item.version = item.name.substr(ver_start);
133 item.name.resize(sep);
134 if (!item.version.empty() && item.version.back() == ')') {
135 size_t open_br = item.version.find('(');
136 if (open_br == std::string::npos) {
137 item.version.pop_back();
138 } else if (open_br + 11 == item.version.size() ||
139 open_br + 13 == item.version.size()) {
140 item.date = pdb_date_format_to_iso(item.version.substr(open_br + 1));
141 if (item.date.size() == 10 && item.date[5] != 'x') {
142 size_t last = item.version.find_last_not_of(' ', open_br - 1);
143 item.version.resize(last + 1);
144 } else {
145 item.date.clear();
146 }
147 }
148 }
149 if (istarts_with(item.version, "version "))
150 item.version.erase(0, 8);
151 }
152 item.classification = type;
153 }
154}
155
156// REMARK 3 TERM COUNT WEIGHT FUNCTION.
157// REMARK 3 BOND LENGTHS : 5760 ; 2.000 ; HARMONIC
158inline void add_restraint_count_weight(RefinementInfo& ref_info,
159 const char* key, const char* value) {
160 if (*value == 'N') // NULL instead of number
161 return;
162 ref_info.restr_stats.emplace_back(key);
163 RefinementInfo::Restr& restr = ref_info.restr_stats.back();
164 const char* endptr;
165 restr.count = no_sign_atoi(value, &endptr);
166 if (const char* sep = std::strchr(endptr, ';'))
167 restr.weight = fast_atof(sep + 1, &endptr);
168 if (const char* sep = std::strchr(endptr, ';'))
169 restr.function = read_string(sep+1, 50);
170}
171
172inline void read_remark3_line(const char* line, Metadata& meta,
173 std::string*& possibly_unfinished_remark3) {
174 // Based on:
175 // www.wwpdb.org/documentation/file-format-content/format23/remark3.html
176 // and analysis of PDB files.
177 // In special cases, such as joint X-ray and neutron refinement 5MOO,
178 // PDB file can have two REMARK 3 blocks.
179 // Generally, after "REMARK 3" we have either a header-like sentence
180 // or a key:value pair with a colon, or a continuation of text from the
181 // previous line.
182 const char* key_start = skip_blank(line + 10);
183 const char* colon = std::strchr(key_start, ':');
184 const char* key_end = rtrim_cstr(key_start, colon);
185 std::string key(key_start, key_end);
186
187 // multi-line continuation requires special handling
188 if (possibly_unfinished_remark3) {
189 if (key_start > line + 17) {
190 *possibly_unfinished_remark3 += ' ';
191 possibly_unfinished_remark3->append(key);
192 return;
193 }
194 possibly_unfinished_remark3 = nullptr;
195 }
196
197 if (colon) {
198 const char* value = skip_blank(colon + 1);
199 const char* end = rtrim_cstr(value);
200 if (end - value == 4 && std::strncmp(value, "NULL", 4) == 0)
201 return;
202 if (same_str(key, "PROGRAM"))
203 add_software(meta, SoftwareItem::Refinement, std::string(value, end));
204 if (meta.refinement.empty())
205 return;
206 RefinementInfo& ref_info = meta.refinement.back();
207 if (same_str(key, "RESOLUTION RANGE HIGH (ANGSTROMS)")) {
208 ref_info.resolution_high = fast_atof(value);
209 } else if (same_str(key, "RESOLUTION RANGE LOW (ANGSTROMS)")) {
210 ref_info.resolution_low = fast_atof(value);
211 } else if (same_str(key, "COMPLETENESS FOR RANGE (%)")) {
212 ref_info.completeness = fast_atof(value);
213 } else if (same_str(key, "NUMBER OF REFLECTIONS")) {
214 ref_info.reflection_count = std::atoi(value);
215 } else if (same_str(key, "CROSS-VALIDATION METHOD")) {
216 ref_info.cross_validation_method = std::string(value, end);
217 } else if (same_str(key, "FREE R VALUE TEST SET SELECTION")) {
218 ref_info.rfree_selection_method = std::string(value, end);
219 } else if (same_str(key, "R VALUE (WORKING + TEST SET)")) {
220 ref_info.r_all = fast_atof(value);
221 } else if (same_str(key, "R VALUE (WORKING SET)")) {
222 ref_info.r_work = fast_atof(value);
223 } else if (same_str(key, "FREE R VALUE")) {
224 ref_info.r_free = fast_atof(value);
225 } else if (same_str(key, "FREE R VALUE TEST SET COUNT")) {
226 ref_info.rfree_set_count = atoi(value);
227 } else if (same_str(key, "TOTAL NUMBER OF BINS USED")) {
228 ref_info.bin_count = std::atoi(value);
229 } else if (same_str(key, "BIN RESOLUTION RANGE HIGH (A)")) {
230 if (!ref_info.bins.empty())
231 ref_info.bins.back().resolution_high = fast_atof(value);
232 } else if (same_str(key, "BIN RESOLUTION RANGE LOW (A)")) {
233 if (!ref_info.bins.empty())
234 ref_info.bins.back().resolution_low = fast_atof(value);
235 } else if (same_str(key, "BIN COMPLETENESS (WORKING+TEST) (%)")) {
236 if (!ref_info.bins.empty())
237 ref_info.bins.back().completeness = fast_atof(value);
238 } else if (same_str(key, "REFLECTIONS IN BIN (WORKING+TEST)")) {
239 if (!ref_info.bins.empty())
240 ref_info.bins.back().reflection_count = std::atoi(value);
241 } else if (same_str(key, "BIN R VALUE (WORKING+TEST)")) {
242 if (!ref_info.bins.empty())
243 ref_info.bins.back().r_all = fast_atof(value);
244 } else if (same_str(key, "BIN R VALUE (WORKING SET)")) {
245 if (!ref_info.bins.empty())
246 ref_info.bins.back().r_work = fast_atof(value);
247 } else if (same_str(key, "BIN FREE R VALUE")) {
248 if (!ref_info.bins.empty())
249 ref_info.bins.back().r_free = fast_atof(value);
250 } else if (same_str(key, "BIN FREE R VALUE TEST SET COUNT")) {
251 if (!ref_info.bins.empty())
252 ref_info.bins.back().rfree_set_count = std::atoi(value);
253 } else if (same_str(key, "FROM WILSON PLOT (A**2)")) {
254 // TODO
255 // exper.b_wilson = fast_atof(value);
256 } else if (same_str(key, "MEAN B VALUE (OVERALL, A**2)")) {
257 ref_info.mean_b = fast_atof(value);
258 } else if (same_str(key, "B11 (A**2)")) {
259 ref_info.aniso_b[0][0] = fast_atof(value);
260 } else if (same_str(key, "B22 (A**2)")) {
261 ref_info.aniso_b[1][1] = fast_atof(value);
262 } else if (same_str(key, "B33 (A**2)")) {
263 ref_info.aniso_b[2][2] = fast_atof(value);
264 } else if (same_str(key, "B12 (A**2)")) {
265 ref_info.aniso_b[0][1] = fast_atof(value);
266 } else if (same_str(key, "B13 (A**2)")) {
267 ref_info.aniso_b[0][2] = fast_atof(value);
268 } else if (same_str(key, "B23 (A**2)")) {
269 ref_info.aniso_b[1][2] = fast_atof(value);
270 } else if (same_str(key, "ESD FROM LUZZATI PLOT (A)")) {
271 ref_info.luzzati_error = fast_atof(value);
272 } else if (same_str(key, "DPI (BLOW EQ-10) BASED ON R VALUE (A)")) {
273 ref_info.dpi_blow_r = fast_atof(value);
274 } else if (same_str(key, "DPI (BLOW EQ-9) BASED ON FREE R VALUE (A)")) {
275 ref_info.dpi_blow_rfree = fast_atof(value);
276 } else if (same_str(key, "DPI (CRUICKSHANK) BASED ON R VALUE (A)")) {
277 ref_info.dpi_cruickshank_r = fast_atof(value);
278 } else if (same_str(key, "DPI (CRUICKSHANK) BASED ON FREE R VALUE (A)")) {
279 ref_info.dpi_cruickshank_rfree = fast_atof(value);
280 } else if (same_str(key, "CORRELATION COEFFICIENT FO-FC")) {
281 ref_info.cc_fo_fc = fast_atof(value);
282 } else if (same_str(key, "CORRELATION COEFFICIENT FO-FC FREE")) {
283 ref_info.cc_fo_fc_free = fast_atof(value);
284 } else if (same_str(key, "BOND LENGTHS")) {
285 add_restraint_count_weight(ref_info, "t_bond_d", value);
286 } else if (same_str(key, "BOND ANGLES")) {
287 add_restraint_count_weight(ref_info, "t_angle_deg", value);
288 } else if (same_str(key, "TORSION ANGLES")) {
289 add_restraint_count_weight(ref_info, "t_dihedral_angle_d", value);
290 } else if (same_str(key, "TRIGONAL CARBON PLANES")) {
291 add_restraint_count_weight(ref_info, "t_trig_c_planes", value);
292 } else if (same_str(key, "GENERAL PLANES")) {
293 add_restraint_count_weight(ref_info, "t_gen_planes", value);
294 } else if (same_str(key, "ISOTROPIC THERMAL FACTORS")) {
295 add_restraint_count_weight(ref_info, "t_it", value);
296 } else if (same_str(key, "BAD NON-BONDED CONTACTS")) {
297 add_restraint_count_weight(ref_info, "t_nbd", value);
298 } else if (same_str(key, "IMPROPER TORSIONS")) {
299 add_restraint_count_weight(ref_info, "t_improper_torsion", value);
300 } else if (same_str(key, "CHIRAL IMPROPER TORSION")) {
301 add_restraint_count_weight(ref_info, "t_chiral_improper_torsion", value);
302 } else if (same_str(key, "SUM OF OCCUPANCIES")) {
303 add_restraint_count_weight(ref_info, "t_sum_occupancies", value);
304 } else if (same_str(key, "UTILITY DISTANCES")) {
305 add_restraint_count_weight(ref_info, "t_utility_distance", value);
306 } else if (same_str(key, "UTILITY ANGLES")) {
307 add_restraint_count_weight(ref_info, "t_utility_angle", value);
308 } else if (same_str(key, "UTILITY TORSION")) {
309 add_restraint_count_weight(ref_info, "t_utility_torsion", value);
310 } else if (same_str(key, "IDEAL-DIST CONTACT TERM")) {
311 add_restraint_count_weight(ref_info, "t_ideal_dist_contact", value);
312 } else if (same_str(key, "BOND LENGTHS (A)")) {
313 impl::find_or_add(ref_info.restr_stats, "t_bond_d").dev_ideal
314 = read_double(value, 50);
315 } else if (same_str(key, "BOND ANGLES (DEGREES)")) {
316 impl::find_or_add(ref_info.restr_stats, "t_angle_deg").dev_ideal
317 = read_double(value, 50);
318 } else if (same_str(key, "PEPTIDE OMEGA TORSION ANGLES (DEGREES)")) {
319 impl::find_or_add(ref_info.restr_stats, "t_omega_torsion").dev_ideal
320 = read_double(value, 50);
321 } else if (same_str(key, "OTHER TORSION ANGLES (DEGREES)")) {
322 impl::find_or_add(ref_info.restr_stats, "t_other_torsion").dev_ideal
323 = read_double(value, 50);
324 } else if (same_str(key, "TLS GROUP")) {
325 ref_info.tls_groups.emplace_back();
326 ref_info.tls_groups.back().id = std::string(value, end);
327 } else if (same_str(key, "SET") ||
328 // "REMARK 3 SELECTION:" -> TLS
329 // "REMARK 3 SELECTION :" -> NCS
330 (same_str(key, "SELECTION") && colon == line + 23)) {
331 if (!ref_info.tls_groups.empty()) {
332 TlsGroup& group = ref_info.tls_groups.back();
333 group.selections.emplace_back();
334 group.selections.back().details = std::string(value, end);
335 possibly_unfinished_remark3 = &group.selections.back().details;
336 }
337 } else if (same_str(key, "RESIDUE RANGE")) {
338 if (!ref_info.tls_groups.empty() && end > colon+21) {
339 TlsGroup& group = ref_info.tls_groups.back();
340 group.selections.emplace_back();
341 TlsGroup::Selection& sel = group.selections.back();
342 sel.chain = read_string(colon+1, 5);
343 if (sel.chain == read_string(colon+16, 5)) {
344 try {
345 sel.res_begin = SeqId(read_string(colon+6, 6));
346 sel.res_end = SeqId(read_string(colon+21, 6));
347 } catch (std::invalid_argument&) {
348 group.selections.pop_back();
349 }
350 } else { // unexpected -- TLS group should be in one chain
351 group.selections.pop_back();
352 }
353 }
354 } else if (same_str(key, "ORIGIN FOR THE GROUP (A)")) {
355 std::vector<std::string> xyz = split_str_multi(std::string(value, end));
356 if (ref_info.tls_groups.empty() || xyz.size() != 3)
357 return;
358 Position& origin = ref_info.tls_groups.back().origin;
359 origin.x = fast_atof(xyz[0].c_str());
360 origin.y = fast_atof(xyz[1].c_str());
361 origin.z = fast_atof(xyz[2].c_str());
362 } else if (is_tls_item(key)) {
363 if (ref_info.tls_groups.empty())
364 return;
365 TlsGroup& tls = ref_info.tls_groups.back();
366 std::vector<std::string> tokens = split_str_multi(key_start);
367 for (size_t i = 0; i + 1 < tokens.size(); i += 2) {
368 std::string& k = tokens[i];
369 if (k.size() == 4 && k[3] == ':')
370 k.resize(3);
371 if (is_tls_item(k)) {
372 Mat33& m = k[0] == 'T' ? tls.T : k[0] == 'L' ? tls.L : tls.S;
373 int x = k[1] - '1';
374 int y = k[2] - '1';
375 m[x][y] = m[y][x] = fast_atof(tokens[i+1].c_str());
376 }
377 }
378 }
379 } else {
380 if (same_str(key, "DATA USED IN REFINEMENT.")) {
381 meta.refinement.emplace_back();
382 meta.refinement.back().id = std::to_string(meta.refinement.size());
383 } else if (same_str(key, "FIT IN THE HIGHEST RESOLUTION BIN.")) {
384 if (!meta.refinement.empty())
385 meta.refinement.back().bins.emplace_back();
386 }
387 }
388}
389
390inline void read_remark_200_230_240(const char* line, Metadata& meta,
391 std::string*& cryst_desc) {
392 // multi-line continuation requires special handling
393 if (cryst_desc) {
394 if (line[10] == ' ' && line[11] == ' ') {
395 const char* start = line + 11;
396 cryst_desc->append(start, rtrim_cstr(start) - start);
397 return;
398 }
399 cryst_desc = nullptr;
400 }
401
402 const char* key_start = skip_blank(line + 10);
403 const char* colon = std::strchr(key_start, ':');
404 const char* key_end = rtrim_cstr(key_start, colon);
405 std::string key(key_start, key_end);
406 if (colon) {
407 const char* value = skip_blank(colon + 1);
408 const char* end = rtrim_cstr(value);
409 if (end - value == 4 && std::strncmp(value, "NULL", 4) == 0)
410 return;
411 if (same_str(key, "INTENSITY-INTEGRATION SOFTWARE")) {
412 add_software(meta, SoftwareItem::DataReduction, std::string(value, end));
413 } else if (same_str(key, "DATA SCALING SOFTWARE")) {
414 add_software(meta, SoftwareItem::DataScaling, std::string(value, end));
415 } else if (same_str(key, "SOFTWARE USED")) {
416 add_software(meta, SoftwareItem::Phasing, std::string(value, end));
417 } else if (same_str(key, "METHOD USED TO DETERMINE THE STRUCTURE")) {
418 meta.solved_by = std::string(value, end);
419 } else if (same_str(key, "STARTING MODEL")) {
420 meta.starting_model = std::string(value, end);
421 } else if (!meta.experiments.empty()) {
422 ExperimentInfo& exper = meta.experiments.back();
423 DiffractionInfo& diffr = meta.crystals.back().diffractions[0];
424 if (same_str(key, "EXPERIMENT TYPE")) {
425 exper.method = std::string(value, end);
426 } else if (same_str(key, "NUMBER OF CRYSTALS USED")) {
427 exper.number_of_crystals = std::atoi(value);
428 } else if (same_str(key, "PH")) {
429 if (is_double(value))
430 meta.crystals.back().ph = fast_atof(value);
431 else
432 meta.crystals.back().ph_range = std::string(value, end);
433 } else if (same_str(key, "DATE OF DATA COLLECTION")) {
434 diffr.collection_date = pdb_date_format_to_iso(std::string(value, end));
435 } else if (same_str(key, "TEMPERATURE (KELVIN)")) {
436 diffr.temperature = fast_atof(value);
437 } else if (same_str(key, "SYNCHROTRON (Y/N)")) {
438 if (*value == 'Y')
439 diffr.source = "SYNCHROTRON";
440 } else if (same_str(key, "RADIATION SOURCE")) {
441 if (same_str(diffr.source, "SYNCHROTRON"))
442 diffr.synchrotron = std::string(value, end);
443 else
444 diffr.source = std::string(value, end);
445 } else if (same_str(key, "NEUTRON SOURCE")) {
446 diffr.source = std::string(value, end);
447 } else if (same_str(key, "BEAMLINE")) {
448 diffr.beamline = std::string(value, end);
449 if (!diffr.synchrotron.empty() && diffr.source_type.empty())
450 diffr.source_type = diffr.synchrotron + " BEAMLINE " + diffr.beamline;
451 } else if (same_str(key, "X-RAY GENERATOR MODEL")) {
452 diffr.source_type = std::string(value, end);
453 } else if (same_str(key, "MONOCHROMATIC OR LAUE (M/L)")) {
454 diffr.mono_or_laue = *value;
455 } else if (same_str(key, "WAVELENGTH OR RANGE (A)")) {
456 diffr.wavelengths = std::string(value, end);
457 } else if (same_str(key, "MONOCHROMATOR")) {
458 diffr.monochromator = std::string(value, end);
459 } else if (same_str(key, "OPTICS")) {
460 diffr.optics = std::string(value, end);
461 } else if (same_str(key, "DETECTOR TYPE")) {
462 diffr.detector = std::string(value, end);
463 } else if (same_str(key, "DETECTOR MANUFACTURER")) {
464 diffr.detector_make = std::string(value, end);
465 } else if (same_str(key, "NUMBER OF UNIQUE REFLECTIONS")) {
466 exper.unique_reflections = std::atoi(value);
467 } else if (same_str(key, "RESOLUTION RANGE HIGH (A)")) {
468 exper.reflections.resolution_high = fast_atof(value);
469 } else if (same_str(key, "RESOLUTION RANGE LOW (A)")) {
470 exper.reflections.resolution_low = fast_atof(value);
471 } else if (same_str(key, "COMPLETENESS FOR RANGE (%)")) {
472 exper.reflections.completeness = fast_atof(value);
473 } else if (same_str(key, "DATA REDUNDANCY")) {
474 exper.reflections.redundancy = fast_atof(value);
475 } else if (same_str(key, "R MERGE (I)")) {
476 exper.reflections.r_merge = fast_atof(value);
477 } else if (same_str(key, "R SYM (I)")) {
478 exper.reflections.r_sym = fast_atof(value);
479 } else if (same_str(key, "<I/SIGMA(I)> FOR THE DATA SET")) {
480 exper.reflections.mean_I_over_sigma = fast_atof(value);
481 } else if (same_str(key, "REMARK")) {
482 cryst_desc = &meta.crystals.back().description;
483 *cryst_desc = std::string(value, end);
484 } else if (!exper.shells.empty()) {
485 if (same_str(key, "HIGHEST RESOLUTION SHELL, RANGE HIGH (A)")) {
486 exper.shells.back().resolution_high = fast_atof(value);
487 } else if (same_str(key, "HIGHEST RESOLUTION SHELL, RANGE LOW (A)")) {
488 exper.shells.back().resolution_low = fast_atof(value);
489 } else if (same_str(key, "COMPLETENESS FOR SHELL (%)")) {
490 exper.shells.back().completeness = fast_atof(value);
491 } else if (same_str(key, "DATA REDUNDANCY IN SHELL")) {
492 exper.shells.back().redundancy = fast_atof(value);
493 } else if (same_str(key, "R MERGE FOR SHELL (I)")) {
494 exper.shells.back().r_merge = fast_atof(value);
495 } else if (same_str(key, "R SYM FOR SHELL (I)")) {
496 exper.shells.back().r_sym = fast_atof(value);
497 } else if (same_str(key, "<I/SIGMA(I)> FOR SHELL")) {
498 exper.shells.back().mean_I_over_sigma = fast_atof(value);
499 }
500 }
501 }
502 } else {
503 if (same_str(key, "EXPERIMENTAL DETAILS")) {
504 meta.crystals.emplace_back();
505 CrystalInfo& c = meta.crystals.back();
506 c.id = std::to_string(meta.crystals.size());
507 c.diffractions.emplace_back();
508 c.diffractions[0].id = c.id;
509 meta.experiments.emplace_back();
510 meta.experiments.back().diffraction_ids.push_back(c.id);
511 if (line[8] == '0' && line[9] == '0')
512 c.diffractions[0].scattering_type = "x-ray";
513 else if (line[8] == '3' && line[9] == '0')
514 c.diffractions[0].scattering_type = "neutron";
515 else if (line[8] == '4' && line[9] == '0')
516 c.diffractions[0].scattering_type = "electron";
517 }
518 if (same_str(key, "IN THE HIGHEST RESOLUTION SHELL.")) {
519 if (!meta.experiments.empty())
520 meta.experiments.back().shells.emplace_back();
521 }
522 }
523}
524
525} // namespace pdb_impl
526
527inline int remark_number(const std::string& remark) {
528 if (remark.size() > 11)
529 return pdb_impl::read_int(remark.c_str() + 7, 3);
530 return 0;
531}
532
534 std::string* possibly_unfinished_remark3 = nullptr;
535 std::string* cr_desc = nullptr;
536 for (const std::string& remark : st.raw_remarks)
537 switch (remark_number(remark)) {
538 case 3:
539 pdb_impl::read_remark3_line(remark.c_str(), st.meta,
541 break;
542 case 200:
543 case 230:
544 case 240:
545 pdb_impl::read_remark_200_230_240(remark.c_str(), st.meta, cr_desc);
546 break;
547 case 300:
548 if (!st.meta.remark_300_detail.empty()) {
549 st.meta.remark_300_detail += '\n';
550 st.meta.remark_300_detail += rtrim_str(remark.substr(11));
551 } else if (remark.compare(11, 7, "REMARK:") == 0) {
552 st.meta.remark_300_detail = trim_str(remark.substr(18));
553 }
554 break;
555 }
556}
557
558// Returns operations corresponding to 1555, 2555, ... N555
559inline
560std::vector<Op> read_remark_290(const std::vector<std::string>& raw_remarks) {
561 std::vector<Op> ops;
562 // we only check triplet notation:
563 // REMARK 290 NNNMMM OPERATOR
564 // REMARK 290 1555 X,Y,Z
565 for (const std::string& remark : raw_remarks)
566 if (remark_number(remark) == 290 && remark.size() > 25 &&
567 std::memcmp(&remark[10], " ", 5) == 0 &&
568 std::memcmp(&remark[18], "555 ", 6) == 0) {
569 if (pdb_impl::read_int(remark.c_str() + 15, 3) != (int)ops.size() + 1)
570 fail("Symmetry operators not in order?: " + remark);
571 Op op = parse_triplet(pdb_impl::read_string(remark.c_str() + 24, 56));
572 ops.push_back(op);
573 }
574 return ops;
575}
576
577} // namespace gemmi
578#endif
Document read_string(const std::string &data)
Definition cif.hpp:303
int string_to_int(const char *p, bool checked, size_t length=0)
Definition atox.hpp:73
Op parse_triplet(const std::string &s)
Definition symmetry.hpp:302
const char * rtrim_cstr(const char *start, const char *end=nullptr)
Definition util.hpp:124
double fast_atof(const char *p, const char **endptr=nullptr)
Definition atof.hpp:32
bool istarts_with(const std::string &str, const std::string &prefix)
Definition util.hpp:93
std::string rtrim_str(const std::string &str)
Definition util.hpp:118
void read_metadata_from_remarks(Structure &st)
Definition remarks.hpp:533
from_chars_result fast_from_chars(const char *start, const char *end, double &d)
Definition atof.hpp:16
int remark_number(const std::string &remark)
Definition remarks.hpp:527
bool is_digit(char c)
Definition atox.hpp:43
void fail(const std::string &msg)
Definition fail.hpp:59
std::vector< std::string > split_str_multi(const std::string &str, const char *seps=" \t")
Definition util.hpp:174
std::vector< Op > read_remark_290(const std::vector< std::string > &raw_remarks)
Definition remarks.hpp:560
bool is_space(char c)
Definition atox.hpp:23
std::string trim_str(const std::string &str)
Definition util.hpp:109
char alpha_up(char c)
Definition util.hpp:60
const char * skip_blank(const char *p)
Definition atox.hpp:47
int no_sign_atoi(const char *p, const char **endptr=nullptr)
Definition atox.hpp:123