6#ifndef GEMMI_REMARKS_HPP_
7#define GEMMI_REMARKS_HPP_
24inline int read_int(
const char* p,
int field_length) {
28inline double read_double(
const char* p,
int field_length) {
35inline std::string
read_string(
const char* p,
int field_length) {
37 while (field_length != 0 &&
is_space(*p)) {
42 for (
int i = 0; i < field_length; ++i)
43 if (p[i] ==
'\n' || p[i] ==
'\r' || p[i] ==
'\0') {
48 while (field_length != 0 &&
is_space(p[field_length-1]))
50 return std::string(p, field_length);
/// Compare a std::string with a string literal for exact equality.
///
/// @tparam N  size of the literal's array, i.e. its length plus the
///            terminating NUL; deduced from the argument.
/// @param s        string to test.
/// @param literal  character array (normally a string literal) to match.
/// @return true when `s` has exactly N-1 characters and std::strcmp reports
///         no difference between `s.c_str()` and `literal`.
///
/// The length test comes first so that strings of a different size are
/// rejected without scanning their contents.
template<std::size_t N>
inline bool same_str(const std::string& s, const char (&literal)[N]) {
  if (s.size() != N - 1)
    return false;
  return std::strcmp(s.c_str(), literal) == 0;
}
65inline std::string pdb_date_format_to_iso(
const std::string& date) {
66 const char months[] =
"JAN01FEB02MAR03APR04MAY05JUN06"
67 "JUL07AUG08SEP09OCT10NOV11DEC122222";
71 std::string iso =
"xxxx-xx-xx";
73 std::memcpy(&iso[0], &date[7], 4);
75 std::memcpy(&iso[0], (date[7] >
'6' ?
"19" :
"20"), 2);
76 std::memcpy(&iso[2], &date[7], 2);
79 if (
const char* m = std::strstr(months, month))
80 std::memcpy(&iso[5], m + 3, 2);
81 std::memcpy(&iso[8], &date[0], 2);
85inline bool is_double(
const char* p) {
86 while (std::isspace(*p)) ++p;
87 if (*p ==
'-' || *p ==
'+') ++p;
93 while (std::isspace(*p)) ++p;
/// Tell whether `key` names a single TLS tensor element.
///
/// @param key  candidate key, e.g. "T11", "L23" or "S31".
/// @return true when `key` is exactly three characters long, the first being
///         'T', 'L' or 'S' and each of the other two being '1', '2' or '3'.
///
/// Matching is case-sensitive and no trailing characters (such as ':') are
/// accepted.
inline bool is_tls_item(const std::string& key) {
  if (key.size() != 3)
    return false;
  const char tensor = key[0];
  const char row = key[1];
  const char col = key[2];
  const bool tensor_ok = tensor == 'T' || tensor == 'L' || tensor == 'S';
  const bool row_ok = row == '1' || row == '2' || row == '3';
  const bool col_ok = col == '1' || col == '2' || col == '3';
  return tensor_ok && row_ok && col_ok;
}
120 const std::string& name) {
121 for (
size_t start = 0, end = 0; end != std::string::npos; start = end + 1) {
122 end = name.find(
',', start);
123 while (end != std::string::npos &&
124 name[end+1] ==
' ' &&
is_digit(name[end+2]))
125 end = name.find(
',', end + 1);
126 meta.software.emplace_back();
127 SoftwareItem& item = meta.software.back();
128 item.name =
trim_str(name.substr(start, end - start));
129 size_t sep = item.name.find(
' ');
130 if (sep != std::string::npos) {
131 size_t ver_start = item.name.find_first_not_of(
" (", sep + 1);
132 item.version = item.name.substr(ver_start);
133 item.name.resize(sep);
134 if (!item.version.empty() && item.version.back() ==
')') {
135 size_t open_br = item.version.find(
'(');
136 if (open_br == std::string::npos) {
137 item.version.pop_back();
138 }
else if (open_br + 11 == item.version.size() ||
139 open_br + 13 == item.version.size()) {
140 item.date = pdb_date_format_to_iso(item.version.substr(open_br + 1));
141 if (item.date.size() == 10 && item.date[5] !=
'x') {
142 size_t last = item.version.find_last_not_of(
' ', open_br - 1);
143 item.version.resize(last + 1);
150 item.version.erase(0, 8);
152 item.classification = type;
158inline void add_restraint_count_weight(RefinementInfo& ref_info,
159 const char* key,
const char* value) {
162 ref_info.restr_stats.emplace_back(key);
163 RefinementInfo::Restr& restr = ref_info.restr_stats.back();
166 if (
const char* sep = std::strchr(endptr,
';'))
167 restr.weight =
fast_atof(sep + 1, &endptr);
168 if (
const char* sep = std::strchr(endptr,
';'))
172inline void read_remark3_line(
const char* line, Metadata& meta,
173 std::string*& possibly_unfinished_remark3) {
182 const char* key_start =
skip_blank(line + 10);
183 const char* colon = std::strchr(key_start,
':');
184 const char* key_end =
rtrim_cstr(key_start, colon);
185 std::string key(key_start, key_end);
188 if (possibly_unfinished_remark3) {
189 if (key_start > line + 17) {
190 *possibly_unfinished_remark3 +=
' ';
191 possibly_unfinished_remark3->append(key);
194 possibly_unfinished_remark3 =
nullptr;
200 if (end - value == 4 && std::strncmp(value,
"NULL", 4) == 0)
202 if (same_str(key,
"PROGRAM"))
204 if (meta.refinement.empty())
206 RefinementInfo& ref_info = meta.refinement.back();
207 if (same_str(key,
"RESOLUTION RANGE HIGH (ANGSTROMS)")) {
208 ref_info.resolution_high =
fast_atof(value);
209 }
else if (same_str(key,
"RESOLUTION RANGE LOW (ANGSTROMS)")) {
210 ref_info.resolution_low =
fast_atof(value);
211 }
else if (same_str(key,
"COMPLETENESS FOR RANGE (%)")) {
212 ref_info.completeness =
fast_atof(value);
213 }
else if (same_str(key,
"NUMBER OF REFLECTIONS")) {
214 ref_info.reflection_count = std::atoi(value);
215 }
else if (same_str(key,
"CROSS-VALIDATION METHOD")) {
216 ref_info.cross_validation_method = std::string(value, end);
217 }
else if (same_str(key,
"FREE R VALUE TEST SET SELECTION")) {
218 ref_info.rfree_selection_method = std::string(value, end);
219 }
else if (same_str(key,
"R VALUE (WORKING + TEST SET)")) {
221 }
else if (same_str(key,
"R VALUE (WORKING SET)")) {
223 }
else if (same_str(key,
"FREE R VALUE")) {
225 }
else if (same_str(key,
"FREE R VALUE TEST SET COUNT")) {
226 ref_info.rfree_set_count = atoi(value);
227 }
else if (same_str(key,
"TOTAL NUMBER OF BINS USED")) {
228 ref_info.bin_count = std::atoi(value);
229 }
else if (same_str(key,
"BIN RESOLUTION RANGE HIGH (A)")) {
230 if (!ref_info.bins.empty())
231 ref_info.bins.back().resolution_high =
fast_atof(value);
232 }
else if (same_str(key,
"BIN RESOLUTION RANGE LOW (A)")) {
233 if (!ref_info.bins.empty())
234 ref_info.bins.back().resolution_low =
fast_atof(value);
235 }
else if (same_str(key,
"BIN COMPLETENESS (WORKING+TEST) (%)")) {
236 if (!ref_info.bins.empty())
237 ref_info.bins.back().completeness =
fast_atof(value);
238 }
else if (same_str(key,
"REFLECTIONS IN BIN (WORKING+TEST)")) {
239 if (!ref_info.bins.empty())
240 ref_info.bins.back().reflection_count = std::atoi(value);
241 }
else if (same_str(key,
"BIN R VALUE (WORKING+TEST)")) {
242 if (!ref_info.bins.empty())
243 ref_info.bins.back().r_all =
fast_atof(value);
244 }
else if (same_str(key,
"BIN R VALUE (WORKING SET)")) {
245 if (!ref_info.bins.empty())
246 ref_info.bins.back().r_work =
fast_atof(value);
247 }
else if (same_str(key,
"BIN FREE R VALUE")) {
248 if (!ref_info.bins.empty())
249 ref_info.bins.back().r_free =
fast_atof(value);
250 }
else if (same_str(key,
"BIN FREE R VALUE TEST SET COUNT")) {
251 if (!ref_info.bins.empty())
252 ref_info.bins.back().rfree_set_count = std::atoi(value);
253 }
else if (same_str(key,
"FROM WILSON PLOT (A**2)")) {
256 }
else if (same_str(key,
"MEAN B VALUE (OVERALL, A**2)")) {
258 }
else if (same_str(key,
"B11 (A**2)")) {
259 ref_info.aniso_b[0][0] =
fast_atof(value);
260 }
else if (same_str(key,
"B22 (A**2)")) {
261 ref_info.aniso_b[1][1] =
fast_atof(value);
262 }
else if (same_str(key,
"B33 (A**2)")) {
263 ref_info.aniso_b[2][2] =
fast_atof(value);
264 }
else if (same_str(key,
"B12 (A**2)")) {
265 ref_info.aniso_b[0][1] =
fast_atof(value);
266 }
else if (same_str(key,
"B13 (A**2)")) {
267 ref_info.aniso_b[0][2] =
fast_atof(value);
268 }
else if (same_str(key,
"B23 (A**2)")) {
269 ref_info.aniso_b[1][2] =
fast_atof(value);
270 }
else if (same_str(key,
"ESD FROM LUZZATI PLOT (A)")) {
271 ref_info.luzzati_error =
fast_atof(value);
272 }
else if (same_str(key,
"DPI (BLOW EQ-10) BASED ON R VALUE (A)")) {
274 }
else if (same_str(key,
"DPI (BLOW EQ-9) BASED ON FREE R VALUE (A)")) {
275 ref_info.dpi_blow_rfree =
fast_atof(value);
276 }
else if (same_str(key,
"DPI (CRUICKSHANK) BASED ON R VALUE (A)")) {
277 ref_info.dpi_cruickshank_r =
fast_atof(value);
278 }
else if (same_str(key,
"DPI (CRUICKSHANK) BASED ON FREE R VALUE (A)")) {
279 ref_info.dpi_cruickshank_rfree =
fast_atof(value);
280 }
else if (same_str(key,
"CORRELATION COEFFICIENT FO-FC")) {
282 }
else if (same_str(key,
"CORRELATION COEFFICIENT FO-FC FREE")) {
283 ref_info.cc_fo_fc_free =
fast_atof(value);
284 }
else if (same_str(key,
"BOND LENGTHS")) {
285 add_restraint_count_weight(ref_info,
"t_bond_d", value);
286 }
else if (same_str(key,
"BOND ANGLES")) {
287 add_restraint_count_weight(ref_info,
"t_angle_deg", value);
288 }
else if (same_str(key,
"TORSION ANGLES")) {
289 add_restraint_count_weight(ref_info,
"t_dihedral_angle_d", value);
290 }
else if (same_str(key,
"TRIGONAL CARBON PLANES")) {
291 add_restraint_count_weight(ref_info,
"t_trig_c_planes", value);
292 }
else if (same_str(key,
"GENERAL PLANES")) {
293 add_restraint_count_weight(ref_info,
"t_gen_planes", value);
294 }
else if (same_str(key,
"ISOTROPIC THERMAL FACTORS")) {
295 add_restraint_count_weight(ref_info,
"t_it", value);
296 }
else if (same_str(key,
"BAD NON-BONDED CONTACTS")) {
297 add_restraint_count_weight(ref_info,
"t_nbd", value);
298 }
else if (same_str(key,
"IMPROPER TORSIONS")) {
299 add_restraint_count_weight(ref_info,
"t_improper_torsion", value);
300 }
else if (same_str(key,
"CHIRAL IMPROPER TORSION")) {
301 add_restraint_count_weight(ref_info,
"t_chiral_improper_torsion", value);
302 }
else if (same_str(key,
"SUM OF OCCUPANCIES")) {
303 add_restraint_count_weight(ref_info,
"t_sum_occupancies", value);
304 }
else if (same_str(key,
"UTILITY DISTANCES")) {
305 add_restraint_count_weight(ref_info,
"t_utility_distance", value);
306 }
else if (same_str(key,
"UTILITY ANGLES")) {
307 add_restraint_count_weight(ref_info,
"t_utility_angle", value);
308 }
else if (same_str(key,
"UTILITY TORSION")) {
309 add_restraint_count_weight(ref_info,
"t_utility_torsion", value);
310 }
else if (same_str(key,
"IDEAL-DIST CONTACT TERM")) {
311 add_restraint_count_weight(ref_info,
"t_ideal_dist_contact", value);
312 }
else if (same_str(key,
"BOND LENGTHS (A)")) {
313 impl::find_or_add(ref_info.restr_stats,
"t_bond_d").dev_ideal
314 = read_double(value, 50);
315 }
else if (same_str(key,
"BOND ANGLES (DEGREES)")) {
316 impl::find_or_add(ref_info.restr_stats,
"t_angle_deg").dev_ideal
317 = read_double(value, 50);
318 }
else if (same_str(key,
"PEPTIDE OMEGA TORSION ANGLES (DEGREES)")) {
319 impl::find_or_add(ref_info.restr_stats,
"t_omega_torsion").dev_ideal
320 = read_double(value, 50);
321 }
else if (same_str(key,
"OTHER TORSION ANGLES (DEGREES)")) {
322 impl::find_or_add(ref_info.restr_stats,
"t_other_torsion").dev_ideal
323 = read_double(value, 50);
324 }
else if (same_str(key,
"TLS GROUP")) {
325 ref_info.tls_groups.emplace_back();
326 ref_info.tls_groups.back().id = std::string(value, end);
327 }
else if (same_str(key,
"SET") ||
330 (same_str(key,
"SELECTION") && colon == line + 23)) {
331 if (!ref_info.tls_groups.empty()) {
332 TlsGroup& group = ref_info.tls_groups.back();
333 group.selections.emplace_back();
334 group.selections.back().details = std::string(value, end);
335 possibly_unfinished_remark3 = &group.selections.back().details;
337 }
else if (same_str(key,
"RESIDUE RANGE")) {
338 if (!ref_info.tls_groups.empty() && end > colon+21) {
339 TlsGroup& group = ref_info.tls_groups.back();
340 group.selections.emplace_back();
341 TlsGroup::Selection& sel = group.selections.back();
347 }
catch (std::invalid_argument&) {
348 group.selections.pop_back();
351 group.selections.pop_back();
354 }
else if (same_str(key,
"ORIGIN FOR THE GROUP (A)")) {
355 std::vector<std::string> xyz =
split_str_multi(std::string(value, end));
356 if (ref_info.tls_groups.empty() || xyz.size() != 3)
358 Position& origin = ref_info.tls_groups.back().origin;
362 }
else if (is_tls_item(key)) {
363 if (ref_info.tls_groups.empty())
365 TlsGroup& tls = ref_info.tls_groups.back();
367 for (
size_t i = 0; i + 1 < tokens.size(); i += 2) {
368 std::string& k = tokens[i];
369 if (k.size() == 4 && k[3] ==
':')
371 if (is_tls_item(k)) {
372 Mat33& m = k[0] ==
'T' ? tls.T : k[0] ==
'L' ? tls.L : tls.S;
375 m[x][y] = m[y][x] =
fast_atof(tokens[i+1].c_str());
380 if (same_str(key,
"DATA USED IN REFINEMENT.")) {
381 meta.refinement.emplace_back();
382 meta.refinement.back().id = std::to_string(meta.refinement.size());
383 }
else if (same_str(key,
"FIT IN THE HIGHEST RESOLUTION BIN.")) {
384 if (!meta.refinement.empty())
385 meta.refinement.back().bins.emplace_back();
390inline void read_remark_200_230_240(
const char* line, Metadata& meta,
391 std::string*& cryst_desc) {
394 if (line[10] ==
' ' && line[11] ==
' ') {
395 const char* start = line + 11;
396 cryst_desc->append(start,
rtrim_cstr(start) - start);
399 cryst_desc =
nullptr;
402 const char* key_start =
skip_blank(line + 10);
403 const char* colon = std::strchr(key_start,
':');
404 const char* key_end =
rtrim_cstr(key_start, colon);
405 std::string key(key_start, key_end);
409 if (end - value == 4 && std::strncmp(value,
"NULL", 4) == 0)
411 if (same_str(key,
"INTENSITY-INTEGRATION SOFTWARE")) {
413 }
else if (same_str(key,
"DATA SCALING SOFTWARE")) {
415 }
else if (same_str(key,
"SOFTWARE USED")) {
417 }
else if (same_str(key,
"METHOD USED TO DETERMINE THE STRUCTURE")) {
418 meta.solved_by = std::string(value, end);
419 }
else if (same_str(key,
"STARTING MODEL")) {
420 meta.starting_model = std::string(value, end);
421 }
else if (!meta.experiments.empty()) {
422 ExperimentInfo& exper = meta.experiments.back();
423 DiffractionInfo& diffr = meta.crystals.back().diffractions[0];
424 if (same_str(key,
"EXPERIMENT TYPE")) {
425 exper.method = std::string(value, end);
426 }
else if (same_str(key,
"NUMBER OF CRYSTALS USED")) {
427 exper.number_of_crystals = std::atoi(value);
428 }
else if (same_str(key,
"PH")) {
429 if (is_double(value))
430 meta.crystals.back().ph =
fast_atof(value);
432 meta.crystals.back().ph_range = std::string(value, end);
433 }
else if (same_str(key,
"DATE OF DATA COLLECTION")) {
434 diffr.collection_date = pdb_date_format_to_iso(std::string(value, end));
435 }
else if (same_str(key,
"TEMPERATURE (KELVIN)")) {
437 }
else if (same_str(key,
"SYNCHROTRON (Y/N)")) {
439 diffr.source =
"SYNCHROTRON";
440 }
else if (same_str(key,
"RADIATION SOURCE")) {
441 if (same_str(diffr.source,
"SYNCHROTRON"))
442 diffr.synchrotron = std::string(value, end);
444 diffr.source = std::string(value, end);
445 }
else if (same_str(key,
"NEUTRON SOURCE")) {
446 diffr.source = std::string(value, end);
447 }
else if (same_str(key,
"BEAMLINE")) {
448 diffr.beamline = std::string(value, end);
449 if (!diffr.synchrotron.empty() && diffr.source_type.empty())
450 diffr.source_type = diffr.synchrotron +
" BEAMLINE " + diffr.beamline;
451 }
else if (same_str(key,
"X-RAY GENERATOR MODEL")) {
452 diffr.source_type = std::string(value, end);
453 }
else if (same_str(key,
"MONOCHROMATIC OR LAUE (M/L)")) {
454 diffr.mono_or_laue = *value;
455 }
else if (same_str(key,
"WAVELENGTH OR RANGE (A)")) {
456 diffr.wavelengths = std::string(value, end);
457 }
else if (same_str(key,
"MONOCHROMATOR")) {
458 diffr.monochromator = std::string(value, end);
459 }
else if (same_str(key,
"OPTICS")) {
460 diffr.optics = std::string(value, end);
461 }
else if (same_str(key,
"DETECTOR TYPE")) {
462 diffr.detector = std::string(value, end);
463 }
else if (same_str(key,
"DETECTOR MANUFACTURER")) {
464 diffr.detector_make = std::string(value, end);
465 }
else if (same_str(key,
"NUMBER OF UNIQUE REFLECTIONS")) {
466 exper.unique_reflections = std::atoi(value);
467 }
else if (same_str(key,
"RESOLUTION RANGE HIGH (A)")) {
468 exper.reflections.resolution_high =
fast_atof(value);
469 }
else if (same_str(key,
"RESOLUTION RANGE LOW (A)")) {
470 exper.reflections.resolution_low =
fast_atof(value);
471 }
else if (same_str(key,
"COMPLETENESS FOR RANGE (%)")) {
472 exper.reflections.completeness =
fast_atof(value);
473 }
else if (same_str(key,
"DATA REDUNDANCY")) {
474 exper.reflections.redundancy =
fast_atof(value);
475 }
else if (same_str(key,
"R MERGE (I)")) {
476 exper.reflections.r_merge =
fast_atof(value);
477 }
else if (same_str(key,
"R SYM (I)")) {
478 exper.reflections.r_sym =
fast_atof(value);
479 }
else if (same_str(key,
"<I/SIGMA(I)> FOR THE DATA SET")) {
480 exper.reflections.mean_I_over_sigma =
fast_atof(value);
481 }
else if (same_str(key,
"REMARK")) {
482 cryst_desc = &meta.crystals.back().description;
483 *cryst_desc = std::string(value, end);
484 }
else if (!exper.shells.empty()) {
485 if (same_str(key,
"HIGHEST RESOLUTION SHELL, RANGE HIGH (A)")) {
486 exper.shells.back().resolution_high =
fast_atof(value);
487 }
else if (same_str(key,
"HIGHEST RESOLUTION SHELL, RANGE LOW (A)")) {
488 exper.shells.back().resolution_low =
fast_atof(value);
489 }
else if (same_str(key,
"COMPLETENESS FOR SHELL (%)")) {
490 exper.shells.back().completeness =
fast_atof(value);
491 }
else if (same_str(key,
"DATA REDUNDANCY IN SHELL")) {
492 exper.shells.back().redundancy =
fast_atof(value);
493 }
else if (same_str(key,
"R MERGE FOR SHELL (I)")) {
494 exper.shells.back().r_merge =
fast_atof(value);
495 }
else if (same_str(key,
"R SYM FOR SHELL (I)")) {
496 exper.shells.back().r_sym =
fast_atof(value);
497 }
else if (same_str(key,
"<I/SIGMA(I)> FOR SHELL")) {
498 exper.shells.back().mean_I_over_sigma =
fast_atof(value);
503 if (same_str(key,
"EXPERIMENTAL DETAILS")) {
504 meta.crystals.emplace_back();
505 CrystalInfo& c = meta.crystals.back();
506 c.id = std::to_string(meta.crystals.size());
507 c.diffractions.emplace_back();
508 c.diffractions[0].id = c.id;
509 meta.experiments.emplace_back();
510 meta.experiments.back().diffraction_ids.push_back(c.id);
511 if (line[8] ==
'0' && line[9] ==
'0')
512 c.diffractions[0].scattering_type =
"x-ray";
513 else if (line[8] ==
'3' && line[9] ==
'0')
514 c.diffractions[0].scattering_type =
"neutron";
515 else if (line[8] ==
'4' && line[9] ==
'0')
516 c.diffractions[0].scattering_type =
"electron";
518 if (same_str(key,
"IN THE HIGHEST RESOLUTION SHELL.")) {
519 if (!meta.experiments.empty())
520 meta.experiments.back().shells.emplace_back();
529 return pdb_impl::read_int(
remark.c_str() + 7, 3);
535 std::string*
cr_desc =
nullptr;
536 for (
const std::string&
remark :
st.raw_remarks)
539 pdb_impl::read_remark3_line(
remark.c_str(),
st.meta,
548 if (!
st.meta.remark_300_detail.empty()) {
549 st.meta.remark_300_detail +=
'\n';
551 }
else if (
remark.compare(11, 7,
"REMARK:") == 0) {
565 for (
const std::string&
remark : raw_remarks)
567 std::memcmp(&
remark[10],
" ", 5) == 0 &&
568 std::memcmp(&
remark[18],
"555 ", 6) == 0) {
569 if (pdb_impl::read_int(
remark.c_str() + 15, 3) != (
int)
ops.size() + 1)
570 fail(
"Symmetry operators not in order?: " +
remark);
Document read_string(const std::string &data)
int string_to_int(const char *p, bool checked, size_t length=0)
Op parse_triplet(const std::string &s)
const char * rtrim_cstr(const char *start, const char *end=nullptr)
double fast_atof(const char *p, const char **endptr=nullptr)
bool istarts_with(const std::string &str, const std::string &prefix)
std::string rtrim_str(const std::string &str)
void read_metadata_from_remarks(Structure &st)
from_chars_result fast_from_chars(const char *start, const char *end, double &d)
int remark_number(const std::string &remark)
void fail(const std::string &msg)
std::vector< std::string > split_str_multi(const std::string &str, const char *seps=" \t")
std::vector< Op > read_remark_290(const std::vector< std::string > &raw_remarks)
std::string trim_str(const std::string &str)
const char * skip_blank(const char *p)
int no_sign_atoi(const char *p, const char **endptr=nullptr)