5#ifndef GEMMI_CIF2MTZ_HPP_
6#define GEMMI_CIF2MTZ_HPP_
10#include <unordered_map>
25 if (
rb.refln_loop ==
nullptr)
27 for (
const std::string& tag :
rb.refln_loop->tags) {
28 if (tag.size() < 7 + 6)
39 tag !=
"_refln.pdbx_r_free_flag")))
55 std::vector<int> positions;
56 positions.reserve(13);
57 for (
const char* tag : {
"_refln.index_h",
"_refln.index_k",
"_refln.index_l"}) {
60 fail(
"while reading old anomalous: _refln.index_{h,k,l} not found");
61 positions.push_back(pos);
63 for (
const char* tag : {
"_refln.status",
"_refln.pdbx_r_free_flag"}) {
66 positions.push_back(pos);
70 ret.tags.reserve(positions.size());
71 for (
int p : positions)
74 {
"_refln.F_meas_au",
"_refln.F_meas_sigma_au"},
75 {
"_refln.intensity_meas",
"_refln.intensity_sigma"}
78 {
"_refln.pdbx_F_plus",
"_refln.pdbx_F_plus_sigma",
79 "_refln.pdbx_F_minus",
"_refln.pdbx_F_minus_sigma"},
80 {
"_refln.pdbx_I_plus",
"_refln.pdbx_I_plus_sigma",
81 "_refln.pdbx_I_minus",
"_refln.pdbx_I_minus_sigma"}
84 for (
int n = 0; n < 2; ++n) {
87 positions.insert(positions.end(), {idx, idx+1, idx, idx+1});
92 fail(
"while reading old anomalous: _refln has neither F_meas_au nor intensity_meas");
95 std::unordered_map<Miller, std::string*, MillerHash>
seen;
103 for (
size_t j = 0;
j < 3; ++
j)
107 std::string*
new_row =
ret.values.data() +
ret.values.size();
111 for (
int p : positions)
115 for (
int j = 0;
j < 3; ++
j)
125 for (
int j = 0;
j < 3; ++
j)
145 static const char* merged[] = {
146 "pdbx_r_free_flag FreeR_flag I 0",
147 "status FreeR_flag I 0 o=1,f=0",
148 "intensity_meas IMEAN J 1",
149 "F_squared_meas IMEAN J 1",
150 "intensity_sigma SIGIMEAN Q 1",
151 "F_squared_sigma SIGIMEAN Q 1",
152 "pdbx_I_plus I(+) K 1",
153 "pdbx_I_plus_sigma SIGI(+) M 1",
154 "pdbx_I_minus I(-) K 1",
155 "pdbx_I_minus_sigma SIGI(-) M 1",
158 "F_meas_sigma SIGFP Q 1",
159 "F_meas_sigma_au SIGFP Q 1",
160 "pdbx_F_plus F(+) G 1",
161 "pdbx_F_plus_sigma SIGF(+) L 1",
162 "pdbx_F_minus F(-) G 1",
163 "pdbx_F_minus_sigma SIGF(-) L 1",
164 "pdbx_anom_difference DP D 1",
165 "pdbx_anom_difference_sigma SIGDP Q 1",
168 "phase_calc PHIC P 1",
169 "pdbx_F_calc_with_solvent F-model F 1",
170 "pdbx_phase_calc_with_solvent PHIF-model P 1",
173 "pdbx_HL_A_iso HLA A 1",
174 "pdbx_HL_B_iso HLB A 1",
175 "pdbx_HL_C_iso HLC A 1",
176 "pdbx_HL_D_iso HLD A 1",
178 "pdbx_PHWT PHWT P 1",
179 "pdbx_DELFWT DELFWT F 1",
180 "pdbx_DELPHWT PHDELWT P 1",
184 "intensity_meas I J 0",
185 "intensity_net I J 0",
186 "intensity_sigma SIGI Q 0",
187 "pdbx_detector_x XDET R 0",
188 "pdbx_detector_y YDET R 0",
189 "pdbx_scan_angle ROT R 0",
203 std::vector<std::string>
tokens;
207 fail(
"line should have 4 or 5 words: " +
line);
218 tokens.push_back(
"o=1,f=0");
223 for (
const std::string& item : items) {
224 size_t pos = item.find(
'=');
225 if (pos == std::string::npos)
226 fail(
"wrong mapping (", item,
") in: ",
line);
228 auto result = fast_float::from_chars(item.c_str() + pos + 1,
229 item.c_str() + item.size(), f);
230 if (
result.ec != std::errc())
231 fail(
"failed to parse value in ", item,
" in: ",
line);
240 if (
c2n.first.size() == 1 &&
c2n.first[0] == v[0])
260 mtz.title =
title.empty() ?
"Converted from mmCIF block " +
rb.block.name :
title;
266 mtz.spacegroup =
rb.spacegroup;
267 mtz.add_dataset(
"HKL_base");
269 const cif::Loop* loop =
rb.refln_loop ?
rb.refln_loop :
rb.diffrn_refln_loop;
271 fail(
"_refln category not found in mmCIF block: " +
rb.block.name);
278 else if (
rb.wavelength_count > 1)
279 logger.
note(
"ignoring wavelengths, ",
rb.wavelength_count,
280 " are present in block ",
rb.block.name,
'.');
282 ds.wavelength =
rb.wavelength;
285 logger.
level<7>(
"Searching tags with known MTZ equivalents ...");
287 std::vector<const Entry*>
entries;
288 std::string tag = loop->
tags[0];
289 const size_t tag_offset =
rb.tag_offset();
291 auto set_tag = [&](
const std::string&
t) { tag.replace(tag_offset, std::string::npos,
t); };
306 for (
char c : {
'h',
'k',
'l'}) {
310 fail(
"Miller index tag not found: " + tag);
313 auto col =
mtz.columns.emplace(
mtz.columns.end());
321 auto col =
mtz.columns.emplace(
mtz.columns.end());
324 col->label =
"M/ISYM";
326 col =
mtz.columns.emplace(
mtz.columns.end());
329 col->label =
"BATCH";
339 if (
mtz.column_with_label(entry.col_label))
343 entries.push_back(entry.code_to_number.empty() ?
nullptr : &entry);
344 auto col =
mtz.columns.emplace(
mtz.columns.end());
346 col->dataset_id =
unmerged ? 0 : entry.dataset_id;
347 col->type = entry.col_type;
348 col->label = entry.col_label;
349 logger.
level<7>(
" ", tag,
" -> ", col->label);
354 for (
size_t i = 0;
i !=
mtz.columns.size(); ++
i) {
356 mtz.columns[
i].idx =
i;
360 std::unique_ptr<UnmergedHklMover>
hkl_mover;
371 logger.
level<7>(
"No diffrn_id. Assuming a single sweep.");
376 logger.
mesg(
"No pdbx_image_id, setting BATCH to a dummy value.");
378 logger.
mesg(
" ", tag,
" & diffrn_id -> BATCH");
383 float wavelength = 0.f;
387 std::map<int, SweepInfo>
sweeps;
391 {
"diffrn_id",
"wavelength_id"});
393 {
"id",
"wavelength"});
396 for (
size_t i = 0;
i < loop->
values.size();
i += loop->
tags.size()) {
403 frame = (
int) std::ceil(
d);
413 }
catch(std::exception&) {}
418 }
catch(std::exception&) {}
420 sweep.frame_ids.insert(frame);
429 ds = &
mtz.add_dataset(
si.crystal_id);
431 ds->project_name =
"unknown";
432 ds->wavelength =
si.wavelength;
434 si.dataset_id =
ds->id;
440 it.second.offset =
cap;
441 cap += *--
it.second.frame_ids.end() + 1100;
446 if (
p.sweep_id >= 0 &&
p.frame_id >= 0)
447 p.frame_id +=
sweeps.at(
p.sweep_id).offset;
475 mtz.data.resize(
mtz.columns.size() *
mtz.nreflections);
476 size_t k = 0,
row = 0;
477 for (
size_t i = 0;
i < loop->
values.size();
i += loop->
tags.size()) {
479 std::array<int, 3> hkl;
480 for (
size_t ii = 0;
ii != 3; ++
ii)
483 for (
size_t j = 0;
j != 3; ++
j)
484 mtz.data[
k++] = (
float) hkl[
j];
488 for (
size_t j = 0;
j != 3; ++
j)
491 for (
size_t j = 3;
j !=
indices.size(); ++
j) {
499 if (std::isnan(
mtz.data[
k]))
500 logger.
mesg(
"Value #",
i +
indices[
j],
" in the loop is not a number: ", v);
512 if (
mtz.is_merged() &&
mode ==
'a') {
516 logger.
note(
"data in ",
rb.block.name,
517 " is read as \"old-style\" anomalous (",
rb.refln_loop->length(),
524 logger.
err(
"in ",
rb.block.name,
", out of ",
526 " are unique under symmetry; the rest are equivalent to Friedel mates");
529 logger.
err(
"in ",
rb.block.name,
", out of ",
531 " are unique under symmetry");
533 logger.
mesg(
"Possibly unmerged data - you may use option --refln-to=unmerged");
540 static float status_to_freeflag(
const std::string& str) {
542 if (c ==
'\'' || c ==
'"')
struct Document that represents the CIF file (but can also be read from a different representation,...
fail(), unreachable() and __declspec/__attribute__ macros
Class Intensities that reads multi-record data from MTZ, mmCIF or XDS_ASCII and merges it into mean o...
Logger - a tiny utility for passing messages through a callback.
MTZ reflection file format.
double as_number(const std::string &s, double nan=NAN)
std::string as_string(const std::string &value)
bool is_null(const std::string &value)
int as_int(const std::string &str)
const SpaceGroup & get_spacegroup_p1()
std::array< int, 3 > Miller
A synonym for convenient passing of hkl.
std::pair< DataType, size_t > check_data_type_under_symmetry(const DataProxy &proxy)
void split_str_into_multi(const std::string &str, const char *seps, std::vector< std::string > &result)
cif::Loop transcript_old_anomalous_to_standard(const cif::Loop &loop, const SpaceGroup *sg)
Before _refln.pdbx_F_plus/minus was introduced, anomalous data was stored as two F_meas_au reflection...
bool possible_old_style(const ReflnBlock &rb, DataType data_type)
void fail(const std::string &msg)
std::vector< std::string > split_str(const std::string &str, S sep)
constexpr int ialpha4_id(const char *s)
Utilities for parsing CIF numbers (the CIF spec calls them 'numb').
Reads reflection data from the mmCIF format.
float translate_code_to_number(const std::string &v) const
std::vector< std::pair< std::string, float > > code_to_number
Entry(const std::string &line)
std::vector< std::string > spec_lines
Mtz auto_convert_block_to_mtz(ReflnBlock &rb, Logger &logger, char mode) const
static const char ** default_spec(bool for_merged)
Mtz convert_block_to_mtz(const ReflnBlock &rb, Logger &logger) const
std::vector< std::string > history
Passes messages (including warnings/errors) to a callback function.
void mesg(Args const &... args) const
Send a message without any prefix.
GEMMI_COLD void err(Args const &... args) const
Send a warning/error (see Quirk above).
void level(Args const &... args) const
Send a message without any prefix on with a numeric threshold N.
int threshold
Pass messages of this level and all lower (more severe) levels: 8=all, 6=all but debug,...
void note(Args const &... args) const
Send a note (a notice, a significant message).
std::pair< Op::Miller, bool > to_asu_sign(const Op::Miller &hkl, const GroupOps &gops) const
Similar to to_asu(), but the second returned value is sign: true for + or centric.
Table find(const std::string &prefix, const std::vector< std::string > &tags)
int find_tag(const std::string &tag) const
std::vector< std::string > tags
std::vector< std::string > values
int find_tag_lc(const std::string &lctag) const