5#ifndef GEMMI_CIF2MTZ_HPP_
6#define GEMMI_CIF2MTZ_HPP_
11#include <unordered_map>
22template<
typename DataProxy>
27 std::unordered_map<Op::Miller, int, MillerHash>
seen;
37 if ((
r.first->second & sign) != 0 ||
centric) {
40 r.first->second |= sign;
50 if (
rb.refln_loop ==
nullptr)
52 for (
const std::string& tag :
rb.refln_loop->tags) {
53 if (tag.size() < 7 + 6)
64 tag !=
"_refln.pdbx_r_free_flag")))
80 std::vector<int> positions;
81 positions.reserve(13);
82 for (
const char* tag : {
"_refln.index_h",
"_refln.index_k",
"_refln.index_l"}) {
85 fail(
"while reading old anomalous: _refln.index_{h,k,l} not found");
86 positions.push_back(pos);
88 for (
const char* tag : {
"_refln.status",
"_refln.pdbx_r_free_flag"}) {
91 positions.push_back(pos);
95 ret.tags.reserve(positions.size());
96 for (
int p : positions)
99 {
"_refln.F_meas_au",
"_refln.F_meas_sigma_au"},
100 {
"_refln.intensity_meas",
"_refln.intensity_sigma"}
103 {
"_refln.pdbx_F_plus",
"_refln.pdbx_F_plus_sigma",
104 "_refln.pdbx_F_minus",
"_refln.pdbx_F_minus_sigma"},
105 {
"_refln.pdbx_I_plus",
"_refln.pdbx_I_plus_sigma",
106 "_refln.pdbx_I_minus",
"_refln.pdbx_I_minus_sigma"}
109 for (
int n = 0; n < 2; ++n) {
112 positions.insert(positions.end(), {idx, idx+1, idx, idx+1});
117 fail(
"while reading old anomalous: _refln has neither F_meas_au nor intensity_meas");
120 std::unordered_map<Miller, std::string*, MillerHash>
seen;
128 for (
size_t j = 0;
j < 3; ++
j)
132 std::string*
new_row =
ret.values.data() +
ret.values.size();
136 for (
int p : positions)
140 for (
int j = 0;
j < 3; ++
j)
150 for (
int j = 0;
j < 3; ++
j)
170 static const char*
merged[] = {
171 "pdbx_r_free_flag FreeR_flag I 0",
172 "status FreeR_flag I 0 o=1,f=0",
173 "intensity_meas IMEAN J 1",
174 "F_squared_meas IMEAN J 1",
175 "intensity_sigma SIGIMEAN Q 1",
176 "F_squared_sigma SIGIMEAN Q 1",
177 "pdbx_I_plus I(+) K 1",
178 "pdbx_I_plus_sigma SIGI(+) M 1",
179 "pdbx_I_minus I(-) K 1",
180 "pdbx_I_minus_sigma SIGI(-) M 1",
183 "F_meas_sigma SIGFP Q 1",
184 "F_meas_sigma_au SIGFP Q 1",
185 "pdbx_F_plus F(+) G 1",
186 "pdbx_F_plus_sigma SIGF(+) L 1",
187 "pdbx_F_minus F(-) G 1",
188 "pdbx_F_minus_sigma SIGF(-) L 1",
189 "pdbx_anom_difference DP D 1",
190 "pdbx_anom_difference_sigma SIGDP Q 1",
193 "phase_calc PHIC P 1",
196 "pdbx_HL_A_iso HLA A 1",
197 "pdbx_HL_B_iso HLB A 1",
198 "pdbx_HL_C_iso HLC A 1",
199 "pdbx_HL_D_iso HLD A 1",
201 "pdbx_PHWT PHWT P 1",
202 "pdbx_DELFWT DELFWT F 1",
203 "pdbx_DELPHWT PHDELWT P 1",
207 "intensity_meas I J 0",
208 "intensity_net I J 0",
209 "intensity_sigma SIGI Q 0",
210 "pdbx_detector_x XDET R 0",
211 "pdbx_detector_y YDET R 0",
212 "pdbx_scan_angle ROT R 0",
226 std::vector<std::string>
tokens;
230 fail(
"line should have 4 or 5 words: " +
line);
241 tokens.push_back(
"o=1,f=0");
246 for (
const std::string& item : items) {
247 size_t pos = item.find(
'=');
248 if (pos == std::string::npos)
249 fail(
"wrong mapping (", item,
") in: ",
line);
251 auto result = fast_float::from_chars(item.c_str() + pos + 1,
252 item.c_str() + item.size(), f);
253 if (
result.ec != std::errc())
254 fail(
"failed to parse value in ", item,
" in: ",
line);
263 if (
c2n.first.size() == 1 &&
c2n.first[0] == v[0])
284 mtz.title =
title.empty() ?
"Converted from mmCIF block " +
rb.block.name :
title;
290 mtz.spacegroup =
rb.spacegroup;
291 mtz.add_dataset(
"HKL_base");
293 const cif::Loop* loop =
rb.refln_loop ?
rb.refln_loop :
rb.diffrn_refln_loop;
295 fail(
"_refln category not found in mmCIF block: " +
rb.block.name);
302 else if (
rb.wavelength_count > 1)
303 out <<
"Warning: ignoring wavelengths, " <<
rb.wavelength_count
304 <<
" are present in block " <<
rb.block.name <<
".\n";
306 ds.wavelength =
rb.wavelength;
310 out <<
"Searching tags with known MTZ equivalents ...\n";
312 std::vector<const Entry*>
entries;
313 std::string tag = loop->
tags[0];
314 const size_t tag_offset =
rb.tag_offset();
328 tag.replace(tag_offset, std::string::npos,
"index_h");
329 for (
char c : {
'h',
'k',
'l'}) {
333 fail(
"Miller index tag not found: " + tag);
336 auto col =
mtz.columns.emplace(
mtz.columns.end());
344 auto col =
mtz.columns.emplace(
mtz.columns.end());
347 col->label =
"M/ISYM";
349 col =
mtz.columns.emplace(
mtz.columns.end());
352 col->label =
"BATCH";
358 tag.replace(tag_offset, std::string::npos, entry.refln_tag);
362 if (
mtz.column_with_label(entry.col_label))
366 entries.push_back(entry.code_to_number.empty() ?
nullptr : &entry);
367 auto col =
mtz.columns.emplace(
mtz.columns.end());
369 col->dataset_id =
unmerged ? 0 : entry.dataset_id;
370 col->type = entry.col_type;
371 col->label = entry.col_label;
373 out <<
" " << tag <<
" -> " << col->label <<
'\n';
378 for (
size_t i = 0;
i !=
mtz.columns.size(); ++
i) {
380 mtz.columns[
i].idx =
i;
384 std::unique_ptr<UnmergedHklMover>
hkl_mover;
392 tag.replace(tag_offset, std::string::npos,
"diffrn_id");
395 out <<
"No diffrn_id. Assuming a single sweep.\n";
396 tag.replace(tag_offset, std::string::npos,
"pdbx_image_id");
400 out <<
"No pdbx_image_id, setting BATCH to a dummy value.\n";
402 out <<
" " << tag <<
" & diffrn_id -> BATCH\n";
407 float wavelength = 0.f;
411 std::map<int, SweepInfo>
sweeps;
415 {
"diffrn_id",
"wavelength_id"});
417 {
"id",
"wavelength"});
420 for (
size_t i = 0;
i < loop->
values.size();
i += loop->
tags.size()) {
427 frame = (
int) std::ceil(
d);
437 }
catch(std::exception&) {}
442 }
catch(std::exception&) {}
444 sweep.frame_ids.insert(frame);
453 ds = &
mtz.add_dataset(
si.crystal_id);
455 ds->project_name =
"unknown";
456 ds->wavelength =
si.wavelength;
458 si.dataset_id =
ds->id;
464 it.second.offset =
cap;
465 cap += *--
it.second.frame_ids.end() + 1100;
470 if (
p.sweep_id >= 0 &&
p.frame_id >= 0)
471 p.frame_id +=
sweeps.at(
p.sweep_id).offset;
499 mtz.data.resize(
mtz.columns.size() *
mtz.nreflections);
500 size_t k = 0,
row = 0;
501 for (
size_t i = 0;
i < loop->
values.size();
i += loop->
tags.size()) {
503 std::array<int, 3> hkl;
504 for (
size_t ii = 0;
ii != 3; ++
ii)
507 for (
size_t j = 0;
j != 3; ++
j)
508 mtz.data[
k++] = (
float) hkl[
j];
512 for (
size_t j = 0;
j != 3; ++
j)
515 for (
size_t j = 3;
j !=
indices.size(); ++
j) {
523 if (std::isnan(
mtz.data[
k]))
524 out <<
"Value #" <<
i +
indices[
j] <<
" in the loop is not a number: "
537 if (
mtz.is_merged() &&
mode ==
'a') {
541 out <<
"NOTE: data in " <<
rb.block.name
542 <<
" is read as \"old-style\" anomalous (" <<
rb.refln_loop->length()
549 out <<
"WARNING: in " <<
rb.block.name <<
", out of "
550 <<
rb.refln_loop->length() <<
" HKLs, only " <<
type_unique.second
551 <<
" are unique under symmetry; the rest are equivalent to Friedel mates\n";
554 out <<
"WARNING: in " <<
rb.block.name <<
", out of "
555 <<
rb.refln_loop->length() <<
" HKLs, only " <<
type_unique.second
556 <<
" are unique under symmetry\n";
558 out <<
"Possibly unmerged data - you may use option --refln-to=unmerged\n";
565 static float status_to_freeflag(
const std::string& str) {
567 if (c ==
'\'' || c ==
'"')
double as_number(const std::string &s, double nan=NAN)
std::string as_string(const std::string &value)
bool is_null(const std::string &value)
int as_int(const std::string &str)
const SpaceGroup & get_spacegroup_p1()
std::array< int, 3 > Miller
A synonym for convenient passing of hkl.
std::pair< DataType, size_t > check_data_type_under_symmetry(const DataProxy &proxy)
void split_str_into_multi(const std::string &str, const char *seps, std::vector< std::string > &result)
cif::Loop transcript_old_anomalous_to_standard(const cif::Loop &loop, const SpaceGroup *sg)
Before refln.pdbx_F_plus/minus was introduced, anomalous data was stored as two F_meas_au reflections...
bool possible_old_style(const ReflnBlock &rb, DataType data_type)
void fail(const std::string &msg)
std::vector< std::string > split_str(const std::string &str, S sep)
constexpr int ialpha4_id(const char *s)
float translate_code_to_number(const std::string &v) const
std::vector< std::pair< std::string, float > > code_to_number
Entry(const std::string &line)
Mtz auto_convert_block_to_mtz(ReflnBlock &rb, std::ostream &out, char mode) const
std::vector< std::string > spec_lines
Mtz convert_block_to_mtz(const ReflnBlock &rb, std::ostream &out) const
static const char ** default_spec(bool for_merged)
std::vector< std::string > history
bool is_centrosymmetric() const
std::pair< Op::Miller, bool > to_asu_sign(const Op::Miller &hkl, const GroupOps &gops) const
Similar to to_asu(), but the second returned value is sign: true for + or centric.
Table find(const std::string &prefix, const std::vector< std::string > &tags)
int find_tag(const std::string &tag) const
std::vector< std::string > tags
std::vector< std::string > values
int find_tag_lc(const std::string &lctag) const