14#include <initializer_list>
31template <typename T, typename FP=typename std::iterator_traits<T>::value_type>
35 while (
i != end && std::isnan(*
i))
59 std::pair<Miller, int>
hkl_isym = asu_.to_asu(hkl, group_ops_);
84 float min_value =
NAN;
85 float max_value =
NAN;
95 float&
operator[](std::size_t n) {
return parent->
data[idx + n * stride()]; }
96 float operator[](std::size_t n)
const {
return parent->
data[idx + n * stride()]; }
97 float&
at(std::size_t n) {
return parent->
data.at(idx + n * stride()); }
98 float at(std::size_t n)
const {
return parent->
data.at(idx + n * stride()); }
100 return type ==
'H' || type ==
'B' || type ==
'Y' || type ==
'I';
104 if (idx + 1 < parent->
columns.size()) {
130 floats.resize(156, 0.);
145 return UnitCell(floats[0], floats[1], floats[2],
146 floats[3], floats[4], floats[5]);
164 return Mat33(floats[6], floats[9], floats[12],
165 floats[7], floats[10], floats[13],
166 floats[8], floats[11], floats[14]);
171 bool same_byte_order =
true;
172 bool indices_switched_to_original =
false;
173 std::int64_t header_offset = 0;
176 int nreflections = 0;
177 std::array<int, 5> sort_order = {};
183 int spacegroup_number = 0;
195 std::ostream* warnings =
nullptr;
203 same_byte_order =
o.same_byte_order;
204 header_offset =
o.header_offset;
205 version_stamp = std::move(
o.version_stamp);
206 title = std::move(
o.title);
207 nreflections =
o.nreflections;
208 sort_order =
o.sort_order;
209 min_1_d2 =
o.min_1_d2;
210 max_1_d2 =
o.max_1_d2;
213 cell = std::move(
o.cell);
214 spacegroup_number =
o.spacegroup_number;
215 spacegroup_name = std::move(
o.spacegroup_name);
216 symops = std::move(
o.symops);
217 spacegroup =
o.spacegroup;
218 datasets = std::move(
o.datasets);
219 columns = std::move(
o.columns);
220 batches = std::move(
o.batches);
221 history = std::move(
o.history);
222 appended_text = std::move(
o.appended_text);
223 data = std::move(
o.data);
224 warnings =
o.warnings;
233 datasets.push_back({0,
"HKL_base",
"HKL_base",
"HKL_base", cell, 0.});
234 for (
int i = 0;
i != 3; ++
i)
235 add_column(std::string(1,
"HKL"[
i]),
'H', 0,
i,
false);
245 if (
ds.id == dataset &&
ds.cell.is_crystal() &&
ds.cell.a > 0)
251 return const_cast<Mtz*
>(
this)->get_cell(dataset);
263 for (
int i = 0;
i < 6; ++
i)
265 double avg[6] = {0., 0., 0., 0., 0., 0.};
267 for (
int i = 0;
i < 6; ++
i) {
273 if (
avg[0] <= 0 ||
avg[1] <= 0 ||
avg[2] <= 0 ||
274 avg[3] <= 0 ||
avg[4] <= 0 ||
avg[5] <= 0)
276 size_t n = batches.size();
277 for (
int i = 0;
i < 6; ++
i)
281 for (
int i = 0;
i < 6; ++
i)
283 for (
int i = 0;
i < 6; ++
i)
284 rmsd[
i] = std::sqrt(rmsd[
i] / n);
291 spacegroup_number =
new_sg ? spacegroup->
ccp4 : 0;
292 spacegroup_name =
new_sg ? spacegroup->
hm :
"";
296 if (datasets.empty())
297 fail(
"MTZ dataset not found (missing DATASET header line?).");
298 return datasets.back();
301 if ((
size_t)
id < datasets.size() && datasets[
id].id ==
id)
306 fail(
"MTZ file has no dataset with ID " + std::to_string(
id));
309 return const_cast<Mtz*
>(
this)->dataset(
id);
313 if (
d.dataset_name == name)
318 return const_cast<Mtz*
>(
this)->dataset_with_name(label);
320 int count(
const std::string& label)
const {
322 for (
const Column& col : columns)
323 if (col.label == label)
329 for (
const Column& col : columns)
330 if (col.type == type)
336 for (
Column& col : columns)
337 if (col.label == label && (!
ds ||
ds->id == col.dataset_id))
343 return const_cast<Mtz*
>(
this)->column_with_label(label,
ds);
347 if (
const Column* col = column_with_label(label,
ds))
349 fail(
"Column label not found: " + label);
352 std::vector<const Column*>
cols;
353 for (
const Column& col : columns)
354 if (col.type == type)
355 cols.push_back(&col);
360 std::vector<int>
cols;
361 for (
int i = 0;
i < (
int) columns.size(); ++
i)
362 if (columns[
i].type == col_type)
371 std::vector<std::pair<int,int>>
r;
372 for (
int i = 0;
i < (
int) columns.size(); ++
i) {
373 const Column& col = columns[
i];
375 if (
sign_pos != std::string::npos) {
378 for (
int j = 0;
j < (
int) columns.size(); ++
j)
380 columns[
j].type == col.
type &&
382 r.emplace_back(
i,
j);
391 for (
const char* label :
labels) {
392 if (
const Column* col = column_with_label(label))
399 std::initializer_list<const char*>
labels) {
400 for (
Column& col : columns)
401 if (col.type == type) {
402 for (
const char* label :
labels)
403 if (col.label == label)
411 return column_with_type_and_one_of_labels(
'I',
412 {
"FREE",
"RFREE",
"FREER",
"FreeR_flag",
"R-free-flags",
"FreeRflag"});
415 return const_cast<Mtz*
>(
this)->rfree_column();
419 return column_with_type_and_one_of_labels(
'J', {
"IMEAN",
"I",
"IOBS",
"I-obs"});
422 return const_cast<Mtz*
>(
this)->imean_column();
426 return column_with_type_and_one_of_labels(
'K', {
"I(+)",
"IOBS(+)",
"I-obs(+)"});
429 return const_cast<Mtz*
>(
this)->iplus_column();
433 return column_with_type_and_one_of_labels(
'K', {
"I(-)",
"IOBS(-)",
"I-obs(-)"});
436 return const_cast<Mtz*
>(
this)->iminus_column();
440 return data.size() == columns.size() * nreflections;
446 for (
size_t i = 0;
i < data.size();
i += columns.size()) {
447 double res =
uc.calculate_1_d2_double(data[
i+0], data[
i+1], data[
i+2]);
456 if (!has_data() || columns.size() < 3)
459 double max_value = 0.;
461 extend_min_max_1_d2(cell, min_value, max_value);
464 if (
ds.cell.is_crystal() &&
ds.cell.a > 0 &&
ds.cell != cell &&
466 extend_min_max_1_d2(
ds.cell, min_value, max_value);
471 return {{min_value, max_value}};
475 std::array<double,2>
reso = calculate_min_max_1_d2();
483 same_byte_order = !same_byte_order;
487 template<
typename Stream>
491 if (!stream.read(buf, 20))
492 fail(
"Could not read the MTZ file (is it empty?)");
493 if (buf[0] !=
'M' || buf[1] !=
'T' || buf[2] !=
'Z' || buf[3] !=
' ')
494 fail(
"Not an MTZ file - it does not start with 'MTZ '");
508 if (!same_byte_order)
512 std::memcpy(&header_offset, buf + 12, 8);
513 if (!same_byte_order) {
522 while (*
line !=
'\0' && !std::isspace(*
line))
524 while (std::isspace(*
line))
536 return UnitCell(a, b, c, alpha, beta, gamma);
539 template<
typename T>
void warn(
const T&
text)
const {
541 *warnings <<
text << std::endl;
544 template<
typename Stream>
546 std::ptrdiff_t pos = 4 * std::ptrdiff_t(header_offset - 1);
547 if (!stream.seek(pos))
548 fail(
"Cannot rewind to the MTZ header at byte " + std::to_string(pos));
552 template<
typename Stream>
555 seek_headers(stream);
572 fail(
"Wrong NCOL header");
577 cell = read_cell_parameters(
args);
580 for (
int& n : sort_order)
585 symops.reserve(nsymop);
592 else if (
const char* end = std::strchr(++
args,
'\''))
593 spacegroup_name.assign(
args, end);
615 columns.emplace_back();
616 Column& col = columns.back();
623 col.
idx = columns.size() - 1;
628 fail(
"MTZ: COLSRC before COLUMN?");
639 datasets.emplace_back();
642 datasets.back().wavelength = 0.0;
648 warn(
"MTZ CRYSTAL line: unusual numbering.");
654 warn(
"MTZ DATASET line: unusual numbering.");
658 datasets.back().cell = read_cell_parameters(
args);
660 warn(
"MTZ DCELL line: unusual numbering.");
667 warn(
"MTZ DWAV line: unusual numbering.");
678 if (
ncol != (
int) columns.size())
679 fail(
"Number of COLU records inconsistent with NCOL record.");
681 fail(
"BATCH header inconsistent with NCOL record.");
685 template<
typename Stream>
692 const char* end =
rtrim_cstr(start, start+80);
693 history.emplace_back(start, end);
698 warn(
"Wrong MTZ: number of headers should be between 0 and 30");
704 stream.read(buf, 80);
706 fail(
"Missing BH header");
713 fail(
"Wrong BH header");
714 stream.read(buf, 80);
715 const char* end =
rtrim_cstr(buf + 6, buf+76);
716 batch.title.assign(buf, end - buf);
721 stream.read(buf, 80);
723 fail(
"Missing BHCH header");
728 appended_text = stream.read_rest();
735 warn(
"MTZ: unrecognized spacegroup name: " + spacegroup_name);
738 if (spacegroup->
ccp4 != spacegroup_number)
739 warn(
"MTZ: inconsistent spacegroup name and number");
742 d.cell.set_cell_images_from_spacegroup(spacegroup);
745 template<
typename Stream>
747 size_t n = columns.size() * nreflections;
749 if (!stream.seek(80))
750 fail(
"Cannot rewind to the MTZ data.");
751 if (!stream.read(data.data(), 4 * n))
752 fail(
"Error when reading MTZ data");
753 if (!same_byte_order)
754 for (
float& f : data)
758 template<
typename Stream>
760 read_first_bytes(stream);
761 read_main_headers(stream);
762 read_history_and_batch_headers(stream);
764 if (datasets.empty())
765 datasets.push_back({0,
"HKL_base",
"HKL_base",
"HKL_base", cell, 0.});
768 template<
typename Stream>
770 read_all_headers(stream);
772 read_raw_data(stream);
780 }
catch (std::runtime_error&
e) {
781 fail(std::string(
e.what()) +
": " + path);
785 template<
typename Input>
787 source_path =
input.path();
788 if (
input.is_stdin()) {
805 fail(
"Wrong use_first arg in Mtz::sort.");
806 std::vector<int>
indices(nreflections);
807 for (
int i = 0;
i != nreflections; ++
i)
810 int a = i * (int) columns.size();
811 int b = j * (int) columns.size();
812 for (int n = 0; n < use_first; ++n)
813 if (data[a+n] != data[b+n])
814 return data[a+n] < data[b+n];
822 sort_order = {{0, 0, 0, 0, 0}};
824 sort_order[
i] =
i + 1;
827 std::vector<float>
new_data(data.size());
828 size_t w = columns.size();
839 for (
int i = 0;
i != 3; ++
i)
840 data[
offset +
i] =
static_cast<float>(hkl[
i]);
867 datasets.push_back({id, name, name, name, cell, 0.0});
868 return datasets.back();
873 if (datasets.empty())
874 fail(
"No datasets.");
876 dataset_id = datasets.back().id;
879 if (pos > (
int) columns.size())
880 fail(
"Requested column position after the end.");
882 pos = (
int) columns.size();
883 auto col = columns.emplace(columns.begin() + pos);
884 for (
auto i = col + 1;
i != columns.end(); ++
i)
886 col->dataset_id = dataset_id;
892 expand_data_rows(1, pos);
899 fail(
msg,
": data not read yet");
900 if (idx >= columns.size())
901 fail(
msg,
": no column with 0-based index ", std::to_string(idx));
907 fail(
"data in source mtz not read yet");
923 dst.min_value =
src.min_value;
924 dst.max_value =
src.max_value;
926 dst.dataset_id =
src.dataset_id;
930 for (
size_t n = 0; n < data.size(); n += columns.size())
935 std::vector<int>
dst_indices = sorted_row_indices();
976 fail(
"copy_column(): data not read yet");
988 for (
int i = 0; i <= (int) trailing_cols.size(); ++i)
989 add_column(
"",
' ', -1, dest_idx + i,
false);
990 expand_data_rows(1 + trailing_cols.size(), dest_idx);
992 const Column& src_col_now = col_idx < 0 ? src_col : columns[col_idx];
994 do_replace_column(dest_idx, src_col_now, trailing_cols);
995 return columns[dest_idx];
999 check_column(idx,
"remove_column()");
1000 columns.erase(columns.begin() + idx);
1001 for (
size_t i = idx;
i < columns.size(); ++
i)
1004 assert(columns.size() * nreflections == data.size());
1007 template <
typename Func>
1011 auto out = data.begin();
1012 size_t width = columns.size();
1013 for (
auto r = data.begin();
r < data.end();
r += width)
1016 std::copy(
r,
r + width,
out);
1019 data.erase(
out, data.end());
1020 nreflections =
int(data.size() / width);
1026 fail(
"Internal error");
1027 size_t pos = pos_ == -1 ?
old_row_size : (size_t) pos_;
1029 fail(
"expand_data_rows(): pos out of range");
1034 size_t ncols = columns.size();
1036 fail(
"Mtz.set_data(): expected " + std::to_string(
ncols) +
" columns.");
1047 template<
typename Write>
void write_to_stream(
Write write)
const;
1053 mtz.read_file(path);
1057template<
typename Input>
1078 fail(
"MTZ file has no column with label: " + label);
1087 size_t size()
const {
return mtz_.columns.size() * mtz_.nreflections; }
1088 float get_num(
size_t n)
const {
return data_[n]; }
Op parse_triplet(const std::string &s)
void swap_eight_bytes(void *start)
void swap_four_bytes(void *start)
std::unique_ptr< std::FILE, decltype(&std::fclose)> fileptr_t
MtzDataProxy data_proxy(const Mtz &mtz)
const char * rtrim_cstr(const char *start, const char *end=nullptr)
int simple_atoi(const char *p, const char **endptr=nullptr)
double fast_atof(const char *p, const char **endptr=nullptr)
const SpaceGroup * find_spacegroup_by_name(std::string name, double alpha=0., double gamma=0.) noexcept
constexpr int ialpha3_id(const char *s)
std::array< int, 3 > Miller
A synonym for convenient passing of hkl.
std::string rtrim_str(const std::string &str)
void vector_insert_columns(std::vector< T > &data, size_t old_width, size_t length, size_t n, size_t pos, T new_value)
std::string read_word(const char *line)
fileptr_t file_open(const char *path, const char *mode)
void split_str_into_multi(const std::string &str, const char *seps, std::vector< std::string > &result)
Mtz read_mtz_file(const std::string &path)
constexpr float sq(float x)
std::array< FP, 2 > calculate_min_max_disregarding_nans(T begin, T end)
void fail(const std::string &msg)
void vector_remove_column(std::vector< T > &data, size_t new_width, size_t pos)
const char * skip_word(const char *p)
Mtz read_mtz(Input &&input, bool with_data)
constexpr int ialpha4_id(const char *s)
const char * skip_blank(const char *p)
const SpaceGroup * spacegroup() const
size_t column_index(const std::string &label) const
const UnitCell & unit_cell() const
Miller get_hkl(size_t offset) const
float get_num(size_t n) const
MtzExternalDataProxy(const Mtz &mtz, const float *data)
Miller get_hkl(size_t offset) const
float get_num(size_t n) const
std::vector< float > floats
std::vector< std::string > axes
void set_wavelength(float lambda)
UnitCell get_cell() const
void set_cell(const UnitCell &uc)
void set_dataset_id(int id)
float & operator[](std::size_t n)
float at(std::size_t n) const
const Column * get_next_column_if_type(char next_type) const
float operator[](std::size_t n) const
const_iterator begin() const
float & at(std::size_t n)
const_iterator end() const
const Dataset & dataset() const
const Column * column_with_one_of_labels(std::initializer_list< const char * > labels) const
void check_trailing_cols(const Column &src_col, const std::vector< std::string > &trailing_cols) const
Mtz & operator=(Mtz &&o) noexcept
std::vector< Dataset > datasets
void read_first_bytes(Stream &stream)
const Column * iplus_column() const
Column & add_column(const std::string &label, char type, int dataset_id, int pos, bool expand_data)
int count_type(char type) const
std::vector< Batch > batches
const Column & get_column_with_label(const std::string &label, const Dataset *ds=nullptr) const
void warn(const T &text) const
void write_to_cstream(std::FILE *stream) const
void extend_min_max_1_d2(const UnitCell &uc, double &min, double &max) const
void expand_to_p1()
Change symmetry to P1 and expand reflections.
std::vector< int > positions_of_columns_with_type(char col_type) const
const Dataset * dataset_with_name(const std::string &label) const
std::string appended_text
bool sort(int use_first=3)
const Column * column_with_label(const std::string &label, const Dataset *ds=nullptr) const
Dataset & add_dataset(const std::string &name)
const UnitCell & get_cell(int dataset=-1) const
const Column * iminus_column() const
Column * column_with_label(const std::string &label, const Dataset *ds=nullptr)
bool switch_to_asu_hkl()
(for unmerged MTZ only) change HKL to ASU equivalent and set ISYM
void seek_headers(Stream &stream)
void write_to_file(const std::string &path) const
void ensure_asu(bool tnt_asu=false)
(for merged MTZ only) change HKL to ASU equivalent, adjust phases, etc
Column * column_with_type_and_one_of_labels(char type, std::initializer_list< const char * > labels)
void set_spacegroup(const SpaceGroup *new_sg)
std::string spacegroup_name
double resolution_low() const
void read_file_gz(const std::string &path, bool with_data=true)
the same as read_input(MaybeGzipped(path), with_data)
Dataset & dataset(int id)
void read_stream(Stream &&stream, bool with_data)
std::vector< const Column * > columns_with_type(char type) const
Column & copy_column(int dest_idx, const Column &src_col, const std::vector< std::string > &trailing_cols={})
static UnitCell read_cell_parameters(const char *line)
void read_raw_data(Stream &stream)
std::vector< int > sorted_row_indices(int use_first=3) const
void read_input(Input &&input, bool with_data)
void reindex(const Op &op, std::ostream *out)
reindex data, usually followed by ensure_asu()
void read_main_headers(Stream &stream)
std::array< double, 2 > calculate_min_max_1_d2() const
std::vector< std::pair< int, int > > positions_of_plus_minus_columns() const
const Column * rfree_column() const
const Column * imean_column() const
void set_hkl(size_t offset, const Miller &hkl)
void do_replace_column(size_t dest_idx, const Column &src_col, const std::vector< std::string > &trailing_cols)
std::vector< std::string > history
double resolution_high() const
void set_cell_for_all(const UnitCell &new_cell)
Dataset * dataset_with_name(const std::string &name)
void read_file(const std::string &path)
Mtz & operator=(Mtz const &)=delete
UnitCell get_average_cell_from_batch_headers(double *rmsd) const
void read_history_and_batch_headers(Stream &stream)
Column & replace_column(size_t dest_idx, const Column &src_col, const std::vector< std::string > &trailing_cols={})
bool switch_to_original_hkl()
(for unmerged MTZ only) change HKL according to M/ISYM
void set_data(const float *new_data, size_t n)
static const char * skip_word(const char *line)
const Dataset & dataset(int id) const
std::vector< Column > columns
void remove_rows_if(Func condition)
Mtz(bool with_base=false)
size_t find_offset_of_hkl(const Miller &hkl, size_t start=0) const
Returns offset of the first hkl or (size_t)-1. Can be slow.
void write_to_string(std::string &str) const
void expand_data_rows(size_t added, int pos_=-1)
Miller get_hkl(size_t offset) const
std::string version_stamp
void check_column(size_t idx, const char *msg) const
void read_all_headers(Stream &stream)
const SpaceGroup * spacegroup
UnitCell & get_cell(int dataset=-1)
int count(const std::string &label) const
std::vector< float > data
void remove_column(size_t idx)
GroupOps operations() const
void set_cell_images_from_spacegroup(const SpaceGroup *sg)
UnmergedHklMover(const SpaceGroup *spacegroup)
int move_to_asu(std::array< int, 3 > &hkl)