Gemmi C++ API
Loading...
Searching...
No Matches
ccp4.hpp
Go to the documentation of this file.
1// Copyright 2018 Global Phasing Ltd.
2//
3// CCP4 format for maps and masks.
4
5#ifndef GEMMI_CCP4_HPP_
6#define GEMMI_CCP4_HPP_
7
8#include <cassert>
9#include <cmath> // for ceil, fabs, floor, round
10#include <cstdint> // for uint16_t, int32_t
11#include <cstdio> // for FILE
12#include <cstring> // for memcpy
13#include <array>
14#include <string>
15#include <type_traits> // for is_same
16#include <vector>
17#include "symmetry.hpp"
18#include "fail.hpp" // for fail
19#include "fileutil.hpp" // for file_open, is_little_endian, ...
20#include "input.hpp" // for FileStream
21#include "grid.hpp"
22
23namespace gemmi {
24
25using std::int32_t;
26
27// options for Ccp4<>::setup
28enum class MapSetup {
29 Full, // reorder and expand to the whole unit cell
30 NoSymmetry, // reorder and resize to the whole cell, but no symmetry ops
31 ReorderOnly // reorder axes to X, Y, Z
32};
33
34struct Ccp4Base {
35 DataStats hstats; // data statistics read from / written to ccp4 map
36 // stores raw headers if the grid was read from ccp4 map
37 std::vector<int32_t> ccp4_header;
38 bool same_byte_order = true;
39
40 // methods to access info from ccp4 headers, w is word number from the spec
41 void* header_word(int w) { return &ccp4_header.at(w - 1); }
42 const void* header_word(int w) const { return &ccp4_header.at(w - 1); }
43
44 int32_t header_i32(int w) const {
45 int32_t value = ccp4_header.at(w - 1);
46 if (!same_byte_order)
47 swap_four_bytes(&value);
48 return value;
49 }
50 std::array<int, 3> header_3i32(int w) const {
51 return {{ header_i32(w), header_i32(w+1), header_i32(w+2) }};
52 }
53 float header_float(int w) const {
54 int32_t int_value = header_i32(w);
55 float f;
56 std::memcpy(&f, &int_value, 4);
57 return f;
58 }
59 // ccp4 map header has mostly 80-byte strings
60 std::string header_str(int w, size_t len=80) const {
61 if (4 * ccp4_header.size() < 4 * (w - 1) + len)
62 fail("invalid end of string");
63 return std::string(reinterpret_cast<const char*>(header_word(w)), len);
64 }
65 void set_header_i32(int w, int32_t value) {
66 if (!same_byte_order)
67 swap_four_bytes(&value);
68 ccp4_header.at(w - 1) = value;
69 };
70 void set_header_3i32(int w, int32_t x, int32_t y, int32_t z) {
71 set_header_i32(w, x);
72 set_header_i32(w+1, y);
73 set_header_i32(w+2, z);
74 };
75 void set_header_float(int w, float value) {
76 int32_t int32_value;
77 std::memcpy(&int32_value, &value, 4);
79 };
80 void set_header_str(int w, const std::string& str) {
81 std::memcpy(header_word(w), str.c_str(), str.size());
82 }
83
84 std::array<int, 3> axis_positions() const {
85 if (ccp4_header.empty())
86 return {{0, 1, 2}}; // assuming it's X,Y,Z
87 std::array<int, 3> pos{{-1, -1, -1}};
88 for (int i = 0; i != 3; ++i) {
89 int mapi = header_i32(17 + i);
90 if (mapi <= 0 || mapi > 3 || pos[mapi - 1] != -1)
91 fail("Incorrect MAPC/MAPR/MAPS records");
92 pos[mapi - 1] = i;
93 }
94 return pos;
95 }
96
97 double header_rfloat(int w) const { // rounded to 5 digits
98 return std::round(1e5 * header_float(w)) / 1e5;
99 }
100
103 // cf. setup()
104 auto pos = axis_positions();
105 std::array<int, 3> start = header_3i32(5);
106 std::array<int, 3> size = header_3i32(1);
107 std::array<int, 3> sampl = header_3i32(8);
108 for (int i = 0; i < 3; ++i) {
109 double scale = 1. / sampl[i];
110 int p = pos[i];
111 box.minimum.at(i) = scale * start[p] - 1e-9;
112 box.maximum.at(i) = scale * (start[p] + size[p] - 1) + 1e-9;
113 }
114 return box;
115 }
116
117 // Skew transformation (words 25-37) is supported by CCP4 maplib and PyMOL,
118 // but it's not in the MRC format and is not supported by most programs.
119 // From maplib.html: Skew transformation is from standard orthogonal
120 // coordinate frame (as used for atoms) to orthogonal map frame, as
121 // Xo(map) = S * (Xo(atoms) - t)
123 return header_i32(25) != 0; // LSKFLG should be 0 or 1
124 }
126 return {
127 // 26-34 SKWMAT
128 { header_float(26), header_float(27), header_float(28),
130 header_float(32), header_float(33), header_float(34) },
131 // 35-37 SKWTRN
132 { header_float(35), header_float(36), header_float(37) }
133 };
134 }
135
136 // ORIGIN (words 50-52), used in MRC format, zeros in CCP4 format
138 return Position(header_float(50), header_float(51), header_float(52));
139 }
140};
141
142template<typename T=float>
143struct Ccp4 : public Ccp4Base {
145
146 // this function assumes that the whole unit cell is covered with offset 0
149 if (grid.spacegroup)
150 ops = grid.spacegroup->operations();
151 ccp4_header.clear();
152 ccp4_header.resize(256 + ops.order() * 20, 0);
153 set_header_3i32(1, grid.nu, grid.nv, grid.nw); // NX, NY, NZ
154 set_header_3i32(5, 0, 0, 0); // NXSTART, NYSTART, NZSTART
156 set_header_3i32(8, grid.nu, grid.nv, grid.nw); // MX, MY, MZ
157 else // grid.axis_order == AxisOrder::ZYX
159 set_header_float(11, (float) grid.unit_cell.a);
160 set_header_float(12, (float) grid.unit_cell.b);
161 set_header_float(13, (float) grid.unit_cell.c);
163 set_header_float(15, (float) grid.unit_cell.beta);
166 set_header_3i32(17, 1, 2, 3); // MAPC, MAPR, MAPS
167 else // grid.axis_order == AxisOrder::ZYX
168 set_header_3i32(17, 3, 2, 1);
169 set_header_i32(23, grid.spacegroup ? grid.spacegroup->ccp4 : 1); // ISPG
170 set_header_i32(24, ops.order() * 80); // NSYMBT
171 set_header_str(27, "CCP4"); // EXTTYP
172 set_header_i32(28, 20140); // NVERSION
173 set_header_str(53, "MAP ");
174 set_header_i32(54, is_little_endian() ? 0x00004144 : 0x11110000); // MACHST
175 set_header_i32(56, 1); // labels
176 std::memset(header_word(57), ' ', 800 + ops.order() * 80);
177 set_header_str(57, "written by GEMMI");
178 int n = 257;
179 for (Op op : ops) {
180 set_header_str(n, op.triplet());
181 n += 20;
182 }
183 }
184
187 void update_ccp4_header(int mode=-1, bool update_stats=true) {
188 if (mode > 2 && mode != 6)
189 fail("Only modes 0, 1, 2 and 6 are supported.");
190 if (grid.point_count() == 0)
191 fail("update_ccp4_header(): set the grid first (it has size 0)");
193 fail("update_ccp4_header(): run setup() first");
194 if (update_stats)
196 if (ccp4_header.empty())
198 assert(ccp4_header.size() >= 256);
199 if (mode < 0) {
201 if (mode < 0)
202 fail("update_ccp4_header: specify map mode explicitly (usually 2)");
203 }
205 set_header_float(20, (float) hstats.dmin);
206 set_header_float(21, (float) hstats.dmax);
207 set_header_float(22, (float) hstats.dmean);
208 set_header_float(55, (float) hstats.rms);
209 // labels could be modified but it's not important
210 }
211
212 static int mode_for_data() {
213 if (std::is_same<T, std::int8_t>::value)
214 return 0;
215 if (std::is_same<T, std::int16_t>::value)
216 return 1;
217 if (std::is_same<T, float>::value)
218 return 2;
219 if (std::is_same<T, std::uint16_t>::value)
220 return 6;
221 return -1;
222 }
223
224 bool full_cell() const {
225 if (ccp4_header.empty())
226 return true; // assuming it's full cell
227 return
228 // NXSTART et al. must be 0
229 header_i32(5) == 0 && header_i32(6) == 0 && header_i32(7) == 0 &&
230 // MX == NX
231 header_i32(8) == grid.nu && header_i32(9) == grid.nv && header_i32(10) == grid.nw;
232 }
233
234 template<typename Stream>
235 void read_ccp4_header(Stream& f, const std::string& path) {
236 const size_t hsize = 256;
237 ccp4_header.resize(hsize);
238 if (!f.read(ccp4_header.data(), 4 * hsize))
239 fail("Failed to read map header: " + path);
240 if (header_str(53, 4) != "MAP ")
241 fail("Not a CCP4 map: " + path);
242 std::string machst = header_str(54, 4);
243 if (machst[0] != 0x44 && machst[0] != 0x11)
244 fail("Unsupported machine stamp (endianness) in the file?");
245 same_byte_order = machst[0] == (is_little_endian() ? 0x44 : 0x11);
248 size_t ext_w = header_i32(24) / 4; // NSYMBT in words
249 if (ext_w != 0) {
250 if (ext_w > 1000000)
251 fail("Unexpectedly long extended header: " + path);
252 ccp4_header.resize(hsize + ext_w);
253 if (!f.read(ccp4_header.data() + hsize, 4 * ext_w))
254 fail("Failed to read extended header: " + path);
255 }
256 grid.nu = header_i32(1);
257 grid.nv = header_i32(2);
258 grid.nw = header_i32(3);
259 for (int i = 0; i < 3; ++i) {
260 int axis = header_i32(17 + i);
261 if (axis < 1 || axis > 3)
262 fail("Unexpected axis value in word " + std::to_string(17 + i)
263 + ": " + std::to_string(axis));
264 }
268 hstats.rms = header_float(55);
270 auto pos = axis_positions();
272 if (pos[0] == 0 && pos[1] == 1 && pos[2] == 2 && full_cell()) {
275 }
276 }
277
279 void set_extent(const Box<Fractional>& box);
280
281 template<typename Stream>
282 void read_ccp4_stream(Stream f, const std::string& path);
283
284 void read_ccp4_file(const std::string& path) {
285 fileptr_t f = file_open(path.c_str(), "rb");
286 read_ccp4_stream(FileStream{f.get()}, path);
287 }
288
289 inline void read_ccp4_from_memory(const char* data, size_t size,
290 const std::string& name) {
291 read_ccp4_stream(MemoryStream(data, size), name);
292 }
293
294 template<typename Input>
296 if (input.is_stdin())
298 else if (input.is_compressed())
299 read_ccp4_stream(input.get_uncompressing_stream(), input.path());
300 else
301 read_ccp4_file(input.path());
302 }
303
304 void write_ccp4_map(const std::string& path) const;
305};
306
307
308namespace impl {
309
310template<typename From, typename To>
311To translate_map_point(From f) { return static_cast<To>(f); }
312// We convert map 2 to 0 by translating non-zero values to 1.
313template<> inline
314std::int8_t translate_map_point<float,std::int8_t>(float f) { return f != 0; }
315
316template<typename Stream, typename TFile, typename TMem>
317void read_data(Stream& f, std::vector<TMem>& content) {
318 if (std::is_same<TFile, TMem>::value) {
319 size_t len = content.size();
320 if (!f.read(content.data(), sizeof(TMem) * len))
321 fail("Failed to read all the data from the map file.");
322 } else {
323 constexpr size_t chunk_size = 64 * 1024;
324 std::vector<TFile> work(chunk_size);
325 for (size_t i = 0; i < content.size(); i += chunk_size) {
326 size_t len = std::min(chunk_size, content.size() - i);
327 if (!f.read(work.data(), sizeof(TFile) * len))
328 fail("Failed to read all the data from the map file.");
329 for (size_t j = 0; j < len; ++j)
330 content[i+j] = translate_map_point<TFile,TMem>(work[j]);
331 }
332 }
333}
334
335template<typename TFile, typename TMem>
336void write_data(const std::vector<TMem>& content, FILE* f) {
337 if (std::is_same<TMem, TFile>::value) {
338 size_t len = content.size();
339 if (std::fwrite(content.data(), sizeof(TFile), len, f) != len)
340 sys_fail("Failed to write data to the map file");
341 } else {
342 constexpr size_t chunk_size = 64 * 1024;
343 std::vector<TFile> work(chunk_size);
344 for (size_t i = 0; i < content.size(); i += chunk_size) {
345 size_t len = std::min(chunk_size, content.size() - i);
346 for (size_t j = 0; j < len; ++j)
347 work[j] = static_cast<TFile>(content[i+j]);
348 if (std::fwrite(work.data(), sizeof(TFile), len, f) != len)
349 sys_fail("Failed to write data to the map file");
350 }
351 }
352}
353
354} // namespace impl
355
356// This function was tested only on little-endian machines,
357// let us know if you need support for other architectures.
358template<typename T> template<typename Stream>
359void Ccp4<T>::read_ccp4_stream(Stream f, const std::string& path) {
360 read_ccp4_header(f, path);
361 grid.data.resize(grid.point_count());
362 int mode = header_i32(4);
363 if (mode == 0)
364 impl::read_data<Stream, std::int8_t>(f, grid.data);
365 else if (mode == 1)
366 impl::read_data<Stream, std::int16_t>(f, grid.data);
367 else if (mode == 2)
368 impl::read_data<Stream, float>(f, grid.data);
369 else if (mode == 6)
370 impl::read_data<Stream, std::uint16_t>(f, grid.data);
371 else
372 fail("Mode " + std::to_string(mode) + " is not supported "
373 "(only 0, 1, 2 and 6 are supported).");
374 //if (std::fgetc(f) != EOF)
375 // fail("The map file is longer then expected.");
376
377 if (!same_byte_order) {
378 if (sizeof(T) == 2)
379 for (T& value : grid.data)
381 else if (sizeof(T) == 4)
382 for (T& value : grid.data)
384 }
385}
386
387template<typename T>
389 if (grid.axis_order == AxisOrder::XYZ || ccp4_header.empty())
390 return;
391 // cell sampling does not change
392 const std::array<int, 3> sampl = header_3i32(8);
393 // get old metadata
394 const std::array<int, 3> pos = axis_positions();
395 std::array<int, 3> start = header_3i32(5);
396 int end[3] = { start[0] + grid.nu, start[1] + grid.nv, start[2] + grid.nw };
397 // set new metadata
399 set_header_3i32(5, start[pos[0]], start[pos[1]], start[pos[2]]);
400 for (int i = 0; i < 3; ++i) {
401 end[i] -= start[i];
402 start[i] = 0;
403 }
404 int crs[3] = { grid.nu, grid.nv, grid.nw };
405 grid.nu = crs[pos[0]];
406 grid.nv = crs[pos[1]];
407 grid.nw = crs[pos[2]];
408 } else {
409 grid.nu = sampl[0];
410 grid.nv = sampl[1];
411 grid.nw = sampl[2];
412 set_header_3i32(5, 0, 0, 0); // start
413 }
414 set_header_3i32(1, grid.nu, grid.nv, grid.nw); // NX, NY, NZ
415 set_header_3i32(17, 1, 2, 3); // axes (MAPC, MAPR, MAPS)
416 grid.axis_order = full_cell() ? AxisOrder::XYZ : AxisOrder::Unknown;
417 if (grid.axis_order == AxisOrder::XYZ)
418 grid.calculate_spacing();
419
420 // now set the data
421 {
422 std::vector<T> full(grid.point_count(), default_value);
423 int it[3];
424 int idx = 0;
425 for (it[2] = start[2]; it[2] < end[2]; it[2]++) // sections
426 for (it[1] = start[1]; it[1] < end[1]; it[1]++) // rows
427 for (it[0] = start[0]; it[0] < end[0]; it[0]++) { // cols
428 T val = grid.data[idx++];
429 size_t new_index = grid.index_s(it[pos[0]], it[pos[1]], it[pos[2]]);
430 full[new_index] = val;
431 }
432 grid.data = std::move(full);
433 }
434
435 if (mode == MapSetup::Full &&
436 // no need to apply symmetry if we started with the whole cell
437 (end[pos[0]] - start[pos[0]] < sampl[0] ||
438 end[pos[1]] - start[pos[1]] < sampl[1] ||
439 end[pos[2]] - start[pos[2]] < sampl[2]))
440 grid.symmetrize_nondefault(default_value);
441}
442
443template<typename T>
445 if (ccp4_header.empty())
446 fail("set_extent(): no header in the map. Call update_ccp4_header() first");
447 if (!full_cell())
448 fail("Ccp4::set_extent() works only after setup()");
449 if (grid.axis_order != AxisOrder::XYZ)
450 fail("Ccp4::set_extent() works only with XYZ order");
451 int u0 = (int)std::ceil(box.minimum.x * grid.nu);
452 int v0 = (int)std::ceil(box.minimum.y * grid.nv);
453 int w0 = (int)std::ceil(box.minimum.z * grid.nw);
454 int nu = (int)std::floor(box.maximum.x * grid.nu) - u0 + 1;
455 int nv = (int)std::floor(box.maximum.y * grid.nv) - v0 + 1;
456 int nw = (int)std::floor(box.maximum.z * grid.nw) - w0 + 1;
457 // set the data
458 std::vector<T> new_data((size_t)nu * nv * nw);
459 grid.get_subarray(new_data.data(), {u0, v0, w0}, {nu, nv, nw});
460 grid.data.swap(new_data);
461 // and metadata
462 grid.nu = nu;
463 grid.nv = nv;
464 grid.nw = nw;
465 set_header_3i32(1, grid.nu, grid.nv, grid.nw); // NX, NY, NZ
466 set_header_3i32(5, u0, v0, w0);
467 // AxisOrder::XYZ is used only for grid covering full cell
468 grid.axis_order = AxisOrder::Unknown;
469}
470
471template<typename T>
472void Ccp4<T>::write_ccp4_map(const std::string& path) const {
473 assert(ccp4_header.size() >= 256);
474 fileptr_t f = file_open(path.c_str(), "wb");
475 std::fwrite(ccp4_header.data(), 4, ccp4_header.size(), f.get());
476 int mode = header_i32(4);
477 if (mode == 0)
478 impl::write_data<std::int8_t>(grid.data, f.get());
479 else if (mode == 1)
480 impl::write_data<std::int16_t>(grid.data, f.get());
481 else if (mode == 2)
482 impl::write_data<float>(grid.data, f.get());
483 else if (mode == 6)
484 impl::write_data<std::uint16_t>(grid.data, f.get());
485}
486
487} // namespace gemmi
488#endif
void swap_four_bytes(void *start)
Definition fileutil.hpp:83
std::unique_ptr< std::FILE, decltype(&std::fclose)> fileptr_t
Definition fileutil.hpp:37
MapSetup
Definition ccp4.hpp:28
bool is_little_endian()
Definition fileutil.hpp:73
const SpaceGroup * find_spacegroup_by_number(int ccp4) noexcept
fileptr_t file_open(const char *path, const char *mode)
Definition fileutil.hpp:39
void swap_two_bytes(void *start)
Definition fileutil.hpp:78
GEMMI_COLD void sys_fail(const std::string &msg)
Definition fail.hpp:71
DataStats calculate_data_statistics(const std::vector< T > &data)
Definition stats.hpp:83
void fail(const std::string &msg)
Definition fail.hpp:59
void set_header_str(int w, const std::string &str)
Definition ccp4.hpp:80
std::string header_str(int w, size_t len=80) const
Definition ccp4.hpp:60
std::array< int, 3 > axis_positions() const
Definition ccp4.hpp:84
void set_header_i32(int w, int32_t value)
Definition ccp4.hpp:65
DataStats hstats
Definition ccp4.hpp:35
void set_header_3i32(int w, int32_t x, int32_t y, int32_t z)
Definition ccp4.hpp:70
Position get_origin() const
Definition ccp4.hpp:137
std::array< int, 3 > header_3i32(int w) const
Definition ccp4.hpp:50
Box< Fractional > get_extent() const
Definition ccp4.hpp:101
int32_t header_i32(int w) const
Definition ccp4.hpp:44
float header_float(int w) const
Definition ccp4.hpp:53
void * header_word(int w)
Definition ccp4.hpp:41
double header_rfloat(int w) const
Definition ccp4.hpp:97
void set_header_float(int w, float value)
Definition ccp4.hpp:75
const void * header_word(int w) const
Definition ccp4.hpp:42
Transform get_skew_transformation() const
Definition ccp4.hpp:125
bool has_skew_transformation() const
Definition ccp4.hpp:122
bool same_byte_order
Definition ccp4.hpp:38
std::vector< int32_t > ccp4_header
Definition ccp4.hpp:37
void read_ccp4_from_memory(const char *data, size_t size, const std::string &name)
Definition ccp4.hpp:289
void update_ccp4_header(int mode=-1, bool update_stats=true)
If the header is empty, prepare it; otherwise, update only MODE and, if update_stats==true,...
Definition ccp4.hpp:187
void setup(T default_value, MapSetup mode=MapSetup::Full)
Definition ccp4.hpp:388
static int mode_for_data()
Definition ccp4.hpp:212
Grid< T > grid
Definition ccp4.hpp:144
void read_ccp4_file(const std::string &path)
Definition ccp4.hpp:284
void write_ccp4_map(const std::string &path) const
Definition ccp4.hpp:472
void prepare_ccp4_header_except_mode_and_stats()
Definition ccp4.hpp:147
void set_extent(const Box< Fractional > &box)
Definition ccp4.hpp:444
void read_ccp4_header(Stream &f, const std::string &path)
Definition ccp4.hpp:235
bool full_cell() const
Definition ccp4.hpp:224
void read_ccp4(Input &&input)
Definition ccp4.hpp:295
void read_ccp4_stream(Stream f, const std::string &path)
Definition ccp4.hpp:359
std::vector< T > data
Definition grid.hpp:235
AxisOrder axis_order
Definition grid.hpp:165
const SpaceGroup * spacegroup
Definition grid.hpp:163
size_t point_count() const
Definition grid.hpp:167
UnitCell unit_cell
Definition grid.hpp:162
void calculate_spacing()
set spacing and orth_n
Definition grid.hpp:328
Coordinates in Angstroms - orthogonal (Cartesian) coordinates.
Definition unitcell.hpp:32
void set(double a_, double b_, double c_, double alpha_, double beta_, double gamma_)
Definition unitcell.hpp:280