Gemmi C++ API
Loading...
Searching...
No Matches
ccp4.hpp
Go to the documentation of this file.
1// Copyright 2018 Global Phasing Ltd.
2//
3// CCP4 format for maps and masks.
4
5#ifndef GEMMI_CCP4_HPP_
6#define GEMMI_CCP4_HPP_
7
8#include <cassert>
9#include <cmath> // for ceil, fabs, floor, round
10#include <cstdint> // for uint16_t, int32_t
11#include <cstdio> // for FILE
12#include <cstring> // for memcpy
13#include <array>
14#include <string>
15#include <type_traits> // for is_same
16#include <vector>
17#include "symmetry.hpp"
18#include "fail.hpp" // for fail
19#include "fileutil.hpp" // for file_open, is_little_endian, ...
20#include "input.hpp" // for AnyStream, FileStream
21#include "grid.hpp"
22
23namespace gemmi {
24
25using std::int32_t;
26
27// options for Ccp4<>::setup
28enum class MapSetup {
29 Full, // reorder and expand to the whole unit cell
30 NoSymmetry, // reorder and resize to the whole cell, but no symmetry ops
31 ReorderOnly // reorder axes to X, Y, Z
32};
33
34struct Ccp4Base {
35 DataStats hstats; // data statistics read from / written to ccp4 map
36 // stores raw headers if the grid was read from ccp4 map
37 std::vector<int32_t> ccp4_header;
38 bool same_byte_order = true;
39
40 // methods to access info from ccp4 headers, w is word number from the spec
41 void* header_word(int w) { return &ccp4_header.at(w - 1); }
42 const void* header_word(int w) const { return &ccp4_header.at(w - 1); }
43
44 int32_t header_i32(int w) const {
45 int32_t value = ccp4_header.at(w - 1);
46 if (!same_byte_order)
47 swap_four_bytes(&value);
48 return value;
49 }
50 std::array<int, 3> header_3i32(int w) const {
51 return {{ header_i32(w), header_i32(w+1), header_i32(w+2) }};
52 }
53 float header_float(int w) const {
54 int32_t int_value = header_i32(w);
55 float f;
56 std::memcpy(&f, &int_value, 4);
57 return f;
58 }
59 std::string header_str(int w, size_t len=80) const {
60 if (4 * ccp4_header.size() < 4 * (w - 1) + len)
61 fail("invalid end of string");
62 return std::string(reinterpret_cast<const char*>(header_word(w)), len);
63 }
64 void set_header_i32(int w, int32_t value) {
65 if (!same_byte_order)
66 swap_four_bytes(&value);
67 ccp4_header.at(w - 1) = value;
68 }
69 void set_header_3i32(int w, int32_t x, int32_t y, int32_t z) {
70 set_header_i32(w, x);
71 set_header_i32(w+1, y);
72 set_header_i32(w+2, z);
73 }
74 void set_header_float(int w, float value) {
75 int32_t int32_value;
76 std::memcpy(&int32_value, &value, 4);
78 }
79 void set_header_str(int w, const std::string& str) {
80 std::memcpy(header_word(w), str.c_str(), str.size());
81 }
82
83 std::array<int, 3> axis_positions() const {
84 if (ccp4_header.empty())
85 return {{0, 1, 2}}; // assuming it's X,Y,Z
86 std::array<int, 3> pos{{-1, -1, -1}};
87 for (int i = 0; i != 3; ++i) {
88 int mapi = header_i32(17 + i);
89 if (mapi <= 0 || mapi > 3 || pos[mapi - 1] != -1)
90 fail("Incorrect MAPC/MAPR/MAPS records");
91 pos[mapi - 1] = i;
92 }
93 return pos;
94 }
95
96 double header_rfloat(int w) const { // rounded to 5 digits
97 return std::round(1e5 * header_float(w)) / 1e5;
98 }
99
102 // cf. setup()
103 auto pos = axis_positions();
104 std::array<int, 3> start = header_3i32(5);
105 std::array<int, 3> size = header_3i32(1);
106 std::array<int, 3> sampl = header_3i32(8);
107 for (int i = 0; i < 3; ++i) {
108 double scale = 1. / sampl[i];
109 int p = pos[i];
110 box.minimum.at(i) = scale * start[p] - 1e-9;
111 box.maximum.at(i) = scale * (start[p] + size[p] - 1) + 1e-9;
112 }
113 return box;
114 }
115
116 // Skew transformation (words 25-37) is supported by CCP4 maplib and PyMOL,
117 // but it's not in the MRC format and is not supported by most programs.
118 // From maplib.html: Skew transformation is from standard orthogonal
119 // coordinate frame (as used for atoms) to orthogonal map frame, as
120 // Xo(map) = S * (Xo(atoms) - t)
122 return header_i32(25) != 0; // LSKFLG should be 0 or 1
123 }
125 return {
126 // 26-34 SKWMAT
127 { header_float(26), header_float(27), header_float(28),
129 header_float(32), header_float(33), header_float(34) },
130 // 35-37 SKWTRN
131 { header_float(35), header_float(36), header_float(37) }
132 };
133 }
134
135 // ORIGIN (words 50-52), used in MRC format, zeros in CCP4 format
137 return Position(header_float(50), header_float(51), header_float(52));
138 }
139
140 // this function assumes that the whole unit cell is covered with offset 0
143 if (grid.spacegroup)
144 ops = grid.spacegroup->operations();
145 ccp4_header.clear();
146 ccp4_header.resize(256 + ops.order() * 20, 0);
147 set_header_3i32(1, grid.nu, grid.nv, grid.nw); // NX, NY, NZ
148 set_header_3i32(5, 0, 0, 0); // NXSTART, NYSTART, NZSTART
149 if (grid.axis_order == AxisOrder::XYZ)
150 set_header_3i32(8, grid.nu, grid.nv, grid.nw); // MX, MY, MZ
151 else // grid.axis_order == AxisOrder::ZYX
152 set_header_3i32(8, grid.nw, grid.nv, grid.nu);
153 set_header_float(11, (float) grid.unit_cell.a);
154 set_header_float(12, (float) grid.unit_cell.b);
155 set_header_float(13, (float) grid.unit_cell.c);
156 set_header_float(14, (float) grid.unit_cell.alpha);
157 set_header_float(15, (float) grid.unit_cell.beta);
158 set_header_float(16, (float) grid.unit_cell.gamma);
159 if (grid.axis_order == AxisOrder::XYZ)
160 set_header_3i32(17, 1, 2, 3); // MAPC, MAPR, MAPS
161 else // grid.axis_order == AxisOrder::ZYX
162 set_header_3i32(17, 3, 2, 1);
163 set_header_i32(23, grid.spacegroup ? grid.spacegroup->ccp4 : 1); // ISPG
164 set_header_i32(24, ops.order() * 80); // NSYMBT
165 set_header_str(27, "CCP4"); // EXTTYP
166 set_header_i32(28, 20140); // NVERSION
167 set_header_str(53, "MAP ");
168 set_header_i32(54, is_little_endian() ? 0x00004144 : 0x11110000); // MACHST
169 set_header_i32(56, 1); // labels
170 std::memset(header_word(57), ' ', 800 + ops.order() * 80);
171 set_header_str(57, "written by GEMMI");
172 int n = 257;
173 for (Op op : ops) {
174 set_header_str(n, op.triplet());
175 n += 20;
176 }
177 }
178
181 set_header_float(20, (float) hstats.dmin);
182 set_header_float(21, (float) hstats.dmax);
183 set_header_float(22, (float) hstats.dmean);
184 set_header_float(55, (float) hstats.rms);
185 // labels could be modified but it's not important
186 }
187
188 bool full_cell_(const GridMeta& grid) const {
189 if (ccp4_header.empty())
190 return true; // assuming it's full cell
191 return
192 // NXSTART et al. must be 0
193 header_i32(5) == 0 && header_i32(6) == 0 && header_i32(7) == 0 &&
194 // MX == NX
195 header_i32(8) == grid.nu && header_i32(9) == grid.nv && header_i32(10) == grid.nw;
196 }
197
198 void read_ccp4_header_(GridMeta* grid, AnyStream& f, const std::string& path) {
199 const size_t hsize = 256;
200 ccp4_header.resize(hsize);
201 if (!f.read(ccp4_header.data(), 4 * hsize))
202 fail("Failed to read map header: " + path);
203 if (header_str(53, 4) != "MAP ")
204 fail("Not a CCP4 map: " + path);
205 std::string machst = header_str(54, 4);
206 if (machst[0] != 0x44 && machst[0] != 0x11)
207 fail("Unsupported machine stamp (endianness) in the file?");
208 same_byte_order = machst[0] == (is_little_endian() ? 0x44 : 0x11);
209 size_t ext_w = header_i32(24) / 4; // NSYMBT in words
210 if (ext_w != 0) {
211 if (ext_w > 1000000)
212 fail("Unexpectedly long extended header: " + path);
213 ccp4_header.resize(hsize + ext_w);
214 if (!f.read(ccp4_header.data() + hsize, 4 * ext_w))
215 fail("Failed to read extended header: " + path);
216 }
217 for (int i = 0; i < 3; ++i) {
218 int axis = header_i32(17 + i);
219 if (axis < 1 || axis > 3)
220 fail("Unexpected axis value in word " + std::to_string(17 + i)
221 + ": " + std::to_string(axis));
222 }
226 hstats.rms = header_float(55);
227 if (grid) {
228 grid->unit_cell.set(header_rfloat(11), header_rfloat(12), header_rfloat(13),
230 grid->nu = header_i32(1);
231 grid->nv = header_i32(2);
232 grid->nw = header_i32(3);
233 grid->spacegroup = find_spacegroup_by_number(header_i32(23));
234 auto pos = axis_positions();
235 grid->axis_order = AxisOrder::Unknown;
236 if (pos[0] == 0 && pos[1] == 1 && pos[2] == 2 && full_cell_(*grid))
237 grid->axis_order = AxisOrder::XYZ;
238 }
239 }
240};
241
242template<typename T=float>
243struct Ccp4 : public Ccp4Base {
245
248 void update_ccp4_header(int mode=-1, bool update_stats=true) {
249 if (mode > 2 && mode != 6)
250 fail("Only modes 0, 1, 2 and 6 are supported.");
251 if (grid.point_count() == 0)
252 fail("update_ccp4_header(): set the grid first (it has size 0)");
254 fail("update_ccp4_header(): run setup() first");
255 if (update_stats)
257 if (ccp4_header.empty())
259 assert(ccp4_header.size() >= 256);
260 if (mode < 0) {
262 if (mode < 0)
263 fail("update_ccp4_header: specify map mode explicitly (usually 2)");
264 }
266 }
267
268 static int mode_for_data() {
269 if (std::is_same<T, std::int8_t>::value)
270 return 0;
271 if (std::is_same<T, std::int16_t>::value)
272 return 1;
273 if (std::is_same<T, float>::value)
274 return 2;
275 if (std::is_same<T, std::uint16_t>::value)
276 return 6;
277 return -1;
278 }
279
280 bool full_cell() const { return full_cell_(grid); }
281
282 void read_ccp4_header(AnyStream& f, const std::string& path) {
283 read_ccp4_header_(&grid, f, path);
286 }
287
289 void set_extent(const Box<Fractional>& box);
290
291 void read_ccp4_stream(AnyStream& f, const std::string& path);
292
293 void read_ccp4_file(const std::string& path) {
294 FileStream stream(path.c_str(), "rb");
296 }
297
298 void read_ccp4_from_memory(const char* data, size_t size, const std::string& name) {
299 MemoryStream stream(data, size);
301 }
302
303 template<typename Input>
305 std::unique_ptr<AnyStream> stream = input.create_stream();
306 read_ccp4_stream(*stream, input.path());
307 }
308
309 void write_ccp4_map(const std::string& path) const;
310};
311
312
313namespace impl {
314
315template<typename From, typename To>
316To translate_map_point(From f) { return static_cast<To>(f); }
317// We convert map 2 to 0 by translating non-zero values to 1.
318template<> inline
319std::int8_t translate_map_point<float,std::int8_t>(float f) { return f != 0; }
320
321template<typename TFile, typename TMem>
322void read_data(AnyStream& f, std::vector<TMem>& content) {
323 if (std::is_same<TFile, TMem>::value) {
324 size_t len = content.size();
325 if (!f.read(content.data(), sizeof(TMem) * len))
326 fail("Failed to read all the data from the map file.");
327 } else {
328 constexpr size_t chunk_size = 64 * 1024;
329 std::vector<TFile> work(chunk_size);
330 for (size_t i = 0; i < content.size(); i += chunk_size) {
331 size_t len = std::min(chunk_size, content.size() - i);
332 if (!f.read(work.data(), sizeof(TFile) * len))
333 fail("Failed to read all the data from the map file.");
334 for (size_t j = 0; j < len; ++j)
335 content[i+j] = translate_map_point<TFile,TMem>(work[j]);
336 }
337 }
338}
339
340template<typename TFile, typename TMem>
341void write_data(const std::vector<TMem>& content, FILE* f) {
342 if (std::is_same<TMem, TFile>::value) {
343 size_t len = content.size();
344 if (std::fwrite(content.data(), sizeof(TFile), len, f) != len)
345 sys_fail("Failed to write data to the map file");
346 } else {
347 constexpr size_t chunk_size = 64 * 1024;
348 std::vector<TFile> work(chunk_size);
349 for (size_t i = 0; i < content.size(); i += chunk_size) {
350 size_t len = std::min(chunk_size, content.size() - i);
351 for (size_t j = 0; j < len; ++j)
352 work[j] = static_cast<TFile>(content[i+j]);
353 if (std::fwrite(work.data(), sizeof(TFile), len, f) != len)
354 sys_fail("Failed to write data to the map file");
355 }
356 }
357}
358
359} // namespace impl
360
361// This function was tested only on little-endian machines,
362// let us know if you need support for other architectures.
363template<typename T>
364void Ccp4<T>::read_ccp4_stream(AnyStream& f, const std::string& path) {
365 read_ccp4_header(f, path);
366 grid.data.resize(grid.point_count());
367 int mode = header_i32(4);
368 if (mode == 0)
369 impl::read_data<std::int8_t>(f, grid.data);
370 else if (mode == 1)
371 impl::read_data<std::int16_t>(f, grid.data);
372 else if (mode == 2)
373 impl::read_data<float>(f, grid.data);
374 else if (mode == 6)
375 impl::read_data<std::uint16_t>(f, grid.data);
376 else
377 fail("Mode " + std::to_string(mode) + " is not supported "
378 "(only 0, 1, 2 and 6 are supported).");
379 //if (std::fgetc(f) != EOF)
380 // fail("The map file is longer then expected.");
381
382 if (!same_byte_order) {
383 if (sizeof(T) == 2)
384 for (T& value : grid.data)
386 else if (sizeof(T) == 4)
387 for (T& value : grid.data)
389 }
390}
391
392template<typename T>
394 if (grid.axis_order == AxisOrder::XYZ || ccp4_header.empty())
395 return;
396 // cell sampling does not change
397 const std::array<int, 3> sampl = header_3i32(8);
398 // get old metadata
399 const std::array<int, 3> pos = axis_positions();
400 std::array<int, 3> start = header_3i32(5);
401 int end[3] = { start[0] + grid.nu, start[1] + grid.nv, start[2] + grid.nw };
402 // set new metadata
404 set_header_3i32(5, start[pos[0]], start[pos[1]], start[pos[2]]);
405 for (int i = 0; i < 3; ++i) {
406 end[i] -= start[i];
407 start[i] = 0;
408 }
409 int crs[3] = { grid.nu, grid.nv, grid.nw };
410 grid.nu = crs[pos[0]];
411 grid.nv = crs[pos[1]];
412 grid.nw = crs[pos[2]];
413 } else {
414 grid.nu = sampl[0];
415 grid.nv = sampl[1];
416 grid.nw = sampl[2];
417 set_header_3i32(5, 0, 0, 0); // start
418 }
419 set_header_3i32(1, grid.nu, grid.nv, grid.nw); // NX, NY, NZ
420 set_header_3i32(17, 1, 2, 3); // axes (MAPC, MAPR, MAPS)
421 grid.axis_order = full_cell() ? AxisOrder::XYZ : AxisOrder::Unknown;
422 if (grid.axis_order == AxisOrder::XYZ)
423 grid.calculate_spacing();
424
425 // now set the data
426 {
427 std::vector<T> full(grid.point_count(), default_value);
428 int it[3];
429 int idx = 0;
430 for (it[2] = start[2]; it[2] < end[2]; it[2]++) // sections
431 for (it[1] = start[1]; it[1] < end[1]; it[1]++) // rows
432 for (it[0] = start[0]; it[0] < end[0]; it[0]++) { // cols
433 T val = grid.data[idx++];
434 size_t new_index = grid.index_s(it[pos[0]], it[pos[1]], it[pos[2]]);
435 full[new_index] = val;
436 }
437 grid.data = std::move(full);
438 }
439
440 if (mode == MapSetup::Full &&
441 // no need to apply symmetry if we started with the whole cell
442 (end[pos[0]] - start[pos[0]] < sampl[0] ||
443 end[pos[1]] - start[pos[1]] < sampl[1] ||
444 end[pos[2]] - start[pos[2]] < sampl[2]))
445 grid.symmetrize_nondefault(default_value);
446}
447
448template<typename T>
450 if (ccp4_header.empty())
451 fail("set_extent(): no header in the map. Call update_ccp4_header() first");
452 if (!full_cell())
453 fail("Ccp4::set_extent() works only after setup()");
454 if (grid.axis_order != AxisOrder::XYZ)
455 fail("Ccp4::set_extent() works only with XYZ order");
456 int u0 = (int)std::ceil(box.minimum.x * grid.nu);
457 int v0 = (int)std::ceil(box.minimum.y * grid.nv);
458 int w0 = (int)std::ceil(box.minimum.z * grid.nw);
459 int nu = (int)std::floor(box.maximum.x * grid.nu) - u0 + 1;
460 int nv = (int)std::floor(box.maximum.y * grid.nv) - v0 + 1;
461 int nw = (int)std::floor(box.maximum.z * grid.nw) - w0 + 1;
462 // set the data
463 std::vector<T> new_data((size_t)nu * nv * nw);
464 grid.get_subarray(new_data.data(), {u0, v0, w0}, {nu, nv, nw});
465 grid.data.swap(new_data);
466 // and metadata
467 grid.nu = nu;
468 grid.nv = nv;
469 grid.nw = nw;
470 set_header_3i32(1, grid.nu, grid.nv, grid.nw); // NX, NY, NZ
471 set_header_3i32(5, u0, v0, w0);
472 // AxisOrder::XYZ is used only for grid covering full cell
473 grid.axis_order = AxisOrder::Unknown;
474}
475
476template<typename T>
477void Ccp4<T>::write_ccp4_map(const std::string& path) const {
478 assert(ccp4_header.size() >= 256);
479 fileptr_t f = file_open(path.c_str(), "wb");
480 std::fwrite(ccp4_header.data(), 4, ccp4_header.size(), f.get());
481 int mode = header_i32(4);
482 if (mode == 0)
483 impl::write_data<std::int8_t>(grid.data, f.get());
484 else if (mode == 1)
485 impl::write_data<std::int16_t>(grid.data, f.get());
486 else if (mode == 2)
487 impl::write_data<float>(grid.data, f.get());
488 else if (mode == 6)
489 impl::write_data<std::uint16_t>(grid.data, f.get());
490}
491
492GEMMI_DLL Ccp4<float> read_ccp4_map(const std::string& path, bool setup);
493GEMMI_DLL Ccp4<int8_t> read_ccp4_mask(const std::string& path, bool setup);
494GEMMI_DLL Ccp4Base read_ccp4_header(const std::string& path);
495
496} // namespace gemmi
497#endif
fail(), unreachable() and __declspec/__attribute__ macros
#define GEMMI_DLL
Definition fail.hpp:53
File-related utilities.
3d grids used by CCP4 maps, cell-method search and hkl data.
Input abstraction. Used to decouple file reading and decompression.
void swap_four_bytes(void *start)
Definition fileutil.hpp:94
GEMMI_DLL Ccp4< float > read_ccp4_map(const std::string &path, bool setup)
MapSetup
Definition ccp4.hpp:28
GEMMI_DLL Ccp4Base read_ccp4_header(const std::string &path)
bool is_little_endian()
Definition fileutil.hpp:84
const SpaceGroup * find_spacegroup_by_number(int ccp4) noexcept
Definition symmetry.hpp:852
fileptr_t file_open(const char *path, const char *mode)
Definition fileutil.hpp:50
void swap_two_bytes(void *start)
Definition fileutil.hpp:89
GEMMI_COLD void sys_fail(const std::string &msg)
Definition fail.hpp:71
DataStats calculate_data_statistics(const std::vector< T > &data)
Definition stats.hpp:107
GEMMI_DLL Ccp4< int8_t > read_ccp4_mask(const std::string &path, bool setup)
void fail(const std::string &msg)
Definition fail.hpp:59
std::unique_ptr< std::FILE, needs_fclose > fileptr_t
Definition fileutil.hpp:48
void set_header_str(int w, const std::string &str)
Definition ccp4.hpp:79
std::string header_str(int w, size_t len=80) const
Definition ccp4.hpp:59
std::array< int, 3 > axis_positions() const
Definition ccp4.hpp:83
void set_header_i32(int w, int32_t value)
Definition ccp4.hpp:64
DataStats hstats
Definition ccp4.hpp:35
void set_header_3i32(int w, int32_t x, int32_t y, int32_t z)
Definition ccp4.hpp:69
Position get_origin() const
Definition ccp4.hpp:136
void prepare_ccp4_header_except_mode_and_stats(GridMeta &grid)
Definition ccp4.hpp:141
std::array< int, 3 > header_3i32(int w) const
Definition ccp4.hpp:50
Box< Fractional > get_extent() const
Definition ccp4.hpp:100
int32_t header_i32(int w) const
Definition ccp4.hpp:44
float header_float(int w) const
Definition ccp4.hpp:53
void * header_word(int w)
Definition ccp4.hpp:41
double header_rfloat(int w) const
Definition ccp4.hpp:96
void set_header_float(int w, float value)
Definition ccp4.hpp:74
const void * header_word(int w) const
Definition ccp4.hpp:42
Transform get_skew_transformation() const
Definition ccp4.hpp:124
bool has_skew_transformation() const
Definition ccp4.hpp:121
bool same_byte_order
Definition ccp4.hpp:38
void update_header_mode_and_stats(int mode)
Definition ccp4.hpp:179
std::vector< int32_t > ccp4_header
Definition ccp4.hpp:37
void read_ccp4_from_memory(const char *data, size_t size, const std::string &name)
Definition ccp4.hpp:298
void update_ccp4_header(int mode=-1, bool update_stats=true)
If the header is empty, prepare it; otherwise, update only MODE and, if update_stats==true,...
Definition ccp4.hpp:248
void read_ccp4_stream(AnyStream &f, const std::string &path)
Definition ccp4.hpp:364
void setup(T default_value, MapSetup mode=MapSetup::Full)
Definition ccp4.hpp:393
static int mode_for_data()
Definition ccp4.hpp:268
Grid< T > grid
Definition ccp4.hpp:244
void read_ccp4_file(const std::string &path)
Definition ccp4.hpp:293
void write_ccp4_map(const std::string &path) const
Definition ccp4.hpp:477
void set_extent(const Box< Fractional > &box)
Definition ccp4.hpp:449
bool full_cell() const
Definition ccp4.hpp:280
void read_ccp4_header(AnyStream &f, const std::string &path)
Definition ccp4.hpp:282
void read_ccp4(Input &&input)
Definition ccp4.hpp:304
std::vector< T > data
Definition grid.hpp:245
The base of Grid classes that does not depend on stored data type.
Definition grid.hpp:168
AxisOrder axis_order
Definition grid.hpp:172
const SpaceGroup * spacegroup
Definition grid.hpp:170
size_t point_count() const
Definition grid.hpp:174
UnitCell unit_cell
Definition grid.hpp:169
void calculate_spacing()
set spacing and orth_n
Definition grid.hpp:338
Coordinates in Angstroms - orthogonal (Cartesian) coordinates.
Definition unitcell.hpp:32
Crystallographic Symmetry. Space Groups. Coordinate Triplets.