Gemmi C++ API
Loading...
Searching...
No Matches
to_cif.hpp
Go to the documentation of this file.
1// Copyright 2017 Global Phasing Ltd.
2
3// Writing cif::Document or its parts to std::ostream.
4
5#ifndef GEMMI_TO_CIF_HPP_
6#define GEMMI_TO_CIF_HPP_
7
8#include <ostream>
9#include "cifdoc.hpp"
10
11namespace gemmi {
12namespace cif {
13
/// Deprecated formatting presets. Each one converts implicitly to
/// WriteOptions (see the converting constructor below).
enum class Style {
  Simple,
  // NOTE(review): the listing dropped this enumerator's line; the name is
  // restored from upstream gemmi, its effect (compact = true) from the
  // switch below -- confirm against the real header.
  NoBlankLines,
  PreferPairs, // write single-row loops as pairs
  Pdbx,        // PreferPairs + put '#' (empty comments) between categories
  Indent35,    // start values in pairs from 35th column
  Aligned,     // columns in tables are left-aligned
};

/// Formatting switches used by the CIF writing functions in this file.
struct WriteOptions {
  /// write single-row loops as pairs
  bool prefer_pairs = false;
  /// no blank lines between categories, only between blocks
  bool compact = false;
  /// put '#' (empty comments) before/after categories
  bool misuse_hash = false;
  /// width reserved for tags in pairs; one separating space follows the
  /// padded tag (e.g. 33 = value starts at 35th column)
  std::uint16_t align_pairs = 0;
  /// if non-zero, determines max width of each column in a loop and aligns
  /// all values to this width; the width is calculated as
  /// min(width of the longest value, align_loops)
  std::uint16_t align_loops = 0;

  WriteOptions() = default;

  // implicit conversion from deprecated Style (for backward compatibility)
  WriteOptions(Style style) {
    switch (style) {
      case Style::Simple:
        break;
      case Style::NoBlankLines:
        compact = true;
        break;
      case Style::PreferPairs:
        prefer_pairs = true;
        break;
      case Style::Pdbx:
        prefer_pairs = true;
        misuse_hash = true;
        break;
      case Style::Indent35:
        align_pairs = 33;
        break;
      case Style::Aligned:
        align_pairs = 33;
        align_loops = 30;
        break;
    }
  }

  /// Returns the options that differ from the defaults as a comma-separated
  /// list (e.g. "prefer_pairs,align_pairs=33"); empty if nothing is set.
  std::string str() const {
    std::string s;
    if (prefer_pairs)
      s += "prefer_pairs,";
    if (compact)
      s += "compact,";
    if (misuse_hash)
      s += "misuse_hash,";
    if (align_pairs != 0)
      s += "align_pairs=" + std::to_string(align_pairs) + ",";
    if (align_loops != 0)
      s += "align_loops=" + std::to_string(align_loops) + ",";
    if (!s.empty())
      s.pop_back();  // drop the trailing comma
    return s;
  }
};
78
/// std::ostream with buffering.
/// Output is collected in a fixed internal buffer and handed to the wrapped
/// stream in larger chunks. Pending data is passed on by flush() and by the
/// destructor.
class BufOstream {
public:
  explicit BufOstream(std::ostream& os_) : os(os_), ptr(buf) {}
  // ptr points into this object's own buf, so a member-wise copy would
  // leave the copy writing into (and flushing) the original's buffer.
  BufOstream(const BufOstream&) = delete;
  BufOstream& operator=(const BufOstream&) = delete;
  // The writers in this file never call flush() themselves; buffered data
  // reaches the stream here.
  ~BufOstream() { flush(); }

  /// Passes everything buffered so far to the wrapped stream.
  void flush() {
    os.write(buf, static_cast<std::streamsize>(ptr - buf));
    ptr = buf;
  }

  /// Appends len bytes starting at s. Data that cannot fit below the
  /// reserved margin bypasses the buffer and goes straight to the stream.
  void write(const char* s, size_t len) {
    // the last 512 bytes are reserved for the unchecked put() calls
    constexpr size_t margin = sizeof(buf) - 512;
    if (static_cast<size_t>(ptr - buf) + len > margin) {
      flush();
      if (len > margin) {
        os.write(s, static_cast<std::streamsize>(len));
        return;
      }
    }
    std::memcpy(ptr, s, len);
    ptr += len;
  }

  void operator<<(const std::string& s) {
    write(s.c_str(), s.size());
  }

  // put() doesn't check the buffer boundary; it relies on the 512 bytes
  // that write() and pad() leave free.
  void put(char c) {
    *ptr++ = c;
  }

  /// Appends n spaces. n is derived from user-settable widths (up to 65535),
  /// so the padding is emitted in chunks that never run past the margin.
  void pad(size_t n) {
    constexpr size_t margin = sizeof(buf) - 512;
    while (n != 0) {
      size_t used = static_cast<size_t>(ptr - buf);
      if (used >= margin) {
        flush();
        used = 0;
      }
      const size_t room = margin - used;
      const size_t chunk = n < room ? n : room;
      std::memset(ptr, ' ', chunk);
      ptr += chunk;
      n -= chunk;
    }
  }

private:
  std::ostream& os;
  // increasing buffer to 8kb or 64kb doesn't make significant difference
  char buf[4096];
  char* ptr;
};
118
119// CIF files are read in binary mode. It makes difference only for text fields.
120// If the text field with \r\n would be written as is in text mode on Windows
121// \r would get duplicated. As a workaround, here we convert \r\n to \n.
122// Hopefully \r that gets removed here is never meaningful.
123inline void write_text_field(BufOstream& os, const std::string& value) {
124 for (size_t pos = 0, end = 0; end != std::string::npos; pos = end + 1) {
125 end = value.find("\r\n", pos);
126 size_t len = (end == std::string::npos ? value.size() : end) - pos;
127 os.write(value.c_str() + pos, len);
128 }
129}
130
131inline void write_out_pair(BufOstream& os, const std::string& name,
132 const std::string& value, WriteOptions options) {
133 os << name;
134 if (is_text_field(value)) {
135 os.put('\n');
136 write_text_field(os, value);
137 } else {
138 if (name.size() + value.size() > 120) {
139 os.put('\n');
140 } else {
141 os.put(' ');
142 if (name.size() < options.align_pairs)
143 os.pad(options.align_pairs - name.size());
144 }
145 os << value;
146 }
147 os.put('\n');
148}
149
150inline void write_out_loop(BufOstream& os, const Loop& loop, WriteOptions options) {
151 if (loop.values.empty())
152 return;
153 if (options.prefer_pairs && loop.length() == 1) {
154 for (size_t i = 0; i != loop.tags.size(); ++i)
155 write_out_pair(os, loop.tags[i], loop.values[i], options);
156 return;
157 }
158 // tags
159 os.write("loop_", 5);
160 for (const std::string& tag : loop.tags) {
161 os.put('\n');
162 os << tag;
163 }
164 // values
165 size_t ncol = loop.tags.size();
166
167 std::vector<size_t> col_width(ncol, 0);
168 if (options.align_loops > 0) {
169 size_t col = 0;
170 for (const std::string& val : loop.values) {
171 if (!is_text_field(val))
172 col_width[col] = std::max(col_width[col], val.size());
173 if (++col == ncol)
174 col = 0;
175 }
176 for (size_t& w : col_width)
177 w = std::min(w, (size_t)options.align_loops);
178 }
179
180 size_t col = 0;
181 bool need_new_line = true;
182 for (const std::string& val : loop.values) {
183 bool text_field = is_text_field(val);
184 os.put(need_new_line || text_field ? '\n' : ' ');
186 if (text_field)
187 write_text_field(os, val);
188 else
189 os << val;
190 if (col != ncol - 1) {
191 if (val.size() < col_width[col])
192 os.pad(col_width[col] - val.size());
193 ++col;
194 } else {
195 col = 0;
196 need_new_line = true;
197 }
198 }
199 os.put('\n');
200}
201
202inline void write_out_item(BufOstream& os, const Item& item, WriteOptions options) {
203 switch (item.type) {
204 case ItemType::Pair:
205 write_out_pair(os, item.pair[0], item.pair[1], options);
206 break;
207 case ItemType::Loop:
208 write_out_loop(os, item.loop, options);
209 break;
210 case ItemType::Frame:
211 os.write("save_", 5);
212 os << item.frame.name;
213 os.put('\n');
214 for (const Item& inner_item : item.frame.items)
216 os.write("save_\n", 6);
217 break;
219 os << item.pair[1];
220 os.put('\n');
221 break;
222 case ItemType::Erased:
223 break;
224 }
225}
226
227inline bool should_be_separated_(const Item& a, const Item& b) {
228 if (a.type == ItemType::Comment || b.type == ItemType::Comment)
229 return false;
230 if (a.type != ItemType::Pair || b.type != ItemType::Pair)
231 return true;
232 // check if we have mmcif-like tags from different categories
233 auto adot = a.pair[0].find('.');
234 if (adot == std::string::npos)
235 return false;
236 auto bdot = b.pair[0].find('.');
237 return adot != bdot || a.pair[0].compare(0, adot, b.pair[0], 0, adot) != 0;
238}
239
240inline void write_cif_block_to_stream(std::ostream& os_, const Block& block,
242 BufOstream os(os_);
243 os.write("data_", 5);
244 os << block.name;
245 os.put('\n');
246 if (options.misuse_hash)
247 os.write("#\n", 2);
248 const Item* prev = nullptr;
249 for (const Item& item : block.items) {
250 if (item.type == ItemType::Erased)
251 continue;
252 if (prev && !options.compact && should_be_separated_(*prev, item)) {
253 if (options.misuse_hash)
254 os.put('#');
255 os.put('\n');
256 }
257 write_out_item(os, item, options);
258 prev = &item;
259 }
260 if (options.misuse_hash)
261 os.write("#\n", 2);
262}
263
264inline void write_cif_to_stream(std::ostream& os, const Document& doc,
266 bool first = true;
267 for (const Block& block : doc.blocks) {
268 if (!first)
269 os.put('\n'); // extra blank line for readability
271 first = false;
272 }
273}
274
275} // namespace cif
276} // namespace gemmi
277
278#endif
std::ostream with buffering.
Definition to_cif.hpp:81
void operator<<(const std::string &s)
Definition to_cif.hpp:101
void pad(size_t n)
Definition to_cif.hpp:108
void write(const char *s, size_t len)
Definition to_cif.hpp:89
BufOstream(std::ostream &os_)
Definition to_cif.hpp:83
void write_cif_block_to_stream(std::ostream &os_, const Block &block, WriteOptions options=WriteOptions())
Definition to_cif.hpp:240
bool is_text_field(const std::string &val)
Definition cifdoc.hpp:1157
void write_out_loop(BufOstream &os, const Loop &loop, WriteOptions options)
Definition to_cif.hpp:150
void write_out_pair(BufOstream &os, const std::string &name, const std::string &value, WriteOptions options)
Definition to_cif.hpp:131
void write_out_item(BufOstream &os, const Item &item, WriteOptions options)
Definition to_cif.hpp:202
void write_cif_to_stream(std::ostream &os, const Document &doc, WriteOptions options=WriteOptions())
Definition to_cif.hpp:264
void write_text_field(BufOstream &os, const std::string &value)
Definition to_cif.hpp:123
std::vector< Item > items
Definition cifdoc.hpp:445
std::string name
Definition cifdoc.hpp:444
ItemType type
Definition cifdoc.hpp:514
std::vector< std::string > tags
Definition cifdoc.hpp:129
std::vector< std::string > values
Definition cifdoc.hpp:130
size_t length() const
Definition cifdoc.hpp:143
WriteOptions(Style style)
Definition to_cif.hpp:38
bool compact
no blank lines between categories, only between blocks
Definition to_cif.hpp:27
bool misuse_hash
put '#' (empty comments) before/after categories
Definition to_cif.hpp:29
std::uint16_t align_pairs
width reserved for tags in pairs; one separating space follows the padded tag (e.g. 33 = value starts at 35th column)
Definition to_cif.hpp:31
bool prefer_pairs
write single-row loops as pairs
Definition to_cif.hpp:25
std::uint16_t align_loops
if non-zero, determines max width of each column in a loop and aligns all values to this width; the width is calculated as min(width of the longest value, align_loops)
Definition to_cif.hpp:34
std::string str() const
Definition to_cif.hpp:61