Gemmi C++ API
Loading...
Searching...
No Matches
to_json.hpp
Go to the documentation of this file.
1// Copyright 2017 Global Phasing Ltd.
2
3// Writing cif::Document or its parts as JSON (mmJSON, CIF-JSON, etc).
4
5#ifndef GEMMI_TO_JSON_HPP_
6#define GEMMI_TO_JSON_HPP_
7#include <cctype> // for isdigit
8#include <ostream> // for ostream
9#include <set> // for set
10#include <string> // for string
11#include <vector> // for vector
12#include "cifdoc.hpp"
13#include "numb.hpp" // for is_numb
14#include "util.hpp" // for starts_with
15
16namespace gemmi {
17namespace cif {
18
20public:
// Output options. They are plain public members; set_comcifs() and
// set_mmjson() assign ready-made combinations of them.
21 bool comcifs = false; // conform to the COMCIFS CIF-JSON draft
22 bool group_ddl2_categories = false; // for mmJSON
23 bool with_data_keyword = false; // for mmJSON
// When true, write_loop()/write_map() start writing tag names at index 1,
// i.e. the first character of the tag is skipped.
24 bool bare_tags = false; // "tag" instead of "_tag"
25 bool values_as_arrays = false; // "_tag": ["value"]
// Passed as the to_lower argument when block names, category names and tags
// are written; only ASCII 'A'-'Z' are lowered (see escape()).
26 bool lowercase_names = true; // write case-insensitive names as lower case
// Checked in write_value(): with 2 nothing is written as a JSON number;
// with 1 numbers ending with ')' stay strings; with 0 they are written as
// numbers and write_as_number() drops the parenthesized part.
27 int quote_numbers = 1; // 0=never (no s.u.), 1=mix, 2=always
// Written verbatim (unquoted) by write_value() when a value equals ".".
28 std::string cif_dot = "null"; // how to convert '.' from CIF
// Binds the writer to an output stream (stored by reference) and sets the
// initial line separator.
29 explicit JsonWriter(std::ostream& os) : os_(os), linesep_("\n ") {}
// Writes all blocks of the document as one JSON object.
30 void write_json(const Document& d);
31 void set_comcifs() {
32 comcifs = true;
33 values_as_arrays = true;
34 quote_numbers = 2;
35 cif_dot = "false";
36 }
37 void set_mmjson() {
39 with_data_keyword = true;
40 bare_tags = true;
41 values_as_arrays = true;
42 lowercase_names = false;
43 quote_numbers = 0;
44 }
45
46private:
// Destination stream, held by reference (bound in the constructor).
47 std::ostream& os_;
// Separator written between entries: starts as a newline followed by
// indentation; change_indent() grows or shrinks it.
48 std::string linesep_;
49
50 void change_indent(int n) { linesep_.resize(linesep_.size() + n, ' '); }
51
52 // returns category with trailing dot
53 std::string get_tag_category(const std::string& tag) const {
55 return std::string{};
56 size_t pos = tag.find('.');
57 if (pos == std::string::npos)
58 return std::string{};
59 return tag.substr(0, pos + 1);
60 }
61
62 std::string get_loop_category(const Loop& loop) const {
63 if (loop.tags.empty())
64 return std::string{};
65 std::string cat = get_tag_category(loop.tags[0]);
66 for (size_t i = 1; i < loop.tags.size(); ++i)
67 if (!starts_with(loop.tags[i], cat))
68 return std::string{};
69 return cat;
70 }
71
// based on tao/json/internal/escape.hpp
// Writes s, starting at byte offset pos, to os with JSON string escaping:
//  - backslash and double quote are prefixed with a backslash,
//  - bytes below 32 become \b \f \n \r \t or \u00XX,
//  - byte 127 becomes \u007f,
//  - if to_lower is set, ASCII 'A'-'Z' are written as lower case.
// All other bytes are copied unchanged, in runs.
// The surrounding quotes are not written here.
static void escape(std::ostream& os, const std::string& s, size_t pos,
                   bool to_lower) {
  static const char* hex_digits = "0123456789abcdef";
  const char* const end = s.data() + s.size();
  const char* cur = s.data() + pos;
  const char* pending = cur;  // start of the run not yet written
  for (; cur != end; ++cur) {
    const unsigned char ch = static_cast<unsigned char>(*cur);
    const bool upper = ch >= 'A' && ch <= 'Z';
    const bool verbatim = ch >= 32 && ch != 127 && ch != '\\' && ch != '"' &&
                          !(to_lower && upper);
    if (verbatim)
      continue;
    // flush the unescaped run that precedes this byte
    os.write(pending, cur - pending);
    pending = cur + 1;
    switch (ch) {
      case '\\': os << "\\\\"; break;
      case '"':  os << "\\\""; break;
      case '\b': os << "\\b"; break;
      case '\f': os << "\\f"; break;
      case '\n': os << "\\n"; break;
      case '\r': os << "\\r"; break;
      case '\t': os << "\\t"; break;
      case 127:  os << "\\u007f"; break;
      default:
        if (ch < 32)  // remaining control characters
          os << "\\u00" << hex_digits[ch >> 4] << hex_digits[ch & 0x0f];
        else          // upper-case letter with to_lower requested
          os.put(static_cast<char>(ch + 32));
    }
  }
  os.write(pending, cur - pending);
}
114
115 void write_string(const std::string& s, size_t pos=0, bool to_lower=false) {
116 os_.put('"');
117 escape(os_, s, pos, to_lower);
118 os_.put('"');
119 }
120
121 void write_as_number(const std::string& value) {
122 // if we are here, value is not empty
123 if (value[0] == '.') // in JSON numbers cannot start with dot
124 os_.put('0');
125 // in JSON the number cannot start with +
126 size_t pos = 0;
127 if (value[pos] == '+') {
128 pos = 1;
129 } else if (value[pos] == '-') { // make handling -001 easier
130 os_.put('-');
131 pos = 1;
132 }
133 // in JSON left-padding with 0s is not allowed
134 while (value[pos] == '0' && std::isdigit(value[pos+1]))
135 ++pos;
136 // in JSON dot must be followed by digit
137 size_t dotpos = value.find('.');
138 if (dotpos != std::string::npos && !std::isdigit(value[dotpos+1])) {
139 os_ << value.substr(pos, dotpos+1-pos) << '0';
140 pos = dotpos + 1;
141 }
142 if (value.back() != ')')
143 os_ << value.c_str() + pos;
144 else
145 os_ << value.substr(pos, value.find('(', pos) - pos);
146 }
147
148 void write_value(const std::string& value) {
149 if (value == "?")
150 os_ << "null";
151 else if (value == ".")
152 os_ << cif_dot;
153 else if (quote_numbers < 2 && is_numb(value) &&
154 // exception: 012 (but not 0.12) is assumed to be a string
155 (value[0] != '0' || value[1] == '.' || value[1] == '\0') &&
156 (quote_numbers == 0 || value.back() != ')'))
157 write_as_number(value);
158 else
159 write_string(as_string(value));
160 }
161
162 void open_cat(const std::string& cat, size_t* tag_pos) {
163 if (!cat.empty()) {
164 change_indent(+1);
165 write_string(cat.substr(0, cat.size() - 1), bare_tags ? 1 : 0, lowercase_names);
166 os_ << ": {" << linesep_;
167 *tag_pos += cat.size() - 1;
168 }
169 }
170
171 void close_cat(std::string& cat, size_t* tag_pos) {
172 if (!cat.empty()) {
173 change_indent(-1);
174 os_ << linesep_ << '}';
175 *tag_pos -= cat.size() - 1;
176 cat.clear();
177 }
178 }
179
180 void write_loop(const Loop& loop) {
181 size_t ncol = loop.tags.size();
182 const auto& vals = loop.values;
183 std::string cat = get_loop_category(loop);
184 size_t tag_pos = bare_tags ? 1 : 0;
185 open_cat(cat, &tag_pos);
186 for (size_t i = 0; i < ncol; i++) {
187 if (i != 0)
188 os_ << "," << linesep_;
189 write_string(loop.tags[i], tag_pos, lowercase_names);
190 os_ << ": [";
191 for (size_t j = i; j < vals.size(); j += ncol) {
192 if (j != i)
193 os_.put(',');
194 write_value(vals[j]);
195 }
196 os_.put(']');
197 }
198 close_cat(cat, &tag_pos);
199 }
200
201
202 // works for both block and frame
203 void write_map(const std::string& name, const std::vector<Item>& items) {
204 write_string(name, 0, lowercase_names);
205 os_ << ": ";
206 change_indent(+1);
207 char first = '{';
208 bool has_frames = false;
209 std::string cat;
210 size_t tag_pos = bare_tags ? 1 : 0;
211 // When grouping into categories, only consecutive tags are grouped.
212 std::set<std::string> seen_cats;
213 for (const Item& item : items) {
214 switch (item.type) {
215 case ItemType::Pair:
216 if (!cat.empty() && !starts_with(item.pair[0], cat))
217 close_cat(cat, &tag_pos);
218 os_ << first << linesep_;
219 if (group_ddl2_categories && cat.empty()) {
220 cat = get_tag_category(item.pair[0]);
221 if (seen_cats.insert(cat).second)
222 open_cat(cat, &tag_pos);
223 }
224 write_string(item.pair[0], tag_pos, lowercase_names);
225 os_ << ": ";
227 os_.put('[');
228 write_value(item.pair[1]);
230 os_.put(']');
231 first = ',';
232 break;
233 case ItemType::Loop:
234 close_cat(cat, &tag_pos);
235 os_ << first << linesep_;
236 write_loop(item.loop);
237 first = ',';
238 break;
239 case ItemType::Frame:
240 has_frames = true;
241 break;
243 break;
244 case ItemType::Erased:
245 break;
246 }
247 }
248 if (has_frames) { // usually, we don't have any frames
249 os_ << first << linesep_ << "\"Frames\": ";
250 change_indent(+1);
251 first = '{';
252 for (const Item& item : items)
253 if (item.type == ItemType::Frame) {
254 os_ << first << linesep_;
255 write_map(item.frame.name, item.frame.items);
256 first = ',';
257 }
258 change_indent(-1);
259 os_ << linesep_ << '}';
260 }
261 close_cat(cat, &tag_pos);
262 change_indent(-1);
263 os_ << linesep_ << '}';
264 }
265};
266
// Writes the whole document as a single JSON object: one entry per block,
// produced by write_map(). In COMCIFS mode the blocks are nested inside a
// "CIF-JSON" object that starts with a fixed "Metadata" entry.
267inline void JsonWriter::write_json(const Document& d) {
268 os_.put('{');
269 if (comcifs) {
// Fixed CIF-JSON envelope; the raw string is written verbatim.
270 os_ << R"(
271 "CIF-JSON": {
272 "Metadata": {
273 "cif-version": "2.0",
274 "schema-name": "CIF-JSON",
275 "schema-version": "1.0.0",
276 "schema-uri": "http://www.iucr.org/resources/cif/cif-json.json"
277 },)";
// Blocks are one level deeper inside the "CIF-JSON" object.
278 change_indent(+1);
279 }
280 for (const Block& block : d.blocks) {
// Comma between blocks, i.e. before every block except the first one.
281 if (&block != &d.blocks[0])
282 os_.put(',');
283 // start mmJSON with {"data_ so it can be easily recognized
// The line separator is skipped only for the first block when
// with_data_keyword is set and comcifs is not.
284 if (&block != &d.blocks[0] || comcifs || !with_data_keyword)
285 os_ << linesep_;
// The block name gets the "data_" prefix if with_data_keyword is set.
286 write_map((with_data_keyword ? "data_" : "") + block.name, block.items);
287 }
// Close the "CIF-JSON" object opened above.
288 if (comcifs)
289 os_ << "\n }";
290 os_ << "\n}\n";
291}
292
293inline void write_mmjson_to_stream(std::ostream& os, const Document& doc) {
295 writer.set_mmjson();
296 writer.write_json(doc);
297}
298
299} // namespace cif
300} // namespace gemmi
301#endif
void write_json(const Document &d)
Definition to_json.hpp:267
std::string cif_dot
Definition to_json.hpp:28
JsonWriter(std::ostream &os)
Definition to_json.hpp:29
std::string as_string(const std::string &value)
Definition cifdoc.hpp:81
bool is_numb(const std::string &s)
Definition numb.hpp:43
void write_mmjson_to_stream(std::ostream &os, const Document &doc)
Definition to_json.hpp:293
std::string to_lower(std::string str)
Definition util.hpp:62
std::string cat(Args const &... args)
Definition util.hpp:32
bool starts_with(const std::string &str, const std::string &prefix)
Definition util.hpp:38