Gemmi C++ API
Loading...
Searching...
No Matches
utf.hpp
Go to the documentation of this file.
1// Conversion between UTF-8 and wchar. Used only for file names on Windows.
2
3#ifndef GEMMI_UTF_HPP_
4#define GEMMI_UTF_HPP_
5
6#include <string>
7
8namespace gemmi {
9
10// from Mark Ransom's answer
11// https://stackoverflow.com/questions/148403/utf8-to-from-wide-char-conversion-in-stl/148766#148766
// Converts a NUL-terminated UTF-8 byte string to a wide string.
//
// If wchar_t is wider than 16 bits, each decoded codepoint is stored in one
// wchar_t. Otherwise the output is UTF-16: codepoints above U+FFFF are
// written as a surrogate pair and codepoints that fall in the surrogate
// range (U+D800..U+DFFF) are dropped.
//
// The decoder is lenient: it does not validate sequence lengths or reject
// overlong forms. Decoded values above 0x10FFFF are silently skipped.
inline std::wstring UTF8_to_wchar(const char* in) {
  std::wstring out;
  unsigned int codepoint = 0;
  while (*in != 0) {
    unsigned char ch = static_cast<unsigned char>(*in);
    if (ch <= 0x7f)
      codepoint = ch;                              // single-byte (ASCII)
    else if (ch <= 0xbf)
      codepoint = (codepoint << 6) | (ch & 0x3f);  // continuation byte
    else if (ch <= 0xdf)
      codepoint = ch & 0x1f;                       // lead byte of 2-byte form
    else if (ch <= 0xef)
      codepoint = ch & 0x0f;                       // lead byte of 3-byte form
    else
      codepoint = ch & 0x07;                       // lead byte of 4-byte form
    ++in;
    // Emit once the next byte is not a continuation byte, i.e. the current
    // sequence has ended (the terminating NUL also ends a sequence).
    if ((*in & 0xc0) != 0x80 && codepoint <= 0x10ffff) {
      if (sizeof(wchar_t) > 2) {
        out.append(1, static_cast<wchar_t>(codepoint));
      } else if (codepoint > 0xffff) {
        // UTF-16 surrogates encode (codepoint - 0x10000), not the codepoint
        // itself: upper 10 bits go to the high surrogate, lower 10 bits to
        // the low surrogate.
        const unsigned int offset = codepoint - 0x10000;
        out.append(1, static_cast<wchar_t>(0xd800 + (offset >> 10)));
        out.append(1, static_cast<wchar_t>(0xdc00 + (offset & 0x03ff)));
      } else if (codepoint < 0xd800 || codepoint >= 0xe000) {
        out.append(1, static_cast<wchar_t>(codepoint));
      }
    }
  }
  return out;
}
41
// Converts a NUL-terminated wide string to UTF-8.
//
// UTF-16 surrogate pairs are combined into one codepoint and written as a
// 4-byte sequence; any other unit is encoded directly as a codepoint.
// Unpaired surrogates are dropped: a high surrogate that is not followed by
// a low surrogate is discarded, and so is a low surrogate that is not
// preceded by a high surrogate.
inline std::string wchar_to_UTF8(const wchar_t* in) {
  std::string out;
  // Holds the partial codepoint while a high surrogate is pending. It is
  // always >= 0x10000 in that state, so 0 means "nothing pending".
  unsigned int codepoint = 0;
  for (; *in != 0; ++in) {
    if (*in >= 0xd800 && *in <= 0xdbff) {
      // High surrogate: remember the upper 10 bits and wait for the low half.
      codepoint = ((*in - 0xd800) << 10) + 0x10000;
      continue;
    }
    if (*in >= 0xdc00 && *in <= 0xdfff) {
      // A low surrogate without a pending high surrogate is invalid. Skip it
      // rather than emitting (unit - 0xdc00), which would turn e.g. 0xdc2f
      // into '/'.
      if (codepoint == 0)
        continue;
      codepoint |= *in - 0xdc00;
    } else {
      codepoint = *in;
    }
    if (codepoint <= 0x7f) {
      out += static_cast<char>(codepoint);
    } else if (codepoint <= 0x7ff) {
      out += static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f));
      out += static_cast<char>(0x80 | (codepoint & 0x3f));
    } else if (codepoint <= 0xffff) {
      out += static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f));
      out += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f));
      out += static_cast<char>(0x80 | (codepoint & 0x3f));
    } else {
      out += static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07));
      out += static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f));
      out += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f));
      out += static_cast<char>(0x80 | (codepoint & 0x3f));
    }
    codepoint = 0;
  }
  return out;
}
74
75} // namespace gemmi
76#endif
std::wstring UTF8_to_wchar(const char *in)
Definition utf.hpp:12
std::string wchar_to_UTF8(const wchar_t *in)
Definition utf.hpp:42