Gemmi C++ API
Loading...
Searching...
No Matches
monlib.hpp
Go to the documentation of this file.
1// Copyright 2018 Global Phasing Ltd.
2//
3// Monomer library - (Refmac) restraints dictionary,
4// which is made of monomers (chemical components), links and modifications.
5
6#ifndef GEMMI_MONLIB_HPP_
7#define GEMMI_MONLIB_HPP_
8
9#include <map>
10#include <string>
11#include <vector>
12#include "cifdoc.hpp"
13#include "elem.hpp" // for Element
14#include "numb.hpp" // for as_number
15#include "fail.hpp" // for fail, unreachable
16#include "model.hpp" // for Residue, Atom
17#include "chemcomp.hpp" // for ChemComp
18
19namespace gemmi {
20
21typedef cif::Document (*read_cif_func)(const std::string&);
22
23inline void add_distinct_altlocs(const Residue& res, std::string& altlocs) {
24 for (const Atom& atom : res.atoms)
25 if (atom.altloc && altlocs.find(atom.altloc) == std::string::npos)
26 altlocs += atom.altloc;
27}
28
29inline bool atom_match_with_alias(const std::string& atom_id, const std::string& atom,
31 if (aliasing)
32 if (const std::string* real_id = aliasing->name_from_alias(atom_id))
33 return *real_id == atom;
34 return atom_id == atom;
35}
36
37struct ChemLink {
38 struct Side {
40 std::string comp;
41 std::string mod;
42 Group group = Group::Null;
43 bool matches_group(Group res) const {
44 if (group == Group::Null)
45 return false;
46 return res == group || (group == Group::Peptide && ChemComp::is_peptide_group(res))
47 || (group == Group::DnaRna && ChemComp::is_nucleotide_group(res));
48 }
49 int specificity() const {
50 if (!comp.empty())
51 return 3;
52 return group == Group::PPeptide || group == Group::MPeptide ? 1 : 0;
53 }
54 };
55 std::string id;
56 std::string name;
60 cif::Block block; // temporary, until we have ChemLink->Block function
61
63 int calculate_score(const Residue& res1, const Residue* res2,
64 char alt, char alt2,
65 const ChemComp::Aliasing* aliasing1,
66 const ChemComp::Aliasing* aliasing2) const;
67};
68
70 struct AtomMod {
71 int func;
72 std::string old_id;
73 std::string new_id;
75 float charge;
76 std::string chem_type;
77 };
78
79 std::string id;
80 std::string name;
81 std::string comp_id;
82 std::string group_id;
83 std::vector<AtomMod> atom_mods;
85 cif::Block block; // temporary, until we have ChemMod->Block function
86
88};
89
90struct EnerLib {
91 enum class RadiusType {Vdw, Vdwh, Ion};
92 struct Atom {
94 char hb_type;
95 double vdw_radius;
97 double ion_radius;
99 int sp;
100 };
101 struct Bond {
102 std::string atom_type_2;
104 double length;
105 double value_esd;
106 };
107
109 void read(const cif::Document& doc) {
110 cif::Block& block = const_cast<cif::Block&>(doc.blocks[0]);
111 for (const auto& row : block.find("_lib_atom.",
112 {"type", "hb_type", "vdw_radius", "vdwh_radius",
113 "ion_radius", "element", "valency", "sp"}))
114 atoms.emplace(row[0], Atom{Element(row[5]), row[1][0], cif::as_number(row[2]),
116 cif::as_int(row[6], -1), cif::as_int(row[7], -1)});
117 for (const auto& row : block.find("_lib_bond.",
118 {"atom_type_1", "atom_type_2", "type", "length", "value_esd"}))
119 bonds.emplace(row.str(0), Bond{row.str(1), bond_type_from_string(row[2]),
121 }
122 std::map<std::string, Atom> atoms; // type->Atom
123 std::multimap<std::string, Bond> bonds; // atom_type_1->Bond
124};
125
127 std::string monomer_dir;
128 std::string lib_version;
130 std::map<std::string, ChemComp> monomers;
131 std::map<std::string, ChemLink> links;
132 std::map<std::string, ChemMod> modifications;
133 std::map<std::string, ChemComp::Group> cc_groups;
134
135 const ChemLink* get_link(const std::string& link_id) const {
136 auto link = links.find(link_id);
137 return link != links.end() ? &link->second : nullptr;
138 }
139 const ChemMod* get_mod(const std::string& name) const {
140 auto modif = modifications.find(name);
141 return modif != modifications.end() ? &modif->second : nullptr;
142 }
143
144 // Returns the most specific link and a flag that is true
145 // if the order is comp2-comp1 in the link definition.
146 std::tuple<const ChemLink*, bool, const ChemComp::Aliasing*, const ChemComp::Aliasing*>
147 match_link(const Residue& res1, const std::string& atom1, char alt1,
148 const Residue& res2, const std::string& atom2, char alt2,
149 double min_bond_sq=0) const {
150 const ChemLink* best_link = nullptr;
151 bool inverted = false;
152 const ChemComp::Aliasing* aliasing1 = nullptr;
153 const ChemComp::Aliasing* aliasing2 = nullptr;
154 const ChemComp::Aliasing* aliasing1_final = nullptr;
155 const ChemComp::Aliasing* aliasing2_final = nullptr;
156 int best_score = -1000;
157 for (auto& ml : links) {
158 const ChemLink& link = ml.second;
159 if (link.rt.bonds.empty() || starts_with(link.name, "auto-"))
160 continue;
161 // for now we don't have link definitions with >1 bonds
162 const Restraints::Bond& bond = link.rt.bonds[0];
163 if (sq(bond.value) < min_bond_sq)
164 continue;
165 if (link_side_matches_residue(link.side1, res1.name, &aliasing1) &&
166 link_side_matches_residue(link.side2, res2.name, &aliasing2) &&
167 atom_match_with_alias(bond.id1.atom, atom1, aliasing1) &&
168 atom_match_with_alias(bond.id2.atom, atom2, aliasing2)) {
169 int score = link.calculate_score(res1, &res2, alt1, alt2, aliasing1, aliasing2);
170 if (score > best_score) {
171 best_link = &link;
172 best_score = score;
173 aliasing1_final = aliasing1;
174 aliasing2_final = aliasing2;
175 inverted = false;
176 }
177 }
178 if (link_side_matches_residue(link.side1, res2.name, &aliasing2) &&
179 link_side_matches_residue(link.side2, res1.name, &aliasing1) &&
180 atom_match_with_alias(bond.id1.atom, atom2, aliasing2) &&
181 atom_match_with_alias(bond.id2.atom, atom1, aliasing1)) {
182 int score = link.calculate_score(res2, &res1, alt2, alt1, aliasing2, aliasing1);
183 if (score > best_score) {
184 best_link = &link;
185 best_score = score;
186 aliasing1_final = aliasing1;
187 aliasing2_final = aliasing2;
188 inverted = true;
189 }
190 }
191 }
192 return std::make_tuple(best_link, inverted, aliasing1_final, aliasing2_final);
193 }
194
196 if (block.has_tag("_chem_comp_atom.atom_id")) {
198 if (cc.group == ChemComp::Group::Null) {
199 auto it = cc_groups.find(cc.name);
200 if (it != cc_groups.end())
201 cc.group = it->second;
202 }
203 std::string name = cc.name;
204 monomers.emplace(name, std::move(cc));
205 }
206 }
207
209 const std::string& res_name,
210 ChemComp::Aliasing const** aliasing) const {
212 *aliasing = nullptr;
213 if (!side.comp.empty())
214 return side.comp == res_name;
215 auto it = monomers.find(res_name);
216 if (it != monomers.end()) {
217 if (side.matches_group(it->second.group))
218 return true;
219 for (const ChemComp::Aliasing& a : it->second.aliases)
220 if (side.matches_group(a.group)) {
221 *aliasing = &a;
222 return true;
223 }
224 }
225 return false;
226 }
227
229 std::string path(const std::string& code) {
230 return monomer_dir + relative_monomer_path(code);
231 }
232
233 static std::string relative_monomer_path(const std::string& code) {
234 std::string path;
235 if (!code.empty()) {
236 path += lower(code[0]);
237 path += '/'; // works also on Windows
238 path += code;
239 // On Windows several names are reserved (CON, PRN, AUX, ...), see
240 // https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
241 // The workaround in CCP4 monomer library is to use CON_CON.cif, etc.
242 if (code.size() == 3)
243 switch (ialpha3_id(code.c_str())) {
244 case ialpha3_id("AUX"):
245 case ialpha3_id("COM"):
246 case ialpha3_id("CON"):
247 case ialpha3_id("LPT"):
248 case ialpha3_id("PRN"):
249 path += '_';
250 path += code;
251 }
252 path += ".cif";
253 }
254 return path;
255 }
256
258
259 void read_monomer_cif(const std::string& path_, read_cif_func read_cif) {
260 const cif::Document& doc = (*read_cif)(path_);
261 if (!doc.blocks.empty() && doc.blocks[0].name == "lib")
262 if (const std::string* ver = doc.blocks[0].find_value("_lib.version"))
263 lib_version = *ver;
264 read_monomer_doc(doc);
265 }
266
267 void set_monomer_dir(const std::string& monomer_dir_) {
268 monomer_dir = monomer_dir_;
269 if (monomer_dir.back() != '/' && monomer_dir.back() != '\\')
270 monomer_dir += '/';
271 }
272
275 bool read_monomer_lib(const std::string& monomer_dir_,
276 const std::vector<std::string>& resnames,
277 read_cif_func read_cif,
278 std::string* error=nullptr) {
279 if (monomer_dir_.empty())
280 fail("read_monomer_lib: monomer_dir not specified.");
281 set_monomer_dir(monomer_dir_);
282
283 read_monomer_cif(monomer_dir + "list/mon_lib_list.cif", read_cif);
284 ener_lib.read((*read_cif)(monomer_dir + "ener_lib.cif"));
285
286 bool ok = true;
287 for (const std::string& name : resnames) {
288 if (monomers.find(name) != monomers.end())
289 continue;
290 try {
291 const cif::Document& doc = (*read_cif)(path(name));
292 read_monomer_doc(doc);
293 } catch (std::system_error& err) {
294 if (error) {
295 if (err.code().value() == ENOENT)
296 cat_to(*error, "Monomer not in the library: ", name, ".\n");
297 else
298 cat_to(*error, "Failed to read ", name, ": ", err.what(), ".\n");
299 }
300 ok = false;
301 } catch (std::runtime_error& err) {
302 if (error)
303 cat_to(*error, "Failed to read ", name, ": ", err.what(), ".\n");
304 ok = false;
305 }
306 }
307 return ok;
308 }
309
310 double find_ideal_distance(const const_CRA& cra1, const const_CRA& cra2) const;
311 std::string update_old_atom_names(Structure& st) const;
312};
313
314// to be deprecated
315inline MonLib read_monomer_lib(const std::string& monomer_dir,
316 const std::vector<std::string>& resnames,
318 const std::string& libin="",
319 bool ignore_missing=false) {
321 if (!libin.empty())
322 monlib.read_monomer_cif(libin, read_cif);
323 std::string error;
324 bool ok = monlib.read_monomer_lib(monomer_dir, resnames, read_cif, &error);
325 if (!ignore_missing && !ok)
326 fail(error + "Please create definitions for missing monomers.");
327 return monlib;
328}
329
330} // namespace gemmi
331#endif
#define GEMMI_DLL
Definition fail.hpp:53
double as_number(const std::string &s, double nan=NAN)
Definition numb.hpp:19
int as_int(const std::string &str)
Definition cifdoc.hpp:108
char lower(char c)
Definition util.hpp:53
ChemComp make_chemcomp_from_block(const cif::Block &block_)
Definition chemcomp.hpp:583
constexpr int ialpha3_id(const char *s)
Definition util.hpp:309
void add_distinct_altlocs(const Residue &res, std::string &altlocs)
Definition monlib.hpp:23
bool atom_match_with_alias(const std::string &atom_id, const std::string &atom, const ChemComp::Aliasing *aliasing)
Definition monlib.hpp:29
constexpr float sq(float x)
Definition math.hpp:34
void cat_to(std::string &)
Definition util.hpp:25
bool starts_with(const std::string &str, const std::string &prefix)
Definition util.hpp:38
void fail(const std::string &msg)
Definition fail.hpp:59
cif::Document(* read_cif_func)(const std::string &)
Definition monlib.hpp:21
MonLib read_monomer_lib(const std::string &monomer_dir, const std::vector< std::string > &resnames, read_cif_func read_cif, const std::string &libin="", bool ignore_missing=false)
Definition monlib.hpp:315
BondType bond_type_from_string(const std::string &s)
Definition chemcomp.hpp:509
Represents atom site in macromolecular structure (~100 bytes).
Definition model.hpp:112
std::string name
Definition chemcomp.hpp:369
static bool is_nucleotide_group(Group g)
Check if the group is DNA/RNA.
Definition chemcomp.hpp:468
static bool is_peptide_group(Group g)
Check if the group (M-|P-)peptide.
Definition chemcomp.hpp:463
std::string chem_type
Definition monlib.hpp:76
void apply_to(ChemComp &chemcomp, ChemComp::Group alias_group) const
std::string id
Definition monlib.hpp:79
std::vector< AtomMod > atom_mods
Definition monlib.hpp:83
std::string name
Definition monlib.hpp:80
std::string comp_id
Definition monlib.hpp:81
std::string group_id
Definition monlib.hpp:82
Restraints rt
Definition monlib.hpp:84
cif::Block block
Definition monlib.hpp:85
std::string atom_type_2
Definition monlib.hpp:102
std::multimap< std::string, Bond > bonds
Definition monlib.hpp:123
std::map< std::string, Atom > atoms
Definition monlib.hpp:122
void read(const cif::Document &doc)
Definition monlib.hpp:109
EnerLib ener_lib
Definition monlib.hpp:129
void read_monomer_doc(const cif::Document &doc)
double find_ideal_distance(const const_CRA &cra1, const const_CRA &cra2) const
const ChemLink * get_link(const std::string &link_id) const
Definition monlib.hpp:135
std::map< std::string, ChemMod > modifications
Definition monlib.hpp:132
std::string lib_version
Definition monlib.hpp:128
bool read_monomer_lib(const std::string &monomer_dir_, const std::vector< std::string > &resnames, read_cif_func read_cif, std::string *error=nullptr)
Read mon_lib_list.cif, ener_lib.cif and required monomers.
Definition monlib.hpp:275
static std::string relative_monomer_path(const std::string &code)
Definition monlib.hpp:233
void read_monomer_cif(const std::string &path_, read_cif_func read_cif)
Definition monlib.hpp:259
std::map< std::string, ChemLink > links
Definition monlib.hpp:131
std::map< std::string, ChemComp::Group > cc_groups
Definition monlib.hpp:133
void set_monomer_dir(const std::string &monomer_dir_)
Definition monlib.hpp:267
void add_monomer_if_present(const cif::Block &block)
Definition monlib.hpp:195
std::tuple< const ChemLink *, bool, const ChemComp::Aliasing *, const ChemComp::Aliasing * > match_link(const Residue &res1, const std::string &atom1, char alt1, const Residue &res2, const std::string &atom2, char alt2, double min_bond_sq=0) const
Definition monlib.hpp:147
bool link_side_matches_residue(const ChemLink::Side &side, const std::string &res_name, ChemComp::Aliasing const **aliasing) const
Definition monlib.hpp:208
std::string path(const std::string &code)
Returns path to the monomer cif file (the file may not exist).
Definition monlib.hpp:229
std::string monomer_dir
Definition monlib.hpp:127
std::map< std::string, ChemComp > monomers
Definition monlib.hpp:130
const ChemMod * get_mod(const std::string &name) const
Definition monlib.hpp:139
std::string update_old_atom_names(Structure &st) const
std::string name
Definition seqid.hpp:91
std::vector< Atom > atoms
Definition model.hpp:188
Table find(const std::string &prefix, const std::vector< std::string > &tags)
Definition cifdoc.hpp:961
bool has_tag(const std::string &tag) const
Definition cifdoc.hpp:458