Gemmi C++ API
Loading...
Searching...
No Matches
linkhunt.hpp
Go to the documentation of this file.
1// Copyright 2019 Global Phasing Ltd.
2//
3// Searching for links based on the _chem_link table from monomer dictionary.
4
5#ifndef GEMMI_LINKHUNT_HPP_
6#define GEMMI_LINKHUNT_HPP_
7
8#include <map>
9#include "elem.hpp"
10#include "model.hpp"
11#include "monlib.hpp"
12#include "neighbor.hpp"
13#include "contact.hpp"
14
15namespace gemmi {
16
17struct LinkHunt {
18 struct Match {
19 const ChemLink* chem_link = nullptr;
21 int score = -1000;
25 double bond_length = 0;
26 Connection* conn = nullptr;
27 };
28
29 double global_max_dist = 2.34; // ZN-CYS
30 const MonLib* monlib_ptr = nullptr;
31 std::multimap<std::string, const ChemLink*> links;
32
33 void index_chem_links(const MonLib& monlib, bool use_alias=true) {
34 std::map<ChemComp::Group, std::map<std::string, std::vector<std::string>>> aliases;
35 if (use_alias)
36 for (const auto& iter : monlib.monomers)
37 for (const ChemComp::Aliasing& a : iter.second.aliases)
38 for (const std::pair<std::string, std::string>& r : a.related) {
40 aliases[gr][r.second].push_back(r.first);
41 }
42
43 for (const auto& iter : monlib.links) {
44 const ChemLink& link = iter.second;
45 if (link.rt.bonds.empty())
46 continue;
47 if (link.rt.bonds.size() > 1)
48 fprintf(stderr, "Note: considering only the first bond in %s\n",
49 link.id.c_str());
50 if (link.side1.comp.empty() && link.side2.comp.empty())
51 if (link.side1.group == ChemComp::Group::Null ||
52 link.side2.group == ChemComp::Group::Null ||
53 link.id == "SS")
54 continue;
55 const Restraints::Bond& bond = link.rt.bonds[0];
58 links.emplace(bond.lexicographic_str(), &link);
59
60 if (!use_alias || (!link.side1.comp.empty() && !link.side2.comp.empty()))
61 continue;
62 std::vector<std::string> *names1 = nullptr, *names2 = nullptr;
63 if (link.side1.comp.empty()) {
64 auto i = aliases.find(link.side1.group);
65 if (i != aliases.end()) {
66 auto j = i->second.find(bond.id1.atom);
67 if (j != i->second.end())
68 names1 = &j->second;
69 }
70 }
71 if (link.side2.comp.empty()) {
72 auto i = aliases.find(link.side2.group);
73 if (i != aliases.end()) {
74 auto j = i->second.find(bond.id2.atom);
75 if (j != i->second.end())
76 names2 = &j->second;
77 }
78 }
79 if (names1 && names2)
80 for (const std::string& n1 : *names1)
81 for (const std::string& n2 : *names2)
83 else if (names1 || names2) {
84 const std::string& n1 = names1 ? bond.id2.atom : bond.id1.atom;
85 for (const std::string& n2 : (names1 ? *names1 : *names2))
87 }
88 }
90 }
91
92 std::vector<Match> find_possible_links(Structure& st,
93 double bond_margin,
94 double radius_margin,
95 ContactSearch::Ignore ignore) {
96 std::vector<Match> results;
97 Model& model = st.first_model();
98 double search_radius = std::max(global_max_dist * bond_margin,
99 /*max r1+r2 ~=*/3.0 * radius_margin);
100 NeighborSearch ns(model, st.cell, std::max(5.0, search_radius));
101 ns.populate();
102
103 ContactSearch contacts((float) search_radius);
104 contacts.ignore = ignore;
105 contacts.for_each_contact(ns, [&](const CRA& cra1, const CRA& cra2,
106 int image_idx, double dist_sq) {
107 Match match;
108
109 // search for a match in chem_links
110 if (bond_margin > 0) {
111 auto range = links.equal_range(Restraints::lexicographic_str(
112 cra1.atom->name, cra2.atom->name));
113 // similar to MonLib::match_link()
114 for (auto iter = range.first; iter != range.second; ++iter) {
115 const ChemLink& link = *iter->second;
116 const Restraints::Bond& bond = link.rt.bonds[0];
117 if (dist_sq > sq(bond.value * bond_margin))
118 continue;
119 const ChemComp::Aliasing* aliasing1 = nullptr;
120 const ChemComp::Aliasing* aliasing2 = nullptr;
121 bool order1;
122 if (monlib_ptr->link_side_matches_residue(link.side1, cra1.residue->name, &aliasing1) &&
123 monlib_ptr->link_side_matches_residue(link.side2, cra2.residue->name, &aliasing2) &&
124 atom_match_with_alias(bond.id1.atom, cra1.atom->name, aliasing1))
125 order1 = true;
126 else if (monlib_ptr->link_side_matches_residue(link.side2, cra1.residue->name, &aliasing1) &&
127 monlib_ptr->link_side_matches_residue(link.side1, cra2.residue->name, &aliasing2) &&
128 atom_match_with_alias(bond.id2.atom, cra1.atom->name, aliasing1))
129 order1 = false;
130 else
131 continue;
132 int link_score = link.calculate_score(
133 order1 ? *cra1.residue : *cra2.residue,
134 order1 ? cra2.residue : cra1.residue,
135 order1 ? cra1.atom->altloc : cra2.atom->altloc,
136 order1 ? cra2.atom->altloc : cra1.atom->altloc,
137 order1 ? aliasing1 : aliasing2,
138 order1 ? aliasing2 : aliasing1);
139 match.chem_link_count++;
140 if (link_score > match.score) {
141 match.chem_link = &link;
142 match.score = link_score;
143 if (order1) {
144 match.cra1 = cra1;
145 match.cra2 = cra2;
146 } else {
147 match.cra1 = cra2;
148 match.cra2 = cra1;
149 }
150 }
151 }
152 }
153
154 // potential other links according to covalent radii
155 if (!match.chem_link) {
156 float r1 = cra1.atom->element.covalent_r();
157 float r2 = cra2.atom->element.covalent_r();
158 if (dist_sq > sq((r1 + r2) * radius_margin))
159 return;
160 match.cra1 = cra1;
161 match.cra2 = cra2;
162 }
163
164 // finalize
165 match.same_image = !image_idx;
166 match.bond_length = std::sqrt(dist_sq);
167 results.push_back(match);
168 });
169
170 // add references to st.connections
171 for (Match& match : results)
172 match.conn = st.find_connection_by_cra(match.cra1, match.cra2);
173
174 return results;
175 }
176};
177
178} // namespace gemmi
179#endif
bool atom_match_with_alias(const std::string &atom_id, const std::string &atom, const ChemComp::Aliasing *aliasing)
Definition monlib.hpp:29
constexpr float sq(float x)
Definition math.hpp:34
char altloc
Definition model.hpp:115
Element element
Definition model.hpp:117
std::string name
Definition model.hpp:114
Residue * residue
Definition model.hpp:605
Atom * atom
Definition model.hpp:606
static bool is_nucleotide_group(Group g)
Check if the group is DNA/RNA.
Definition chemcomp.hpp:468
float covalent_r() const
Definition elem.hpp:305
const ChemLink * chem_link
Definition linkhunt.hpp:19
void index_chem_links(const MonLib &monlib, bool use_alias=true)
Definition linkhunt.hpp:33
std::multimap< std::string, const ChemLink * > links
Definition linkhunt.hpp:31
const MonLib * monlib_ptr
Definition linkhunt.hpp:30
double global_max_dist
Definition linkhunt.hpp:29
std::vector< Match > find_possible_links(Structure &st, double bond_margin, double radius_margin, ContactSearch::Ignore ignore)
Definition linkhunt.hpp:92
std::string name
Definition seqid.hpp:91
static std::string lexicographic_str(const std::string &name1, const std::string &name2)
Definition chemcomp.hpp:64