5#ifndef GEMMI_MODEL_HPP_
6#define GEMMI_MODEL_HPP_
16#include <unordered_map>
33T* find_or_null(std::vector<T>& vec,
const std::string& name) {
34 auto it = std::find_if(vec.begin(), vec.end(),
35 [&name](
const T& m) { return m.name == name; });
36 return it != vec.end() ? &*it :
nullptr;
40T& find_or_add(std::vector<T>& vec,
const std::string& name) {
41 if (
T* ret = find_or_null(vec, name))
43 vec.emplace_back(name);
47template<
typename Span,
typename T =
typename Span::value_type>
48typename Span::iterator find_iter(Span& span,
const std::string& name) {
49 auto i = std::find_if(span.begin(), span.end(),
50 [&](
const T& x) { return x.name == name; });
52 throw std::invalid_argument(
53 T::what() + (
" " + name) +
" not found (only [" +
54 join_str(span.begin(), span.end(),
' ',
55 [](
const T& x) { return x.name; }) +
60template<
typename Group>
61typename Group::element_type& get_by_altloc(Group& group,
char alt) {
62 for (
auto& atom : group)
63 if (atom.altloc == alt)
65 fail(
"No such altloc");
68template<
typename T,
typename M> std::vector<T> model_subchains(M* model) {
70 for (
auto& chain : model->chains)
103 using Item =
typename T::child_type;
113 static const char*
what() {
return "Atom"; }
159template<
typename AtomType>
160struct AtomGroup_ : ItemGroup<AtomType> {
161 using ItemGroup<AtomType>::ItemGroup;
162 std::string name()
const {
return !this->
empty() ? this->
front().name :
""; }
163 AtomType& by_altloc(
char alt) {
164 for (
int i = 0; i != this->
extent(); ++i) {
165 AtomType* a = &this->
front() + i;
166 if (a->altloc == alt && (a->name == this->front().name))
169 fail(
"No such altloc");
178 static const char*
what() {
return "Residue"; }
217 if (a.name == atom_name && a.altloc_matches(altloc) && (el ==
El::X || a.element == el))
229 fail(
"Atom not found.");
234 if (atom.name == atom_name)
236 fail(
"No such atom: " + atom_name);
242 fail(
"Multiple alternative atoms " + atom_name);
255 return atoms.empty() || other.
atoms.empty() ||
264 if (
name.length() != 3)
278struct ConstResidueGroup;
287 for (
int n =
length - 1; n > 0; --n)
288 if ((
begin() + n)->group_key() == (
begin() + n - 1)->group_key())
297 if (
auto num = label ?
r.label_seq :
r.seqid.num)
310 throw std::out_of_range(
"subchain_id(): empty span");
311 if (this->
size() > 1 && this->
front().subchain != this->
back().subchain)
312 fail(
"subchain id varies in a residue span: ", this->
front().subchain,
313 " vs ", this->
back().subchain);
314 return this->
begin()->subchain;
320 std::vector<std::string> seq;
322 seq.push_back(res.name);
330 throw std::out_of_range(
"label_seq_id_to_auth(): empty span");
346 throw std::out_of_range(
"auth_seq_id_to_label(): empty span");
378 int length()
const {
return const_().length(); }
380 return const_().extreme_num(label, sign);
385 const std::string&
subchain_id()
const {
return const_().subchain_id(); }
407 return *impl::find_iter(*
this, name);
410 erase(impl::find_iter(*
this, name));
418 return *impl::find_iter(*
this, name);
449template<
typename T,
typename Ch> std::vector<T> chain_subchains(
Ch*
ch) {
451 for (
auto start =
ch->residues.begin(); start !=
ch->residues.end(); ) {
452 auto end = start + 1;
453 while (end !=
ch->residues.end() && end->subchain == start->subchain)
455 v.push_back(
ch->whole().sub(start, end));
463 static const char*
what() {
return "Chain"; }
491 && end->subchain == begin->subchain)
526 return impl::chain_subchains<ResidueSpan>(
this);
529 return impl::chain_subchains<ConstResidueSpan>(
this);
562 for (
const Residue*
p = &res;
p-- != start; )
564 while (
p != start &&
p->group_key() == (
p-1)->group_key() &&
575 for (
const Residue*
p = &res + 1;
p != end; ++
p)
577 while (
p+1 != end &&
p->group_key() == (
p+1)->group_key() &&
615 cra.
atom ? cra.
atom->altloc :
'\0');
631template<
typename CraT>
639 if (cra.atom ==
nullptr)
645 if (++cra.chain == chains_end) {
649 }
while (cra.chain->residues.empty());
650 cra.residue = &cra.chain->residues[0];
652 }
while (cra.residue->atoms.empty());
653 cra.atom = &cra.residue->atoms[0];
657 while (cra.atom ==
nullptr || cra.atom == cra.residue->atoms.data()) {
658 while (cra.residue ==
nullptr || cra.residue == cra.chain->residues.data()) {
660 while ((--cra.chain)->residues.empty()) {}
673 const Chain* chains_end;
677template<
typename CraT,
typename ChainsRefT>
680 using iterator = BidirIterator<CraIterPolicy<CraT>>;
682 for (
auto& chain : chains)
683 for (auto& residue : chain.residues)
684 for (auto& atom : residue.atoms)
685 return CraIterPolicy<CraT>{
vector_end_ptr(chains), CraT{&chain, &residue, &atom}};
690 return CraIterPolicy<CraT>{chains_end, CraT{chains_end,
nullptr,
nullptr}};
698 static const char*
what() {
return "Model"; }
705 return impl::find_or_null(
chains, chain_name);
714 [&](
const Chain& c) { return c.name == chain_name; });
721 [&](
const Chain& c) {
return c.
name == chain_name; });
727 if (
i->name ==
j->name) {
728 i->append_residues(
j->residues,
min_sep);
744 return impl::model_subchains<ResidueSpan>(
this);
747 return impl::model_subchains<ConstResidueSpan>(
this);
751 std::map<std::string, std::string>
mapping;
754 for (
const Residue& res : chain.residues)
755 if (!res.subchain.empty() && res.subchain != prev) {
757 mapping[res.subchain] = chain.name;
765 if (chain.name == chain_name)
766 if (
Residue* residue = chain.find_residue(
rid))
777 if (chain.name == chain_name)
780 fail(
"No such chain or residue: " + chain_name +
" " + seqid.
str());
786 fail(
"Multiple residues " + chain_name +
" " + seqid.
str());
791 std::vector<std::string>
names;
793 for (
const Residue& res : chain.residues)
795 names.push_back(res.name);
801 if (chain.name ==
address.chain_name) {
802 for (
Residue& res : chain.residues)
803 if (
address.res_id.matches_noseg(res) &&
806 if (!
address.atom_name.empty())
808 return {&chain, &res, at};
811 return {
nullptr,
nullptr,
nullptr};
825 const Atom* a)
const {
826 return {{ c ?
static_cast<int>(c -
chains.data()) : -1,
827 c &&
r ?
static_cast<int>(
r - c->
residues.data()) : -1,
828 r && a ?
static_cast<int>(a -
r->atoms.data()) : -1 }};
832 std::bitset<(size_t)
El::END> table;
834 for (
const Residue& res : chain.residues)
835 for (
const Atom& a : res.atoms)
836 table.set(a.element.ordinal());
851 using index_type = std::uint32_t;
852 std::unordered_map<const Atom*, std::array<index_type, 3>> atom_parents;
853 std::unordered_map<const Residue*, std::array<index_type, 2>> residue_parents;
854 void update(
const Model& model) {
856 for (index_type
ic = 0;
ic < model.
chains.size(); ++
ic) {
860 residue_parents.emplace(&res, std::array<index_type,2>{
ic,
ir});
861 for (index_type ia = 0; ia < res.
atoms.size(); ++ia)
862 atom_parents.emplace(&res.
atoms[ia], std::array<index_type,3>{ic, ir, ia});
867 atom_parents.clear();
868 residue_parents.clear();
870 CRA get_cra(Model& model, Atom* atom) {
872 auto it = atom_parents.find(atom);
873 if (it != atom_parents.end()) {
874 auto ic = it->second[0];
875 auto ir = it->second[1];
876 auto ia = it->second[2];
877 if (ic < model.chains.size()) {
878 Chain& chain = model.chains[ic];
879 if (ir < chain.residues.size()) {
880 Residue& res = chain.residues[ir];
881 if (ia < res.atoms.size() && atom == &res.atoms[ia])
882 return {&chain, &res, atom};
889 Chain* get_parent_of(Model& model, Residue* res) {
891 auto it = residue_parents.find(res);
892 if (it != residue_parents.end()) {
893 auto ic = it->second[0];
894 auto ir = it->second[1];
895 if (ic < model.chains.size()) {
896 Chain& chain = model.chains[ic];
897 if (ir < chain.residues.size() && res == &chain.residues[ir])
905 ParentIndex parent_index;
909 std::vector<Entity>& entities) {
910 if (!subchain_id.empty())
917 const std::vector<Entity>& entities) {
947 std::map<std::string, std::string>
info;
959 const std::string&
get_info(
const std::string& tag)
const {
960 static const std::string empty;
962 return it !=
info.end() ?
it->second : empty;
967 fail(
"no structural models");
989 for (
size_t i = 0;
i !=
models.size(); ++
i)
1032 if ((
a1 == c.partner1 &&
a2 == c.partner2) ||
1033 (
a1 == c.partner2 &&
a2 == c.partner1))
1039 return std::count_if(
ncs.begin(),
ncs.end(), [](
const NcsOp&
o) { return o.given; });
1045 return std::any_of(
ncs.begin(),
ncs.end(), [](
const NcsOp&
o) { return !o.given; });
1050 for (
int i = 0;
i < order; ++
i)
1051 vec.insert(std::upper_bound(vec.begin(), vec.end(),
serial2),
serial2);
1060 model.merge_chain_parts(
min_sep);
1098 [&](
const Residue&
r) { return r.matches(rid); });
CraIterPolicy(const Chain *end, CraT cra_)
CraIterPolicy< const_CRA > const_policy
bool equal(const CraIterPolicy &o) const
CoorFormat
File format of a macromolecular model.
T * vector_end_ptr(std::vector< T > &v)
bool atom_matches(const const_CRA &cra, const AtomAddress &addr, bool ignore_segment=false)
std::string atom_str(const Chain &chain, const ResidueId &res_id, const Atom &atom)
void vector_remove_if(std::vector< T > &v, F &&condition)
Entity * find_entity_of_subchain(const std::string &subchain_id, std::vector< Entity > &entities)
AtomGroup_< Atom > AtomGroup
const SpaceGroup * find_spacegroup_by_name(std::string name, double alpha=0., double gamma=0.) noexcept
bool is_same_conformer(char altloc1, char altloc2)
void remove_empty_children(T &obj)
std::string join_str(T begin, T end, const S &sep, const F &getter)
bool in_vector(const T &x, const std::vector< T > &v)
CalcFlag
corresponds to _atom_site.calc_flag in mmCIF
void fail(const std::string &msg)
AtomAddress make_address(const Chain &ch, const Residue &res, const Atom &at)
constexpr double u_to_b()
constexpr int ialpha4_id(const char *s)
void vector_move_extend(std::vector< T > &dst, std::vector< T > &&src)
Represents atom site in macromolecular structure (~100 bytes).
bool altloc_matches(char request) const
const std::string & group_key() const
std::string padded_name() const
bool same_conformer(const Atom &other) const
char altloc_or(char null_char) const
static const char * what()
Residue * find_residue(const ResidueId &rid)
UniqProxy< Residue > first_conformer()
const Residue * next_residue(const Residue &res) const
ConstResidueSpan get_waters() const
ResidueSpan get_polymer()
ConstResidueSpan get_residue_span(F &&func) const
ResidueSpan get_ligands()
std::vector< Residue > residues
const std::vector< Residue > & children() const
ResidueSpan get_subchain(const std::string &s)
const Residue * find_residue(const ResidueId &rid) const
ConstUniqProxy< Residue > first_conformer() const
Residue * find_or_add_residue(const ResidueId &rid)
ResidueGroup find_residue_group(SeqId id)
ResidueSpan get_residue_span(F &&func)
ConstResidueGroup find_residue_group(SeqId id) const
bool is_first_in_group(const Residue &res) const
ConstResidueSpan get_polymer() const
std::vector< ConstResidueSpan > subchains() const
ConstResidueSpan get_subchain(const std::string &s) const
std::vector< ResidueSpan > subchains()
ConstResidueSpan get_ligands() const
ConstResidueSpan whole() const
const Residue * previous_residue(const Residue &res) const
std::vector< Residue > & children()
void append_residues(std::vector< Residue > new_resi, int min_sep=0)
static const char * what()
Chain(std::string cname) noexcept
const Residue & by_resname(const std::string &name)
ConstResidueGroup()=default
ConstResidueGroup(ConstResidueSpan &&sp)
ConstResidueGroup find_residue_group(SeqId id) const
SeqId label_seq_id_to_auth(SeqId::OptionalNum label_seq_id) const
std::vector< std::string > extract_sequence() const
ConstResidueSpan(Parent &&span)
SeqId::OptionalNum auth_seq_id_to_label(SeqId auth_seq_id) const
ConstUniqProxy< Residue, ConstResidueSpan > first_conformer() const
const std::string & subchain_id() const
SeqId::OptionalNum extreme_num(bool label, int sign) const
const char * uname() const
ResidueGroup find_residue_group(const std::string &chain_name, SeqId seqid)
void remove_chain(const std::string &chain_name)
ConstCraProxy all() const
std::vector< std::string > get_all_residue_names() const
std::map< std::string, std::string > subchain_to_chain() const
Residue * find_residue(const std::string &chain_name, const ResidueId &rid)
const Residue * find_residue(const std::string &chain_name, const ResidueId &rid) const
ConstResidueSpan get_subchain(const std::string &sub_name) const
const Chain * find_chain(const std::string &chain_name) const
const std::vector< Chain > & children() const
ResidueSpan get_subchain(const std::string &sub_name)
const_CRA find_cra(const AtomAddress &address, bool ignore_segment=false) const
std::vector< ConstResidueSpan > subchains() const
static const char * what()
const Atom * find_atom(const AtomAddress &address) const
std::vector< Chain > chains
std::vector< ResidueSpan > subchains()
Chain * find_chain(const std::string &chain_name)
Chain * find_last_chain(const std::string &chain_name)
void merge_chain_parts(int min_sep=0)
Model(std::string mname) noexcept
Chain * get_parent_of(Residue *res)
Atom * find_atom(const AtomAddress &address)
std::bitset<(size_t) El::END > present_elements() const
CRA find_cra(const AtomAddress &address, bool ignore_segment=false)
std::array< int, 3 > get_indices(const Chain *c, const Residue *r, const Atom *a) const
Residue & sole_residue(const std::string &chain_name, SeqId seqid)
std::vector< Chain > & children()
typename Span< Residue >::iterator iterator
std::vector< typename Span< Residue >::value_type > vector_type
MutableVectorSpan< Item > sub(Iter first, Iter last)
MutableVectorSpan< Residue > subspan(F &&func)
Non-crystallographic symmetry operation (such as in the MTRIXn record)
helper type used for Structure::shortened_ccd_codes
options affecting how pdb file is read
Coordinates in Angstroms - orthogonal (Cartesian) coordinates.
void remove_residue(const std::string &name)
ResidueGroup(ResidueSpan &&span)
Residue & by_resname(const std::string &name)
GroupingProxy residue_groups()
ConstUniqProxy< Residue, ResidueSpan > first_conformer() const
std::vector< std::string > extract_sequence() const
SeqId::OptionalNum auth_seq_id_to_label(SeqId auth_seq_id) const
ResidueSpan(vector_type &v, iterator begin, std::size_t n)
ResidueGroup find_residue_group(SeqId id)
SeqId::OptionalNum extreme_num(bool label, int sign) const
SeqId label_seq_id_to_auth(SeqId::OptionalNum label_seq_id) const
UniqProxy< Residue, ResidueSpan > first_conformer()
const std::string & subchain_id() const
ResidueSpan(Parent &&span)
const Atom * get_p() const
Atom * find_atom(const std::string &atom_name, char altloc, El el=El::X)
bool same_conformer(const Residue &other) const
AtomGroup get(const std::string &atom_name)
bool is_water() const
Convenience function that duplicates functionality from resinfo.hpp.
std::vector< Atom > & children()
SiftsUnpResidue sifts_unp
UniqProxy< Atom > first_conformer()
ConstUniqProxy< Atom > first_conformer() const
const std::vector< Atom > & children() const
const Atom * get_o3prim() const
Residue(const ResidueId &rid) noexcept
const Atom * find_atom(const std::string &atom_name, char altloc, El el=El::X) const
const Atom * get_ca() const
std::vector< Atom > atoms
static const char * what()
const Atom * find_by_element(El el) const
const Atom * get_c() const
Residue empty_copy() const
std::vector< Atom >::iterator find_atom_iter(const std::string &atom_name, char altloc, El el=El::X)
const Atom * get_n() const
Atom & sole_atom(const std::string &atom_name)
OptionalInt< INT_MIN > OptionalNum
Reference to UniProt residue, based on _pdbx_sifts_xref_db.
Span< V > subspan(F &&func)
const_iterator begin() const
const_iterator end() const
const std::vector< Model > & children() const
void merge_chain_parts(int min_sep=0)
std::vector< Sheet > sheets
std::vector< Connection > connections
const Entity * get_entity_of(const ConstResidueSpan &sub) const
const Model * find_model(const std::string &model_name) const
std::vector< ModRes > mod_residues
bool ncs_not_expanded() const
Entity * get_entity_of(const ConstResidueSpan &sub)
std::vector< Assembly > assemblies
const Entity * get_entity(const std::string &ent_id) const
const Model & first_model() const
Model & find_or_add_model(const std::string &model_name)
Model * find_model(const std::string &model_name)
const Connection * find_connection_by_name(const std::string &conn_name) const
std::vector< Model > models
void remove_empty_chains()
void add_conect_one_way(int serial1, int serial2, int order)
Entity * get_entity(const std::string &ent_id)
std::vector< Model > & children()
Assembly * find_assembly(const std::string &assembly_id)
std::vector< Helix > helices
double get_ncs_multiplier() const
Connection * find_connection(const AtomAddress &a1, const AtomAddress &a2)
std::map< int, std::vector< int > > conect_map
Connection * find_connection_by_cra(const const_CRA &cra1, const const_CRA &cra2, bool ignore_segment=false)
Structure empty_copy() const
std::vector< CisPep > cispeps
void remove_model(const std::string &model_name)
char ter_status
in input PDB file: y = TER records were read, e = errors were detected
double resolution
simplistic resolution value from/for REMARK 2
std::vector< OldToNew > shortened_ccd_codes
Mapping of long (4+) CCD codes (residue names) to PDB-compatible ones.
bool has_origx
Store ORIGXn / _database_PDB_matrix.origx*.
const std::string & get_info(const std::string &tag) const
size_t ncs_given_count() const
const SpaceGroup * find_spacegroup() const
std::map< std::string, std::string > info
Minimal metadata with keys being mmcif tags: _entry.id, _cell.Z_PDB, ...
std::vector< std::string > raw_remarks
original REMARK records stored if the file was read from the PDB format
std::string spacegroup_hm
void add_conect(int serial1, int serial2, int order)
Connection * find_connection_by_name(const std::string &conn_name)
std::vector< Entity > entities
void add_ncs_images_to_cs_images(const std::vector< NcsOp > &ncs)
void set_cell_images_from_spacegroup(const SpaceGroup *sg)