5#ifndef GEMMI_MODEL_HPP_
6#define GEMMI_MODEL_HPP_
// get_id overload for items identified by a `name` member: returns m.name.
// The trailing return type decltype(m.name) is the declared type of that
// member, so the identifier is handed back by value.
// NOTE(review): T is presumably a template parameter declared on the line
// just above (not visible here), which would make this overload drop out
// for types that have no `name` member - confirm.
32auto get_id(
const T& m) ->
decltype(m.name) {
return m.name; }
// get_id overload for items identified by a `num` member: returns m.num.
// The trailing return type decltype(m.num) is the declared type of that
// member, so the identifier is handed back by value.
// NOTE(review): T is presumably a template parameter declared on the line
// just above (not visible here), which would make this overload drop out
// for types that have no `num` member - confirm.
34auto get_id(
const T& m) ->
decltype(m.num) {
return m.num; }
37template<
typename Vec,
typename S>
38auto find_iter_(Vec& vec,
const S& name) {
39 return std::find_if(vec.begin(), vec.end(), [&name](
const auto& m) { return get_id(m) == name; });
42template<
typename T,
typename S>
43T* find_or_null(std::vector<T>& vec,
const S& name) {
44 auto it = find_iter_(vec, name);
45 return it != vec.end() ? &*it :
nullptr;
48template<
typename T,
typename S>
49T& find_or_add(std::vector<T>& vec,
const S& name) {
50 if (
T* ret = find_or_null(vec, name))
52 vec.emplace_back(name);
56template<
typename Span,
typename S>
59 auto it = find_iter_(span, name);
61 throw std::invalid_argument(
cat(
62 T::what(),
' ', name,
" not found (only [",
63 join_str(span.begin(), span.end(),
' ', [](
const T& item) { return cat(get_id(item)); }),
68template<
typename Group>
69typename Group::element_type& get_by_altloc(Group& group,
char alt) {
70 for (
auto& atom : group)
71 if (atom.altloc == alt)
73 fail(
"No such altloc");
76template<
typename T,
typename M> std::vector<T> model_subchains(M* model) {
78 for (
auto& chain : model->chains)
111 using Item =
typename T::child_type;
// Returns the fixed label "Atom" for the enclosing type. The result points
// to a string literal, so it must not be freed or modified by the caller.
// A generic lookup helper in this file puts T::what() at the front of its
// "... not found" exception message.
121 static const char*
what() {
return "Atom"; }
167template<
typename AtomType>
168struct AtomGroup_ : ItemGroup<AtomType> {
169 using ItemGroup<AtomType>::ItemGroup;
// Name of the group: the `name` of the first element (front()) when the
// group is non-empty, otherwise an empty string. Returned by value.
170 std::string name()
const {
return !this->
empty() ? this->
front().name :
""; }
171 AtomType& by_altloc(
char alt) {
172 for (
int i = 0; i != this->
extent(); ++i) {
173 AtomType* a = &this->
front() + i;
174 if (a->altloc == alt && (a->name == this->front().name))
177 fail(
"No such altloc");
// Returns the fixed label "Residue" for the enclosing type. The result
// points to a string literal, so it must not be freed or modified.
186 static const char*
what() {
return "Residue"; }
231 if (a.name == atom_name && a.altloc_matches(altloc) && (el ==
El::X || a.element == el))
244 fail(
"Atom not found.");
249 if (atom.name == atom_name)
251 fail(
"No such atom: " + atom_name);
257 fail(
"Multiple alternative atoms " + atom_name);
270 return atoms.empty() || other.
atoms.empty() ||
279 if (
name.length() != 3)
294 if (atom.altloc && altlocs.find(atom.altloc) == std::string::npos)
295 altlocs += atom.altloc;
299struct ConstResidueGroup;
308 for (
int n =
length - 1; n > 0; --n)
309 if ((
begin() + n)->group_key() == (
begin() + n - 1)->group_key())
318 if (
auto num = label ?
r.label_seq :
r.seqid.num)
331 throw std::out_of_range(
"subchain_id(): empty span");
332 if (this->
size() > 1 && this->
front().subchain != this->
back().subchain)
333 fail(
"subchain id varies in a residue span: ", this->
front().subchain,
334 " vs ", this->
back().subchain);
335 return this->
begin()->subchain;
341 std::vector<std::string> seq;
343 seq.push_back(res.name);
351 throw std::out_of_range(
"label_seq_id_to_auth(): empty span");
367 throw std::out_of_range(
"auth_seq_id_to_label(): empty span");
// Forwards to const_().length() and returns its result unchanged.
// NOTE(review): const_() presumably yields the read-only counterpart of
// this object - confirm against its definition (not visible here).
399 int length()
const {
return const_().length(); }
401 return const_().extreme_num(label, sign);
// Forwards to const_().subchain_id() and returns the same reference.
// NOTE(review): the callee is presumably the subchain_id() shown earlier in
// this file, which throws std::out_of_range ("subchain_id(): empty span")
// and calls fail() when the first and last subchain differ - confirm.
406 const std::string&
subchain_id()
const {
return const_().subchain_id(); }
428 return *impl::find_iter(*
this, name);
431 erase(impl::find_iter(*
this, name));
439 return *impl::find_iter(*
this, name);
470template<
typename T,
typename Ch> std::vector<T> chain_subchains(
Ch*
ch) {
472 for (
auto start =
ch->residues.begin(); start !=
ch->residues.end(); ) {
473 auto end = start + 1;
474 while (end !=
ch->residues.end() && end->subchain == start->subchain)
476 v.push_back(
ch->whole().sub(start, end));
// Returns the fixed label "Chain" for the enclosing type. The result points
// to a string literal, so it must not be freed or modified by the caller.
484 static const char*
what() {
return "Chain"; }
513 && end->subchain == begin->subchain)
548 return impl::chain_subchains<ResidueSpan>(
this);
551 return impl::chain_subchains<ConstResidueSpan>(
this);
584 for (
const Residue*
p = &res;
p-- != start; )
586 while (
p != start &&
p->group_key() == (
p-1)->group_key() &&
597 for (
const Residue*
p = &res + 1;
p != end; ++
p)
599 while (
p+1 != end &&
p->group_key() == (
p+1)->group_key() &&
637 cra.
atom ? cra.
atom->altloc :
'\0');
653template<
typename CraT>
661 if (cra.atom ==
nullptr)
667 if (++cra.chain == chains_end) {
671 }
while (cra.chain->residues.empty());
672 cra.residue = &cra.chain->residues[0];
674 }
while (cra.residue->atoms.empty());
675 cra.atom = &cra.residue->atoms[0];
679 while (cra.atom ==
nullptr || cra.atom == cra.residue->atoms.data()) {
680 while (cra.residue ==
nullptr || cra.residue == cra.chain->residues.data()) {
682 while ((--cra.chain)->residues.empty()) {}
695 const Chain* chains_end;
699template<
typename CraT,
typename ChainsRefT>
702 using iterator = BidirIterator<CraIterPolicy<CraT>>;
704 for (
auto& chain : chains)
705 for (auto& residue : chain.residues)
706 for (auto& atom : residue.atoms)
707 return CraIterPolicy<CraT>{
vector_end_ptr(chains), CraT{&chain, &residue, &atom}};
712 return CraIterPolicy<CraT>{chains_end, CraT{chains_end,
nullptr,
nullptr}};
// Returns the fixed label "Model" for the enclosing type. The result points
// to a string literal, so it must not be freed or modified by the caller.
720 static const char*
what() {
return "Model"; }
729 return impl::find_or_null(
chains, chain_name);
738 [&](
const Chain& c) { return c.name == chain_name; });
745 [&](
const Chain& c) {
return c.
name == chain_name; });
751 if (
i->name ==
j->name) {
752 i->append_residues(
j->residues,
min_sep);
768 return impl::model_subchains<ResidueSpan>(
this);
771 return impl::model_subchains<ConstResidueSpan>(
this);
775 std::map<std::string, std::string>
mapping;
778 for (
const Residue& res : chain.residues)
779 if (!res.subchain.empty() && res.subchain != prev) {
781 mapping[res.subchain] = chain.name;
789 if (chain.name == chain_name)
790 if (
Residue* residue = chain.find_residue(
rid))
801 if (chain.name == chain_name)
804 fail(
"No such chain or residue: " + chain_name +
" " + seqid.
str());
810 fail(
"Multiple residues " + chain_name +
" " + seqid.
str());
815 std::vector<std::string>
names;
817 for (
const Residue& res : chain.residues)
819 names.push_back(res.name);
825 if (chain.name ==
address.chain_name) {
826 for (
Residue& res : chain.residues)
827 if (
address.res_id.matches_noseg(res) &&
830 if (!
address.atom_name.empty())
832 return {&chain, &res, at};
835 return {
nullptr,
nullptr,
nullptr};
849 const Atom* a)
const {
850 return {{ c ?
static_cast<int>(c -
chains.data()) : -1,
851 c &&
r ?
static_cast<int>(
r - c->
residues.data()) : -1,
852 r && a ?
static_cast<int>(a -
r->atoms.data()) : -1 }};
856 std::bitset<(size_t)
El::END> table;
858 for (
const Residue& res : chain.residues)
859 for (
const Atom& a : res.atoms)
860 table.set(a.element.ordinal());
872 std::vector<Entity>& entities) {
873 if (!subchain_id.empty())
880 const std::vector<Entity>& entities) {
// Returns the fixed label "Structure" for the enclosing type. The result
// points to a string literal, so it must not be freed or modified.
885 static const char*
what() {
return "Structure"; }
911 std::map<std::string, std::string>
info;
923 const std::string&
get_info(
const std::string& tag)
const {
924 static const std::string empty;
926 return it !=
info.end() ?
it->second : empty;
931 fail(
"no structural models");
939 return impl::find_or_null(
models, model_num);
945 return impl::find_or_add(
models, model_num);
949 for (
size_t i = 0;
i !=
models.size(); ++
i)
992 if ((
a1 == c.partner1 &&
a2 == c.partner2) ||
993 (
a1 == c.partner2 &&
a2 == c.partner1))
999 return std::count_if(
ncs.begin(),
ncs.end(), [](
const NcsOp&
o) { return o.given; });
1005 return std::any_of(
ncs.begin(),
ncs.end(), [](
const NcsOp&
o) { return !o.given; });
1010 for (
int i = 0;
i < order; ++
i)
1020 model.merge_chain_parts(
min_sep);
1061 [&](
const Residue&
r) { return r.matches(rid); });
CraIterPolicy(const Chain *end, CraT cra_)
CraIterPolicy< const_CRA > const_policy
bool equal(const CraIterPolicy &o) const
Elements from the periodic table.
fail(), unreachable() and __declspec/__attribute__ macros
Bidirectional iterators (over elements of any container) that can filter, uniquify,...
T * vector_end_ptr(std::vector< T > &v)
bool atom_matches(const const_CRA &cra, const AtomAddress &addr, bool ignore_segment=false)
std::string atom_str(const Chain &chain, const ResidueId &res_id, const Atom &atom)
void vector_remove_if(std::vector< T > &v, F &&condition)
Entity * find_entity_of_subchain(const std::string &subchain_id, std::vector< Entity > &entities)
AtomGroup_< Atom > AtomGroup
bool is_same_conformer(char altloc1, char altloc2)
void remove_empty_children(T &obj)
GEMMI_DLL const SpaceGroup * find_spacegroup_by_name(std::string name, double alpha=0., double gamma=0., const char *prefer=nullptr)
If angles alpha and gamma are provided, they are used to distinguish hexagonal and rhombohedral settings.
void add_distinct_altlocs(const Residue &res, std::string &altlocs)
CoorFormat
File format of a macromolecular model.
std::string join_str(T begin, T end, const S &sep, const F &getter)
bool in_vector(const T &x, const std::vector< T > &v)
std::string cat(Args const &... args)
CalcFlag
corresponds to _atom_site.calc_flag in mmCIF
void fail(const std::string &msg)
AtomAddress make_address(const Chain &ch, const Residue &res, const Atom &at)
constexpr double u_to_b()
constexpr int ialpha4_id(const char *s)
void vector_move_extend(std::vector< T > &dst, std::vector< T > &&src)
SeqId – residue number and insertion code together.
Span - span of array or std::vector. MutableVectorSpan - span of std::vector with insert() and erase(...
Represents atom site in macromolecular structure (~100 bytes).
bool altloc_matches(char request) const
const std::string & group_key() const
std::string padded_name() const
bool same_conformer(const Atom &other) const
char altloc_or(char null_char) const
static const char * what()
Residue * find_residue(const ResidueId &rid)
UniqProxy< Residue > first_conformer()
const Residue * next_residue(const Residue &res) const
ConstResidueSpan get_waters() const
ResidueSpan get_polymer()
ConstResidueSpan get_residue_span(F &&func) const
ResidueSpan get_ligands()
std::vector< Residue > residues
const std::vector< Residue > & children() const
ResidueSpan get_subchain(const std::string &s)
Chain(const std::string &name_) noexcept
const Residue * find_residue(const ResidueId &rid) const
ConstUniqProxy< Residue > first_conformer() const
Residue * find_or_add_residue(const ResidueId &rid)
ResidueGroup find_residue_group(SeqId id)
ResidueSpan get_residue_span(F &&func)
ConstResidueGroup find_residue_group(SeqId id) const
bool is_first_in_group(const Residue &res) const
ConstResidueSpan get_polymer() const
std::vector< ConstResidueSpan > subchains() const
ConstResidueSpan get_subchain(const std::string &s) const
std::vector< ResidueSpan > subchains()
ConstResidueSpan get_ligands() const
ConstResidueSpan whole() const
const Residue * previous_residue(const Residue &res) const
std::vector< Residue > & children()
void append_residues(std::vector< Residue > new_resi, int min_sep=0)
static const char * what()
const Residue & by_resname(const std::string &name)
ConstResidueGroup()=default
ConstResidueGroup(ConstResidueSpan &&sp)
ConstResidueGroup find_residue_group(SeqId id) const
SeqId label_seq_id_to_auth(SeqId::OptionalNum label_seq_id) const
std::vector< std::string > extract_sequence() const
ConstResidueSpan(Parent &&span)
SeqId::OptionalNum auth_seq_id_to_label(SeqId auth_seq_id) const
ConstUniqProxy< Residue, ConstResidueSpan > first_conformer() const
const std::string & subchain_id() const
SeqId::OptionalNum extreme_num(bool label, int sign) const
const char * uname() const
ResidueGroup find_residue_group(const std::string &chain_name, SeqId seqid)
void remove_chain(const std::string &chain_name)
ConstCraProxy all() const
std::vector< std::string > get_all_residue_names() const
std::map< std::string, std::string > subchain_to_chain() const
Residue * find_residue(const std::string &chain_name, const ResidueId &rid)
const Residue * find_residue(const std::string &chain_name, const ResidueId &rid) const
ConstResidueSpan get_subchain(const std::string &sub_name) const
const Chain * find_chain(const std::string &chain_name) const
const std::vector< Chain > & children() const
ResidueSpan get_subchain(const std::string &sub_name)
const_CRA find_cra(const AtomAddress &address, bool ignore_segment=false) const
std::vector< ConstResidueSpan > subchains() const
static const char * what()
const Atom * find_atom(const AtomAddress &address) const
std::vector< Chain > chains
std::vector< ResidueSpan > subchains()
Chain * find_chain(const std::string &chain_name)
Chain * find_last_chain(const std::string &chain_name)
void merge_chain_parts(int min_sep=0)
Atom * find_atom(const AtomAddress &address)
std::bitset<(size_t) El::END > present_elements() const
CRA find_cra(const AtomAddress &address, bool ignore_segment=false)
std::array< int, 3 > get_indices(const Chain *c, const Residue *r, const Atom *a) const
Residue & sole_residue(const std::string &chain_name, SeqId seqid)
std::vector< Chain > & children()
typename Span< Residue >::iterator iterator
std::vector< typename Span< Residue >::value_type > vector_type
MutableVectorSpan< Item > sub(Iter first, Iter last)
MutableVectorSpan< Residue > subspan(F &&func)
Non-crystallographic symmetry operation (such as in the MTRIXn record)
options affecting how pdb file is read
Coordinates in Angstroms - orthogonal (Cartesian) coordinates.
void remove_residue(const std::string &name)
ResidueGroup(ResidueSpan &&span)
Residue & by_resname(const std::string &name)
GroupingProxy residue_groups()
ConstUniqProxy< Residue, ResidueSpan > first_conformer() const
std::vector< std::string > extract_sequence() const
SeqId::OptionalNum auth_seq_id_to_label(SeqId auth_seq_id) const
ResidueSpan(vector_type &v, iterator begin, std::size_t n)
ResidueGroup find_residue_group(SeqId id)
SeqId::OptionalNum extreme_num(bool label, int sign) const
SeqId label_seq_id_to_auth(SeqId::OptionalNum label_seq_id) const
UniqProxy< Residue, ResidueSpan > first_conformer()
const std::string & subchain_id() const
ResidueSpan(Parent &&span)
const Atom * get_p() const
bool same_conformer(const Residue &other) const
Atom * find_atom(const std::string &atom_name, char altloc, El el=El::X, bool strict_altloc=true)
El::X means anything; in strict_altloc mode, '*' = any altloc, otherwise it's \0.
AtomGroup get(const std::string &atom_name)
bool is_water() const
Convenience function that duplicates functionality from resinfo.hpp.
std::vector< Atom > & children()
const Atom * find_atom(const std::string &atom_name, char altloc, El el=El::X, bool strict_altloc=true) const
SiftsUnpResidue sifts_unp
UniqProxy< Atom > first_conformer()
ConstUniqProxy< Atom > first_conformer() const
const std::vector< Atom > & children() const
const Atom * get_o3prim() const
Residue(const ResidueId &rid) noexcept
const Atom * get_ca() const
std::vector< Atom > atoms
static const char * what()
const Atom * find_by_element(El el) const
const Atom * get_c() const
Residue empty_copy() const
std::vector< Atom >::iterator find_atom_iter(const std::string &atom_name, char altloc, El el=El::X)
const Atom * get_n() const
Atom & sole_atom(const std::string &atom_name)
OptionalInt< INT_MIN > OptionalNum
Reference to UniProt residue, based on _pdbx_sifts_xref_db.
Span< V > subspan(F &&func)
const_iterator begin() const
const_iterator end() const
typename std::remove_cv< Item >::type value_type
const Model * find_model(int model_num) const
const std::vector< Model > & children() const
void merge_chain_parts(int min_sep=0)
std::vector< Sheet > sheets
std::vector< Connection > connections
const Entity * get_entity_of(const ConstResidueSpan &sub) const
std::vector< ModRes > mod_residues
bool ncs_not_expanded() const
Entity * get_entity_of(const ConstResidueSpan &sub)
std::vector< Assembly > assemblies
const Entity * get_entity(const std::string &ent_id) const
const Model & first_model() const
const Connection * find_connection_by_name(const std::string &conn_name) const
std::vector< Model > models
void remove_empty_chains()
Model * find_model(int model_num)
Entity * get_entity(const std::string &ent_id)
std::vector< Model > & children()
Assembly * find_assembly(const std::string &assembly_id)
std::vector< Helix > helices
double get_ncs_multiplier() const
void add_conect_one_way(int serial_a, int serial_b, int order)
Connection * find_connection(const AtomAddress &a1, const AtomAddress &a2)
std::map< int, std::vector< int > > conect_map
Connection * find_connection_by_cra(const const_CRA &cra1, const const_CRA &cra2, bool ignore_segment=false)
Structure empty_copy() const
std::vector< CisPep > cispeps
Model & find_or_add_model(int model_num)
char ter_status
in input PDB file: y = TER records were read, e = errors were detected
static const char * what()
double resolution
simplistic resolution value from/for REMARK 2
bool has_origx
Store ORIGXn / _database_PDB_matrix.origx*.
const std::string & get_info(const std::string &tag) const
size_t ncs_given_count() const
const SpaceGroup * find_spacegroup() const
std::map< std::string, std::string > info
Minimal metadata with keys being mmcif tags: _entry.id, _cell.Z_PDB, ...
std::vector< std::string > raw_remarks
original REMARK records stored if the file was read from the PDB format
std::string spacegroup_hm
std::vector< std::pair< std::string, std::string > > shortened_ccd_codes
Mapping of long (4+) CCD codes (residue names) to PDB-compatible ones.
void add_conect(int serial1, int serial2, int order)
Connection * find_connection_by_name(const std::string &conn_name)
std::vector< Entity > entities
void add_ncs_images_to_cs_images(const std::vector< NcsOp > &ncs)
void set_cell_images_from_spacegroup(const SpaceGroup *sg)
Crystallographic Symmetry. Space Groups. Coordinate Triplets.
Utilities. Mostly for working with strings and vectors.