Gemmi C++ API
Loading...
Searching...
No Matches
resinfo.hpp
Go to the documentation of this file.
1// Copyright 2018 Global Phasing Ltd.
2//
3// List of common residues with basic data.
4
5#ifndef GEMMI_RESINFO_HPP_
6#define GEMMI_RESINFO_HPP_
7
8#include <cstdint> // for uint8_t
9#include <string>
10#include <vector>
11#include "fail.hpp"
12
13namespace gemmi {
14
// Simple approximate classification.
//   UNKNOWN  - not classified
//   AA       - aminoacid
//   AAD      - D-aminoacid
//   PAA      - proline-like aminoacid
//   MAA      - methylated aminoacid
//   RNA, DNA - nucleic acids
//   BUF      - agent from crystallization buffer according to PISA agents.dat
//   HOH      - water or heavy water (OH, H3O, D3O are not included here)
//   PYR      - pyranose according to the refmac dictionary
//   KET      - ketopyranose according to the refmac dictionary
//   ELS      - something else (ligand).
enum class ResidueKind : unsigned char {
  // when changing this list update check_polymer_type()
  UNKNOWN = 0,
  AA,
  AAD,
  PAA,
  MAA,
  RNA,
  DNA,
  BUF,
  HOH,
  PYR,
  KET,
  ELS
};
30
32 char name[8];
34 // linking type: 0=n/a, 1=peptide-linking, 2=nucl.-linking, 3=(2|1)
35 std::uint8_t linking_type;
36 // one-letter code or space (uppercase iff it is a standard residue)
38 // rough count of hydrogens used to estimate mass with implicit hydrogens
39 std::uint8_t hydrogen_count;
40 // molecular weight
41 float weight;
42
43 bool found() const { return kind != ResidueKind::UNKNOWN; }
44 bool is_water() const { return kind == ResidueKind::HOH; }
45 bool is_dna() const { return kind == ResidueKind::DNA; }
46 bool is_rna() const { return kind == ResidueKind::RNA; }
47 bool is_nucleic_acid() const { return is_dna() || is_rna(); }
48 bool is_amino_acid() const {
49 return kind == ResidueKind::AA || kind == ResidueKind::AAD ||
51 }
52 bool is_buffer_or_water() const {
54 }
55 // PDB format has non-standard residues (modified AA) marked as HETATM.
56 bool is_standard() const { return (one_letter_code & 0x20) == 0; }
57 char fasta_code() const { return is_standard() ? one_letter_code : 'X'; }
58 bool is_peptide_linking() const { return (linking_type & 1); }
59 bool is_na_linking() const { return (linking_type & 2); }
60};
61
63GEMMI_DLL size_t find_tabulated_residue_idx(const std::string& name);
65
67inline const char* expand_one_letter(char c, ResidueKind kind) {
68 static const char* names =
69 // amino-acids (all letters but J are used)
70 "ALA\0ASX\0CYS\0ASP\0GLU\0PHE\0GLY\0HIS\0ILE\0\0 LYS\0LEU\0MET\0" // A-M
71 "ASN\0PYL\0PRO\0GLN\0ARG\0SER\0THR\0SEC\0VAL\0TRP\0UNK\0TYR\0GLX\0" // N-Z
72 // DNA
73 "DA\0 \0\0 DC\0 \0\0 \0\0 \0\0 DG\0 \0\0 DI\0 \0\0 \0\0 \0\0 \0\0 " // A-M
74 "DN\0 \0\0 \0\0 \0\0 \0\0 \0\0 DT\0 DU\0 \0\0 \0\0 \0\0 \0\0 \0\0 "; // N-Z
75 c &= ~0x20;
76 const char* ret = nullptr;
77 if (c >= 'A' && c <= 'Z') {
78 ret = &names[4 * (c - 'A')];
79 if (kind == ResidueKind::AA) {
80 // ret is already set
81 } else if (kind == ResidueKind::DNA) {
82 ret += 4 * 26;
83 } else if (kind == ResidueKind::RNA && c != 'T') {
84 ret += 4 * 26 + 1;
85 } else {
86 ret = nullptr;
87 }
88 }
89 return (ret && *ret) ? ret : nullptr;
90}
91
93GEMMI_DLL std::vector<std::string> expand_one_letter_sequence(const std::string& seq,
94 ResidueKind kind);
95
96// deprecated
97inline const char* expand_protein_one_letter(char c) {
99}
100// deprecated
101GEMMI_DLL std::vector<std::string> expand_protein_one_letter_string(const std::string& s);
102
103
104} // namespace gemmi
105#endif
fail(), unreachable() and __declspec/__attribute__ macros
#define GEMMI_DLL
Definition fail.hpp:53
GEMMI_DLL std::vector< std::string > expand_protein_one_letter_string(const std::string &s)
const char * expand_one_letter(char c, ResidueKind kind)
kind can be AA, RNA or DNA
Definition resinfo.hpp:67
GEMMI_DLL ResidueInfo & find_tabulated_residue(const std::string &name)
GEMMI_DLL size_t find_tabulated_residue_idx(const std::string &name)
const char * expand_protein_one_letter(char c)
Definition resinfo.hpp:97
GEMMI_DLL ResidueInfo & get_residue_info(size_t idx)
GEMMI_DLL std::vector< std::string > expand_one_letter_sequence(const std::string &seq, ResidueKind kind)
kind can be AA, RNA or DNA
ResidueKind
Definition resinfo.hpp:26
bool is_nucleic_acid() const
Definition resinfo.hpp:47
bool is_na_linking() const
Definition resinfo.hpp:59
bool is_peptide_linking() const
Definition resinfo.hpp:58
char fasta_code() const
Definition resinfo.hpp:57
bool is_standard() const
Definition resinfo.hpp:56
ResidueKind kind
Definition resinfo.hpp:33
bool is_water() const
Definition resinfo.hpp:44
bool is_amino_acid() const
Definition resinfo.hpp:48
bool found() const
Definition resinfo.hpp:43
bool is_rna() const
Definition resinfo.hpp:46
bool is_dna() const
Definition resinfo.hpp:45
bool is_buffer_or_water() const
Definition resinfo.hpp:52
std::uint8_t hydrogen_count
Definition resinfo.hpp:39
std::uint8_t linking_type
Definition resinfo.hpp:35