Gemmi C++ API
Loading...
Searching...
No Matches
resinfo.hpp
Go to the documentation of this file.
1// Copyright 2018 Global Phasing Ltd.
2//
3// List of common residues with basic data.
4
5#ifndef GEMMI_RESINFO_HPP_
6#define GEMMI_RESINFO_HPP_
7
8#include <cstdint> // for uint8_t
9#include <string>
10#include <vector>
11#include "fail.hpp"
12
13namespace gemmi {
14
// Simple approximate classification of residues.
//  AA  - amino acid
//  AAD - D-amino acid
//  PAA - proline-like amino acid
//  MAA - methylated amino acid
//  RNA, DNA - nucleic acids
//  HOH - water or heavy water (OH, H3O, D3O are not included here)
//  PYR - pyranose according to the refmac dictionary
//  KET - ketopyranose according to the refmac dictionary
//  BUF - agent from crystallization buffer according to PISA agents.dat
//  ELS - something else (ligand).
enum class ResidueKind : unsigned char {
  // when changing this list update check_polymer_type()
  UNKNOWN = 0,  // not classified
  AA,           // amino acid
  AAD,          // D-amino acid
  PAA,          // proline-like amino acid
  MAA,          // methylated amino acid
  RNA,          // nucleic acid (RNA)
  DNA,          // nucleic acid (DNA)
  BUF,          // crystallization buffer agent
  HOH,          // water or heavy water
  PYR,          // pyranose
  KET,          // ketopyranose
  ELS           // something else (ligand)
};
30
33 // linking type: 0=n/a, 1=peptide-linking, 2=nucl.-linking, 3=(2|1)
34 std::uint8_t linking_type;
// one-letter code or space (uppercase iff it is a standard residue)
37 // rough count of hydrogens used to estimate mass with implicit hydrogens
38 std::uint8_t hydrogen_count;
39 // molecular weight
40 float weight;
41
42 bool found() const { return kind != ResidueKind::UNKNOWN; }
43 bool is_water() const { return kind == ResidueKind::HOH; }
44 bool is_dna() const { return kind == ResidueKind::DNA; }
45 bool is_rna() const { return kind == ResidueKind::RNA; }
46 bool is_nucleic_acid() const { return is_dna() || is_rna(); }
47 bool is_amino_acid() const {
48 return kind == ResidueKind::AA || kind == ResidueKind::AAD ||
50 }
51 bool is_buffer_or_water() const {
53 }
54 // PDB format has non-standard residues (modified AA) marked as HETATM.
55 bool is_standard() const { return (one_letter_code & 0x20) == 0; }
56 char fasta_code() const { return is_standard() ? one_letter_code : 'X'; }
57 bool is_peptide_linking() const { return (linking_type & 1); }
58 bool is_na_linking() const { return (linking_type & 2); }
59};
60
62
64inline const char* expand_one_letter(char c, ResidueKind kind) {
65 static const char* names =
66 // amino-acids (all letters but J are used)
67 "ALA\0ASX\0CYS\0ASP\0GLU\0PHE\0GLY\0HIS\0ILE\0\0 LYS\0LEU\0MET\0" // A-M
68 "ASN\0PYL\0PRO\0GLN\0ARG\0SER\0THR\0SEC\0VAL\0TRP\0UNK\0TYR\0GLX\0" // N-Z
69 // DNA
70 "DA\0 \0\0 DC\0 \0\0 \0\0 \0\0 DG\0 \0\0 DI\0 \0\0 \0\0 \0\0 \0\0 " // A-M
71 "DN\0 \0\0 \0\0 \0\0 \0\0 \0\0 DT\0 DU\0 \0\0 \0\0 \0\0 \0\0 \0\0 "; // N-Z
72 c &= ~0x20;
73 const char* ret = nullptr;
74 if (c >= 'A' && c <= 'Z') {
75 ret = &names[4 * (c - 'A')];
76 if (kind == ResidueKind::AA) {
77 // ret is already set
78 } else if (kind == ResidueKind::DNA) {
79 ret += 4 * 26;
80 } else if (kind == ResidueKind::RNA && c != 'T') {
81 ret += 4 * 26 + 1;
82 } else {
83 ret = nullptr;
84 }
85 }
86 return (ret && *ret) ? ret : nullptr;
87}
88
90GEMMI_DLL std::vector<std::string> expand_one_letter_sequence(const std::string& seq,
91 ResidueKind kind);
92
93// deprecated
94inline const char* expand_protein_one_letter(char c) {
96}
97// deprecated
98inline std::vector<std::string> expand_protein_one_letter_string(const std::string& s) {
100}
101
102
103} // namespace gemmi
104#endif
#define GEMMI_DLL
Definition fail.hpp:53
const char * expand_one_letter(char c, ResidueKind kind)
kind can be AA, RNA or DNA
Definition resinfo.hpp:64
std::vector< std::string > expand_protein_one_letter_string(const std::string &s)
Definition resinfo.hpp:98
const char * expand_protein_one_letter(char c)
Definition resinfo.hpp:94
GEMMI_DLL std::vector< std::string > expand_one_letter_sequence(const std::string &seq, ResidueKind kind)
kind can be AA, RNA or DNA
ResidueKind
Definition resinfo.hpp:26
GEMMI_DLL ResidueInfo find_tabulated_residue(const std::string &name)
bool is_nucleic_acid() const
Definition resinfo.hpp:46
bool is_na_linking() const
Definition resinfo.hpp:58
bool is_peptide_linking() const
Definition resinfo.hpp:57
char fasta_code() const
Definition resinfo.hpp:56
bool is_standard() const
Definition resinfo.hpp:55
ResidueKind kind
Definition resinfo.hpp:32
bool is_water() const
Definition resinfo.hpp:43
bool is_amino_acid() const
Definition resinfo.hpp:47
bool found() const
Definition resinfo.hpp:42
bool is_rna() const
Definition resinfo.hpp:45
bool is_dna() const
Definition resinfo.hpp:44
bool is_buffer_or_water() const
Definition resinfo.hpp:51
std::uint8_t hydrogen_count
Definition resinfo.hpp:38
std::uint8_t linking_type
Definition resinfo.hpp:34