OpenStructure
Loading...
Searching...
No Matches
mmcif_reader.hh
Go to the documentation of this file.
1//------------------------------------------------------------------------------
2// This file is part of the OpenStructure project <www.openstructure.org>
3//
4// Copyright (C) 2008-2020 by the OpenStructure authors
5//
6// This library is free software; you can redistribute it and/or modify it under
7// the terms of the GNU Lesser General Public License as published by the Free
8// Software Foundation; either version 3.0 of the License, or (at your option)
9// any later version.
10// This library is distributed in the hope that it will be useful, but WITHOUT
11// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13// details.
14//
15// You should have received a copy of the GNU Lesser General Public License
16// along with this library; if not, write to the Free Software Foundation, Inc.,
17// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18//------------------------------------------------------------------------------
19#ifndef OST_MMCIF_READER_HH
20#define OST_MMCIF_READER_HH
21
22#include <map>
23
24#include <ost/geom/geom.hh>
27#include <ost/mol/chain_type.hh>
33
34namespace ost { namespace io {
35
61class DLLEXPORT_OST_IO MMCifReader : public StarParser {
62public:
68
69 MMCifReader(std::istream& stream, mol::EntityHandle& ent_handle,
70 const IOProfile& profile);
71
77 MMCifReader(const String& filename, mol::EntityHandle& ent_handle,
78 const IOProfile& profile);
79
82 void Init();
83
85 void ClearState();
86
90 void SetRestrictChains(const String& restrict_chains);
91
93 {
94 return restrict_chains_;
95 }
96
/// \brief Enable or disable reading of auth_chain_id rather than the label
///        chain id (the default).
///
/// \param id flag stored verbatim in auth_chain_id_
101 void SetAuthChainID(bool id)
102 {
103 auth_chain_id_ = id;
104 }
105
112 virtual bool OnBeginData(const StringRef& data_name);
113
119 virtual bool OnBeginLoop(const StarLoopDesc& header); // tested
120
125 virtual void OnDataRow(const StarLoopDesc& header,
126 const std::vector<StringRef>& columns);
127
129 virtual void OnEndData();
130
135
/// \brief Get additional information of the mmCIF file.
///
/// Const-qualified: the accessor only hands out a read-only reference to
/// info_ and does not modify the reader, so it is also callable on a const
/// MMCifReader.
///
/// \return read-only reference to info_
139 const MMCifInfo& GetInfo() const { return info_; }
140
141protected:
/// \brief Store an item index from the loop header in preparation for
///        reading a row.
///
/// Looks up \p item with header.GetIndex() and records the result in
/// indices_[mapping]. The slot is written before the check, so it holds -1
/// when the exception is thrown.
///
/// \param mapping slot of indices_ to fill; not range-checked here, so it has
///        to stay below MAX_ITEMS_IN_ROW (the size of indices_)
/// \param item name of the item to look up in \p header
/// \param header loop header that is searched
///
/// \throws IOException if GetIndex() returns -1 for \p item
148 void TryStoreIdx(const int mapping,
149 const String& item,
150 const StarLoopDesc& header)
151 {
152 indices_[mapping] = header.GetIndex(item);
153
// -1 is treated as "item not present in this header"
154 if (indices_[mapping] == -1) {
155 throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
156 "No item '" + item +
157 "' found in '" +
158 header.GetCategory()+
159 "' header",
160 this->GetCurrentLinenum()));
161 }
162 } // tested
163
/// \brief Fetch values identifying atoms.
///
/// \param columns data of the current row
///
/// All remaining parameters are non-const references, presumably outputs
/// filled from \p columns (confirm in the implementation). The meaning of the
/// bool return value is not visible in this header.
177 bool ParseAtomIdent(const std::vector<StringRef>& columns,
178 String& auth_chain_name,
179 String& cif_chain_name,
180 StringRef& res_name,
181 mol::ResNum& resnum,
182 bool& valid_res_num,
183 StringRef& atom_name,
184 char& alt_loc);
185
189 void ParseAndAddAtom(const std::vector<StringRef>& columns);
190
194 void ParseEntity(const std::vector<StringRef>& columns);
195
199 void ParseEntityPoly(const std::vector<StringRef>& columns);
200
204 void ParseCitation(const std::vector<StringRef>& columns);
205
/// \brief Read-only access to struct_refs_.
///
/// \return const reference to the struct_refs_ member
206 const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; }
207
211 void ParseCitationAuthor(const std::vector<StringRef>& columns);
212
214 void ParseStructRef(const std::vector<StringRef>& columns);
215
217 void ParseStructRefSeq(const std::vector<StringRef>& columns);
218
220 void ParseStructRefSeqDif(const std::vector<StringRef>& columns);
224 void ParseExptl(const std::vector<StringRef>& columns);
225
229 void ParseRefine(const std::vector<StringRef>& columns);
230
234 void ParsePdbxStructAssembly(const std::vector<StringRef>& columns);
235
239 void ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns);
240
/// \param expression expression to unpack; returns a list of String lists.
///
// NOTE(review): "Experession" is a misspelling of "Expression". The name is
// part of the class interface, so it is left unchanged here.
241 std::vector<std::vector<String> > UnPackOperExperession(StringRef expression);
242
243 void StoreExpression(const char* l, const char* s,
244 bool& is_range, int lborder,
245 std::vector<String>& single_block);
246
247 void StoreRange(const char*& l, const char* s, bool& is_range, int& lborder,
248 std::vector<String>& single_block);
249
253 void ParsePdbxStructOperList(const std::vector<StringRef>& columns);
254
258 void ParseDatabasePDBRev(const std::vector<StringRef>& columns);
259
263 void ParsePdbxAuditRevisionHistory(const std::vector<StringRef>& columns);
264
268 void ParsePdbxAuditRevisionDetails(const std::vector<StringRef>& columns);
269
273 void ParsePdbxDatabaseStatus(const std::vector<StringRef>& columns);
274
278 void ParseStruct(const std::vector<StringRef>& columns);
279
283 void ParseStructConf(const std::vector<StringRef>& columns);
284
288 void ParseStructSheetRange(const std::vector<StringRef>& columns);
289
293 void ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& columns);
294
298 void ParsePdbxEntityBranch(const std::vector<StringRef>& columns);
299
303 void ParsePdbxEntityBranchLink(const std::vector<StringRef>& columns);
304
308 void ParseEntityPolySeq(const std::vector<StringRef>& columns);
309
313 void ParseEm3DReconstruction(const std::vector<StringRef>& columns);
314
/// \brief Secondary structure classes; this is the return type of
///        DetermineSecStructType().
316 typedef enum {
317 MMCIF_HELIX,
318 MMCIF_STRAND,
319 MMCIF_TURN,
320 MMCIF_COIL
321 } MMCifSecStructElement;
322
327
332
333private:
/// \brief Magic numbers of the reader.
///
/// MAX_ITEMS_IN_ROW is the size of the indices_ array. Its value (19) equals
/// the number of enumerators in AtomSiteItems, the largest item enum visible
/// in this class.
335 typedef enum {
336 MAX_ITEMS_IN_ROW=19
337 } MMCifMagicNos;
338
/// \brief Items of the atom_site category.
///
/// 19 enumerators (values 0..18), which is exactly MAX_ITEMS_IN_ROW.
/// Presumably these values are used as the mapping argument of TryStoreIdx()
/// (confirm in the implementation); if so, adding an enumerator here requires
/// raising MAX_ITEMS_IN_ROW as well.
340 typedef enum {
341 AUTH_ASYM_ID,
342 AS_ID,
343 LABEL_ALT_ID,
344 LABEL_ASYM_ID,
345 LABEL_ATOM_ID,
346 LABEL_COMP_ID,
347 LABEL_ENTITY_ID,
348 LABEL_SEQ_ID,
349 AUTH_SEQ_ID,
350 TYPE_SYMBOL,
351 CARTN_X,
352 CARTN_Y,
353 CARTN_Z,
354 OCCUPANCY,
355 B_ISO_OR_EQUIV,
356 PDBX_PDB_INS_CODE,
357 GROUP_PDB,
358 PDBX_PDB_MODEL_NUM,
359 FORMAL_CHARGE
360 } AtomSiteItems;
361
363 typedef enum {
364 E_ID,
365 E_TYPE,
366 PDBX_DESCRIPTION
367 } EntityItems;
368
370 typedef enum {
371 ENTITY_ID,
372 EP_TYPE,
373 PDBX_SEQ_ONE_LETTER_CODE,
374 PDBX_SEQ_ONE_LETTER_CODE_CAN
375 } EntityPolyItems;
376
378 typedef enum {
379 CITATION_ID,
380 ABSTRACT_ID_CAS,
381 BOOK_ID_ISBN,
382 BOOK_TITLE,
383 BOOK_PUBLISHER,
384 BOOK_PUBLISHER_CITY,
385 JOURNAL_ABBREV,
386 JOURNAL_VOLUME,
387 PAGE_FIRST,
388 PAGE_LAST,
389 PDBX_DATABASE_ID_DOI,
390 PDBX_DATABASE_ID_PUBMED,
391 YEAR,
392 TITLE
393 } CitationItems;
394
396 typedef enum {
397 AUTHOR_CITATION_ID,
398 AUTHOR_NAME,
399 ORDINAL
400 } CitationAuthorItems;
401
403 typedef enum {
404 EXPTL_ENTRY_ID,
405 METHOD
406 } ExptlItems;
407
409 typedef enum {
410 REFINE_ENTRY_ID,
411 LS_D_RES_HIGH,
412 LS_D_RES_LOW,
413 LS_R_FACTOR_R_WORK,
414 LS_R_FACTOR_R_FREE
415 } RefineItems;
416
418 typedef enum {
419 PSA_DETAILS,
420 PSA_ID,
421 METHOD_DETAILS
422 } PdbxStructAssemblyItems;
423
424 /// \brief Items of the struct_ref category.
425 typedef enum {
426 SR_ENTITY_ID,
427 SR_ID,
428 SR_DB_CODE,
429 SR_DB_NAME,
430 SR_DB_ACCESS
431 } StructRefItems;
432
434 typedef enum {
435 SRS_ALIGN_ID,
436 SRS_STRUCT_REF_ID,
437 SRS_PDBX_STRAND_ID,
438 SRS_DB_ALIGN_BEG,
439 SRS_DB_ALIGN_END,
440 SRS_ENT_ALIGN_BEG,
441 SRS_ENT_ALIGN_END
442 } StructRefSeqItems;
443
445 typedef enum {
446 SRSD_ALIGN_ID,
447 SRSD_SEQ_RNUM,
448 SRSD_DB_RNUM,
449 SRSD_DETAILS
450 } StructRefSeqDifItems;
451
453 typedef enum {
454 ASSEMBLY_ID,
455 ASYM_ID_LIST,
456 OPER_EXPRESSION
457 } PdbxStructAssemblyGenItems;
458
460 typedef enum {
461 PSOL_ID,
462 PSOL_TYPE,
463 VECTOR_1,
464 VECTOR_2,
465 VECTOR_3,
466 MATRIX_1_1,
467 MATRIX_1_2,
468 MATRIX_1_3,
469 MATRIX_2_1,
470 MATRIX_2_2,
471 MATRIX_2_3,
472 MATRIX_3_1,
473 MATRIX_3_2,
474 MATRIX_3_3
475 } PdbxStructOperListItems;
476
478 typedef enum {
479 STRUCT_ENTRY_ID,
480 PDBX_CASP_FLAG,
481 PDBX_DESCRIPTOR,
482 PDBX_FORMULA_WEIGHT,
483 PDBX_FORMULA_WEIGHT_METHOD,
484 PDBX_MODEL_DETAILS,
485 PDBX_MODEL_TYPE_DETAILS,
486 STRUCT_TITLE
487 } StructItems;
488
490 typedef enum {
491 SC_BEG_AUTH_ASYM_ID,
492 SC_BEG_LABEL_ASYM_ID,
493 SC_BEG_LABEL_COMP_ID,
494 SC_BEG_LABEL_SEQ_ID,
495 SC_CONF_TYPE_ID,
496 SC_END_AUTH_ASYM_ID,
497 SC_END_LABEL_ASYM_ID,
498 SC_END_LABEL_COMP_ID,
499 SC_END_LABEL_SEQ_ID,
500 SC_ID,
501 } StructConfItems;
502
504 typedef enum {
505 SSR_BEG_LABEL_ASYM_ID,
506 SSR_BEG_LABEL_COMP_ID,
507 SSR_BEG_LABEL_SEQ_ID,
508 SSR_END_LABEL_ASYM_ID,
509 SSR_END_LABEL_COMP_ID,
510 SSR_END_LABEL_SEQ_ID,
511 SSR_SHEET_ID,
512 SSR_ID,
513 SSR_BEG_AUTH_ASYM_ID,
514 SSR_END_AUTH_ASYM_ID,
515 } StructSheetRangeItems;
516
518 typedef enum {
519 DATE,
520 PDPOS_ID,
521 PDB_ID,
522 REPLACE_PDB_ID,
523 } PdbxDatabasePDBObsSpr;
524
526 typedef enum {
527 DPI_NUM,
528 DPI_DATE,
529 DPI_DATE_ORIGINAL,
530 DPI_STATUS,
531 } DatabasePDBRevItems;
532
534 typedef enum {
535 PARH_ORDINAL,
536 PARH_REVISION_DATE,
537 PARH_MAJOR,
538 PARH_MINOR,
539 } PdbxAuditRevisionHistoryItems;
540
542 typedef enum {
543 PARD_REVISION_ORDINAL,
544 PARD_TYPE,
545 } PdbxAuditRevisionDetailsItems;
546
548 typedef enum {
549 PDS_RECVD_INITIAL_DEPOSITION_DATE,
550 } PdbxDatabaseStatusItems;
551
553 typedef enum {
554 BR_ENTITY_ID,
555 BR_ENTITY_TYPE
556 } EntityBranchItems;
557
559 typedef enum {
560 BL_ENTITY_ID,
561 BL_ATOM_ID_1,
562 BL_ATOM_ID_2,
563 BL_COMP_ID_1,
564 BL_COMP_ID_2,
565 BL_ENTITY_BRANCH_LIST_NUM_1,
566 BL_ENTITY_BRANCH_LIST_NUM_2,
567 BL_ATOM_STEREO_CONFIG_1,
568 BL_ATOM_STEREO_CONFIG_2,
569 BL_VALUE_ORDER
570 } EntityBranchLinkItems;
571
573 typedef enum {
574 EPS_ENTITY_ID,
575 EPS_MON_ID,
576 EPS_NUM,
577 EPS_HETERO
578 } EntityPolySeqItems;
579
581 typedef enum {
582 EM_RESOLUTION
583 } Em3DReconstructionItems;
584
/// \brief Categories known to the reader; this is the type of the category_
///        member.
586 typedef enum {
587 ATOM_SITE,
588 ENTITY,
589 ENTITY_POLY,
590 CITATION,
591 CITATION_AUTHOR,
592 EXPTL,
593 REFINE,
594 PDBX_STRUCT_ASSEMBLY,
595 PDBX_STRUCT_ASSEMBLY_GEN,
596 PDBX_STRUCT_OPER_LIST,
597 STRUCT,
598 STRUCT_CONF,
599 STRUCT_SHEET_RANGE,
600 PDBX_DATABASE_PDB_OBS_SPR,
601 STRUCT_REF,
602 STRUCT_REF_SEQ,
603 STRUCT_REF_SEQ_DIF,
604 DATABASE_PDB_REV,
605 PDBX_AUDIT_REVISION_HISTORY,
606 PDBX_AUDIT_REVISION_DETAILS,
607 PDBX_DATABASE_STATUS,
608 PDBX_ENTITY_BRANCH,
609 PDBX_ENTITY_BRANCH_LINK,
610 ENTITY_POLY_SEQ,
611 EM_3D_RECONSTRUCTION,
// NOTE(review): listing line 612 is absent from this view. DONT_KNOW, which
// sizes category_counts_ (DONT_KNOW+1), is presumably declared there as the
// last enumerator - confirm against the original header.
613 } MMCifCategory;
614
618 MMCifEntityDescMap::iterator GetEntityDescMapIterator(const String& entity_id);
619
/// \brief Plain record with a biounit id, a list of chain names and a list
///        of operation lists (all stored as strings).
///
/// Records are collected in bu_assemblies_ (an MMCifBioUAssemblyVector).
621 typedef struct {
622 String biounit_id;
623 std::vector<String> chains;
625 std::vector<std::vector<String> > operations;
627 } MMCifBioUAssembly;
628 typedef std::vector<MMCifBioUAssembly> MMCifBioUAssemblyVector;
629
630 typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > >
631 MMCifCitationAuthorMap;
632
/// \brief Residue range (start and end residue number) together with a chain
///        name.
///
/// Entries are kept in helix_list_ and strand_list_, both of type
/// MMCifHSVector.
634 typedef struct {
635 mol::ResNum start;
636 mol::ResNum end;
637 String chain_name;
638 } MMCifHSEntry;
639 typedef std::vector<MMCifHSEntry> MMCifHSVector;
640
/// \brief Pair of details and method_details strings.
///
/// Stored by String key in bu_origin_map_ (an MMCifPSAMap). The field names
/// match PSA_DETAILS and METHOD_DETAILS of PdbxStructAssemblyItems, so this
/// presumably holds pdbx_struct_assembly data (confirm in the
/// implementation).
642 typedef struct {
643 String details;
644 String method_details;
645 } MMCifPSAEntry;
646 typedef std::map<String, MMCifPSAEntry> MMCifPSAMap;
647
/// \brief One revision entry: number, date string and major/minor version.
///
/// Instances are collected in the revisions_ vector.
649 struct MMCifRevisionDesc {
650 // GCC note: major() and minor() exist as macros. Presumably that is why the members are assigned in the constructor body instead of the initializer list, where 'major(_major)' would look like a macro call.
651 MMCifRevisionDesc(int _num, const String& _date, int _major, int _minor)
652 : date(_date) {
653 num = _num;
654 major = _major;
655 minor = _minor;
656 }
657 int num;
658 String date;
659 int major;
660 int minor;
661 };
662
/// \brief Link between two atoms, each given by residue number, component
///        name and atom name, plus a bond order.
///
/// Lists of links are stored by String key in entity_branch_link_map_
/// (an MMCifPdbxEntityBranchLinkMap). bond_order has the same type
/// (unsigned char) that MMCifValueOrderToOSTBondOrder() returns.
664 typedef struct {
665 int res_num_1;
666 String cmp_1;
667 String atm_nm_1;
668 int res_num_2;
669 String cmp_2;
670 String atm_nm_2;
671 unsigned char bond_order;
672 } MMCifPdbxEntityBranchLink;
673 typedef std::map<String, std::vector<MMCifPdbxEntityBranchLink> >
674 MMCifPdbxEntityBranchLinkMap;
675
676 // members
677 MMCifCategory category_;
678 int category_counts_[DONT_KNOW+1];
679 int indices_[MAX_ITEMS_IN_ROW];
680 const IOProfile& profile_;
681 mol::EntityHandle& ent_handle_;
682 String restrict_chains_;
683 bool auth_chain_id_;
684 mol::ChainHandle curr_chain_;
685 mol::ResidueHandle curr_residue_;
686 int chain_count_;
687 int residue_count_;
688 int atom_count_;
689 bool warned_name_mismatch_;
690 bool warned_rule_based_;
691 String subst_res_id_;
692 bool has_model_;
693 int curr_model_;
694 std::set<int> warned_ignored_model_; // keep track of ignored model warnings
695 std::vector<std::pair<mol::ChainHandle, String> > chain_id_pairs_;
697 MMCifEntityDescMap entity_desc_map_;
698 MMCifInfo info_;
699 MMCifCitationAuthorMap authors_map_;
700 MMCifBioUAssemblyVector bu_assemblies_;
701 MMCifPSAMap bu_origin_map_;
702 MMCifHSVector helix_list_;
703 MMCifHSVector strand_list_;
704 MMCifInfoStructRefs struct_refs_;
705 // for storing revisions
706 std::vector<MMCifRevisionDesc> revisions_;
707 std::map<int, String> revision_types_;
708 bool database_PDB_rev_added_;
709 // for entity_branch connections
710 MMCifPdbxEntityBranchLinkMap entity_branch_link_map_;
711 // for storing entity_poly_seq
712 std::map<String, std::map<int, String> > entity_poly_seq_map_;
713 std::map<String, std::vector<std::pair<int, String> > > entity_poly_seq_h_map_;
714 String data_block_name_;
715};
716
722 const StringRef value_order);
723
728 const unsigned char bond_order);
729}}
730
731#endif
732
733// LocalWords: MMCifEntityDescMap
convenient datatype for referencing character data
Definition string_ref.hh:39
container class for additional information from MMCif files
void ParseExptl(const std::vector< StringRef > &columns)
Fetch mmCIF exptl information.
MMCifReader(std::istream &stream, mol::EntityHandle &ent_handle, const IOProfile &profile)
create a MMCifReader
void ParsePdbxDatabaseStatus(const std::vector< StringRef > &columns)
Fetch mmCIF pdbx_database_status information.
virtual bool OnBeginData(const StringRef &data_name)
check mmcif input to be read. Substitutional function for StarParser.
void ParsePdbxStructAssembly(const std::vector< StringRef > &columns)
Fetch mmCIF pdbx_struct_assembly information.
void ParseStruct(const std::vector< StringRef > &columns)
Fetch mmCIF struct information.
void ParseStructRef(const std::vector< StringRef > &columns)
parse a row in the struct_ref category
void Init()
Initialise the reader.
void ParsePdbxAuditRevisionDetails(const std::vector< StringRef > &columns)
Fetch mmCIF pdbx_audit_revision_details information.
MMCifSecStructElement DetermineSecStructType(const StringRef &type) const
Check whether an element was classified sheet or helix.
bool ParseAtomIdent(const std::vector< StringRef > &columns, String &auth_chain_name, String &cif_chain_name, StringRef &res_name, mol::ResNum &resnum, bool &valid_res_num, StringRef &atom_name, char &alt_loc)
fetch values identifying atoms
const MMCifInfoStructRefs & GetStructRefs() const
virtual void OnEndData()
Finalise parsing.
void ParsePdbxStructOperList(const std::vector< StringRef > &columns)
Fetch mmCIF pdbx_struct_oper_list information.
void ParseEntityPoly(const std::vector< StringRef > &columns)
Fetch mmCIF entity_poly information.
void StoreExpression(const char *l, const char *s, bool &is_range, int lborder, std::vector< String > &single_block)
virtual void OnDataRow(const StarLoopDesc &header, const std::vector< StringRef > &columns)
read a row of data
void ParseEntity(const std::vector< StringRef > &columns)
Fetch mmCIF entity information.
const String & GetRestrictChains() const
void SetAuthChainID(bool id)
Enable or disable reading of auth_chain_id instead of label_chain id (default)
void SetRestrictChains(const String &restrict_chains)
Set names of restricted chains for the reader.
void AssignSecStructure(mol::EntityHandle ent)
Transform data from struct_conf entry into secondary structure.
void ParseEntityPolySeq(const std::vector< StringRef > &columns)
Fetch mmCIF entity_poly_seq information.
void ParseStructRefSeqDif(const std::vector< StringRef > &columns)
parse row in the struct_ref_seq_dif category
void ParsePdbxEntityBranchLink(const std::vector< StringRef > &columns)
Fetch mmCIF pdbx_entity_branch_link information.
seq::SequenceList GetSeqRes() const
Return sequences.
void ParsePdbxStructAssemblyGen(const std::vector< StringRef > &columns)
Fetch mmCIF pdbx_struct_assembly_gen information.
void ParseDatabasePDBRev(const std::vector< StringRef > &columns)
Fetch mmCIF database_PDB_rev information.
void StoreRange(const char *&l, const char *s, bool &is_range, int &lborder, std::vector< String > &single_block)
void ClearState()
Set up a fresh instance.
void ParsePdbxAuditRevisionHistory(const std::vector< StringRef > &columns)
Fetch mmCIF pdbx_audit_revision_history information.
void ParseCitationAuthor(const std::vector< StringRef > &columns)
Fetch mmCIF citation_author information.
void ParseStructConf(const std::vector< StringRef > &columns)
Fetch mmCIF struct_conf (secondary structure) information.
void ParsePdbxEntityBranch(const std::vector< StringRef > &columns)
Fetch mmCIF pdbx_entity_branch information.
void ParsePdbxDatabasePdbObsSpr(const std::vector< StringRef > &columns)
Fetch mmCIF pdbx_database_PDB_obs_spr information.
const MMCifInfo & GetInfo()
Get additional information of the mmCIF file.
std::vector< std::vector< String > > UnPackOperExperession(StringRef expression)
void ParseAndAddAtom(const std::vector< StringRef > &columns)
Fetch atom information and store it.
void TryStoreIdx(const int mapping, const String &item, const StarLoopDesc &header)
Store an item index from loop header in preparation for reading a row. Throws an exception if the ite...
MMCifReader(const String &filename, mol::EntityHandle &ent_handle, const IOProfile &profile)
create a MMCifReader
void ParseStructRefSeq(const std::vector< StringRef > &columns)
parse row in the struct_ref_seq category
virtual bool OnBeginLoop(const StarLoopDesc &header)
check if a current loop is to be parsed
void ParseCitation(const std::vector< StringRef > &columns)
Fetch mmCIF citation information.
void ParseRefine(const std::vector< StringRef > &columns)
Fetch mmCIF refine information.
void ParseStructSheetRange(const std::vector< StringRef > &columns)
Fetch mmCIF struct_sheet_range (beta sheets) information.
void ParseEm3DReconstruction(const std::vector< StringRef > &columns)
Fetch mmCIF em_3d_reconstruction information.
int GetIndex(const String &name) const
const String & GetCategory() const
Protein or molecule.
list of sequences.
#define DLLEXPORT_OST_IO
std::string String
Definition base.hh:54
@ STAR_DIAG_ERROR
DLLEXPORT_OST_IO String OSTBondOrderToMMCifValueOrder(const unsigned char bond_order)
Translate an OST bond_order to mmCIF value_order.
std::vector< MMCifInfoStructRefPtr > MMCifInfoStructRefs
std::map< String, MMCifEntityDesc > MMCifEntityDescMap
DLLEXPORT_OST_IO unsigned char MMCifValueOrderToOSTBondOrder(const StringRef value_order)
Translate mmCIF info on bond type (e.g. pdbx_entity_branch_link.value_order) to OST bond_order.
pointer_it< T > end(const std::vector< T > &values)
Definition base.dox:1