00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef OST_MMCIF_READER_HH
00020 #define OST_MMCIF_READER_HH
00021
00022 #include <map>
00023
00024 #include <ost/geom/geom.hh>
00025 #include <ost/seq/sequence_list.hh>
00026 #include <ost/mol/residue_handle.hh>
00027 #include <ost/mol/chain_type.hh>
00028 #include <ost/conop/compound_lib.hh>
00029 #include <ost/io/mol/io_profile.hh>
00030 #include <ost/io/io_exception.hh>
00031 #include <ost/io/mol/star_parser.hh>
00032 #include <ost/io/mol/mmcif_info.hh>
00033
00034 namespace ost { namespace io {
00035
00059 class DLLEXPORT_OST_IO MMCifReader : public StarParser {
00060 public:
00066
00067 MMCifReader(std::istream& stream, mol::EntityHandle& ent_handle,
00068 const IOProfile& profile);
00069
00075 MMCifReader(const String& filename, mol::EntityHandle& ent_handle,
00076 const IOProfile& profile);
00077
00080 void Init();
00081
00083 void ClearState();
00084
00088 void SetRestrictChains(const String& restrict_chains);
00089
00095 void SetReadCanonicalSeqRes(bool flag)
00096 {
00097 seqres_can_ = flag;
00098 }
00099
00100 const String& GetRestrictChains() const
00101 {
00102 return restrict_chains_;
00103 }
00104
00109 void SetAuthChainID(bool id)
00110 {
00111 auth_chain_id_ = id;
00112 }
00113
00120 virtual bool OnBeginData(const StringRef& data_name);
00121
00127 virtual bool OnBeginLoop(const StarLoopDesc& header);
00128
00133 virtual void OnDataRow(const StarLoopDesc& header,
00134 const std::vector<StringRef>& columns);
00135
00137 virtual void OnEndData();
00138
00142 seq::SequenceList GetSeqRes() const {
00143 return seqres_;
00144 }
00145
00149 void SetReadSeqRes(bool flag)
00150 {
00151 read_seqres_ = flag;
00152 }
00153
00157 bool GetReadSeqRes() const
00158 {
00159 return read_seqres_;
00160 }
00161
00165 const MMCifInfo& GetInfo() { return info_; }
00166
00167 protected:
00174 void TryStoreIdx(const int mapping,
00175 const String& item,
00176 const StarLoopDesc& header)
00177 {
00178 indices_[mapping] = header.GetIndex(item);
00179
00180 if (indices_[mapping] == -1) {
00181 throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
00182 "No item '" + item +
00183 "' found in '" +
00184 header.GetCategory()+
00185 "' header",
00186 this->GetCurrentLinenum()));
00187 }
00188 }
00189
00195 bool IsValidPDBIdent(const StringRef& pdbid);
00196
00210 bool ParseAtomIdent(const std::vector<StringRef>& columns,
00211 String& auth_chain_name,
00212 String& cif_chain_name,
00213 StringRef& res_name,
00214 mol::ResNum& resnum,
00215 bool& valid_res_num,
00216 StringRef& atom_name,
00217 char& alt_loc);
00218
00222 void ParseAndAddAtom(const std::vector<StringRef>& columns);
00223
00227 void ParseEntity(const std::vector<StringRef>& columns);
00228
00232 void ParseEntityPoly(const std::vector<StringRef>& columns);
00233
00237 void ParseCitation(const std::vector<StringRef>& columns);
00238
00239 const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; }
00248 String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib);
00252 void ParseCitationAuthor(const std::vector<StringRef>& columns);
00253
00255 void ParseStructRef(const std::vector<StringRef>& columns);
00256
00258 void ParseStructRefSeq(const std::vector<StringRef>& columns);
00259
00261 void ParseStructRefSeqDif(const std::vector<StringRef>& columns);
00265 void ParseExptl(const std::vector<StringRef>& columns);
00266
00270 void ParseRefine(const std::vector<StringRef>& columns);
00271
00275 void ParsePdbxStructAssembly(const std::vector<StringRef>& columns);
00276
00280 void ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns);
00281
00282 std::vector<std::vector<String> > UnPackOperExperession(StringRef expression);
00283
00284 void StoreExpression(const char* l, const char* s,
00285 bool& is_range, int lborder,
00286 std::vector<String>& single_block);
00287
00288 void StoreRange(const char*& l, const char* s, bool& is_range, int& lborder,
00289 std::vector<String>& single_block);
00290
00294 void ParsePdbxStructOperList(const std::vector<StringRef>& columns);
00295
00299 void ParseDatabasePDBRev(const std::vector<StringRef>& columns);
00300
00304 void ParsePdbxAuditRevisionHistory(const std::vector<StringRef>& columns);
00305
00309 void ParsePdbxAuditRevisionDetails(const std::vector<StringRef>& columns);
00310
00314 void ParsePdbxDatabaseStatus(const std::vector<StringRef>& columns);
00315
00319 void ParseStruct(const std::vector<StringRef>& columns);
00320
00324 void ParseStructConf(const std::vector<StringRef>& columns);
00325
00329 void ParseStructSheetRange(const std::vector<StringRef>& columns);
00330
00334 void ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& columns);
00335
00337 typedef enum {
00338 MMCIF_HELIX,
00339 MMCIF_STRAND,
00340 MMCIF_TURN
00341 } MMCifSecStructElement;
00342
00346 MMCifSecStructElement DetermineSecStructType(const StringRef& type) const;
00347
00351 void AssignSecStructure(mol::EntityHandle ent);
00352
00353 private:
00355 typedef enum {
00356 PDBID_LEN=4,
00357 MAX_ITEMS_IN_ROW=18,
00358 } MMCifMagicNos;
00359
00361 typedef enum {
00362 AUTH_ASYM_ID,
00363 AS_ID,
00364 LABEL_ALT_ID,
00365 LABEL_ASYM_ID,
00366 LABEL_ATOM_ID,
00367 LABEL_COMP_ID,
00368 LABEL_ENTITY_ID,
00369 LABEL_SEQ_ID,
00370 AUTH_SEQ_ID,
00371 TYPE_SYMBOL,
00372 CARTN_X,
00373 CARTN_Y,
00374 CARTN_Z,
00375 OCCUPANCY,
00376 B_ISO_OR_EQUIV,
00377 PDBX_PDB_INS_CODE,
00378 GROUP_PDB,
00379 PDBX_PDB_MODEL_NUM
00380 } AtomSiteItems;
00381
00383 typedef enum {
00384 E_ID,
00385 E_TYPE,
00386 PDBX_DESCRIPTION
00387 } EntityItems;
00388
00390 typedef enum {
00391 ENTITY_ID,
00392 EP_TYPE,
00393 PDBX_SEQ_ONE_LETTER_CODE,
00394 PDBX_SEQ_ONE_LETTER_CODE_CAN
00395 } EntityPolyItems;
00396
00398 typedef enum {
00399 CITATION_ID,
00400 ABSTRACT_ID_CAS,
00401 BOOK_ID_ISBN,
00402 BOOK_TITLE,
00403 JOURNAL_ABBREV,
00404 JOURNAL_VOLUME,
00405 PAGE_FIRST,
00406 PAGE_LAST,
00407 PDBX_DATABASE_ID_DOI,
00408 PDBX_DATABASE_ID_PUBMED,
00409 YEAR,
00410 TITLE
00411 } CitationItems;
00412
00414 typedef enum {
00415 AUTHOR_CITATION_ID,
00416 AUTHOR_NAME,
00417 ORDINAL
00418 } CitationAuthorItems;
00419
00421 typedef enum {
00422 EXPTL_ENTRY_ID,
00423 METHOD
00424 } ExptlItems;
00425
00427 typedef enum {
00428 REFINE_ENTRY_ID,
00429 LS_D_RES_HIGH,
00430 LS_D_RES_LOW,
00431 LS_R_FACTOR_R_WORK,
00432 LS_R_FACTOR_R_FREE
00433 } RefineItems;
00434
00436 typedef enum {
00437 PSA_DETAILS,
00438 PSA_ID,
00439 METHOD_DETAILS
00440 } PdbxStructAssemblyItems;
00441
00442
00443 typedef enum {
00444 SR_ENTITY_ID,
00445 SR_ID,
00446 SR_DB_CODE,
00447 SR_DB_NAME,
00448 SR_DB_ACCESS
00449 } StructRefItems;
00450
00452 typedef enum {
00453 SRS_ALIGN_ID,
00454 SRS_STRUCT_REF_ID,
00455 SRS_PDBX_STRAND_ID,
00456 SRS_DB_ALIGN_BEG,
00457 SRS_DB_ALIGN_END,
00458 SRS_ENT_ALIGN_BEG,
00459 SRS_ENT_ALIGN_END
00460 } StructRefSeqItems;
00461
00463 typedef enum {
00464 SRSD_ALIGN_ID,
00465 SRSD_SEQ_RNUM,
00466 SRSD_DB_RNUM,
00467 SRSD_DETAILS
00468 } StructRefSeqDifItems;
00469
00471 typedef enum {
00472 ASSEMBLY_ID,
00473 ASYM_ID_LIST,
00474 OPER_EXPRESSION
00475 } PdbxStructAssemblyGenItems;
00476
00478 typedef enum {
00479 PSOL_ID,
00480 PSOL_TYPE,
00481 VECTOR_1,
00482 VECTOR_2,
00483 VECTOR_3,
00484 MATRIX_1_1,
00485 MATRIX_1_2,
00486 MATRIX_1_3,
00487 MATRIX_2_1,
00488 MATRIX_2_2,
00489 MATRIX_2_3,
00490 MATRIX_3_1,
00491 MATRIX_3_2,
00492 MATRIX_3_3
00493 } PdbxStructOperListItems;
00494
00496 typedef enum {
00497 STRUCT_ENTRY_ID,
00498 PDBX_CASP_FLAG,
00499 PDBX_DESCRIPTOR,
00500 PDBX_FORMULA_WEIGHT,
00501 PDBX_FORMULA_WEIGHT_METHOD,
00502 PDBX_MODEL_DETAILS,
00503 PDBX_MODEL_TYPE_DETAILS,
00504 STRUCT_TITLE
00505 } StructItems;
00506
00508 typedef enum {
00509 SC_BEG_AUTH_ASYM_ID,
00510 SC_BEG_LABEL_ASYM_ID,
00511 SC_BEG_LABEL_COMP_ID,
00512 SC_BEG_LABEL_SEQ_ID,
00513 SC_CONF_TYPE_ID,
00514 SC_END_AUTH_ASYM_ID,
00515 SC_END_LABEL_ASYM_ID,
00516 SC_END_LABEL_COMP_ID,
00517 SC_END_LABEL_SEQ_ID,
00518 SC_ID,
00519 } StructConfItems;
00520
00522 typedef enum {
00523 SSR_BEG_LABEL_ASYM_ID,
00524 SSR_BEG_LABEL_COMP_ID,
00525 SSR_BEG_LABEL_SEQ_ID,
00526 SSR_END_LABEL_ASYM_ID,
00527 SSR_END_LABEL_COMP_ID,
00528 SSR_END_LABEL_SEQ_ID,
00529 SSR_SHEET_ID,
00530 SSR_ID,
00531 SSR_BEG_AUTH_ASYM_ID,
00532 SSR_END_AUTH_ASYM_ID,
00533 } StructSheetRangeItems;
00534
00536 typedef enum {
00537 DATE,
00538 PDPOS_ID,
00539 PDB_ID,
00540 REPLACE_PDB_ID,
00541 } PdbxDatabasePDBObsSpr;
00542
00544 typedef enum {
00545 DPI_NUM,
00546 DPI_DATE,
00547 DPI_DATE_ORIGINAL,
00548 DPI_STATUS,
00549 } DatabasePDBRevItems;
00550
00552 typedef enum {
00553 PARH_ORDINAL,
00554 PARH_REVISION_DATE,
00555 } PdbxAuditRevisionHistoryItems;
00556
00558 typedef enum {
00559 PARD_REVISION_ORDINAL,
00560 PARD_TYPE,
00561 } PdbxAuditRevisionDetailsItems;
00562
00564 typedef enum {
00565 PDS_RECVD_INITIAL_DEPOSITION_DATE,
00566 } PdbxDatabaseStatusItems;
00567
00569 typedef enum {
00570 ATOM_SITE,
00571 ENTITY,
00572 ENTITY_POLY,
00573 CITATION,
00574 CITATION_AUTHOR,
00575 EXPTL,
00576 REFINE,
00577 PDBX_STRUCT_ASSEMBLY,
00578 PDBX_STRUCT_ASSEMBLY_GEN,
00579 PDBX_STRUCT_OPER_LIST,
00580 STRUCT,
00581 STRUCT_CONF,
00582 STRUCT_SHEET_RANGE,
00583 PDBX_DATABASE_PDB_OBS_SPR,
00584 STRUCT_REF,
00585 STRUCT_REF_SEQ,
00586 STRUCT_REF_SEQ_DIF,
00587 DATABASE_PDB_REV,
00588 PDBX_AUDIT_REVISION_HISTORY,
00589 PDBX_AUDIT_REVISION_DETAILS,
00590 PDBX_DATABASE_STATUS,
00591 DONT_KNOW
00592 } MMCifCategory;
00593
00595 typedef struct {
00596 mol::ChainType type;
00597 String details;
00598 String seqres;
00599 } MMCifEntityDesc;
00600 typedef std::map<String, MMCifEntityDesc> MMCifEntityDescMap;
00601
00603 typedef struct {
00604 String biounit_id;
00605 std::vector<String> chains;
00606
00607 std::vector<std::vector<String> > operations;
00608
00609 } MMCifBioUAssembly;
00610 typedef std::vector<MMCifBioUAssembly> MMCifBioUAssemblyVector;
00611
00612 typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > >
00613 MMCifCitationAuthorMap;
00614
00616 typedef struct {
00617 mol::ResNum start;
00618 mol::ResNum end;
00619 String chain_name;
00620 } MMCifHSEntry;
00621 typedef std::vector<MMCifHSEntry> MMCifHSVector;
00622
00624 typedef struct {
00625 String details;
00626 String method_details;
00627 } MMCifPSAEntry;
00628 typedef std::map<String, MMCifPSAEntry> MMCifPSAMap;
00629
00630
00631 MMCifCategory category_;
00632 int category_counts_[DONT_KNOW+1];
00633 int indices_[MAX_ITEMS_IN_ROW];
00634 const IOProfile& profile_;
00635 mol::EntityHandle& ent_handle_;
00636 String restrict_chains_;
00637 bool auth_chain_id_;
00638 bool seqres_can_;
00639 mol::ChainHandle curr_chain_;
00640 mol::ResidueHandle curr_residue_;
00641 int chain_count_;
00642 int residue_count_;
00643 int atom_count_;
00644 bool warned_name_mismatch_;
00645 bool warned_rule_based_;
00646 String subst_res_id_;
00647 bool has_model_;
00648 int curr_model_;
00649 std::vector<std::pair<mol::ChainHandle, String> > chain_id_pairs_;
00651 MMCifEntityDescMap entity_desc_map_;
00652 seq::SequenceList seqres_;
00653 bool read_seqres_;
00654 MMCifInfo info_;
00655 MMCifCitationAuthorMap authors_map_;
00656 MMCifBioUAssemblyVector bu_assemblies_;
00657 MMCifPSAMap bu_origin_map_;
00658 MMCifHSVector helix_list_;
00659 MMCifHSVector strand_list_;
00660 MMCifInfoStructRefs struct_refs_;
00661
00662 std::map<int, String> revision_dates_;
00663 std::map<int, String> revision_types_;
00664 bool database_PDB_rev_added_;
00665 };
00666
00667 }}
00668
00669 #endif