00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef OST_MMCIF_READER_HH
00020 #define OST_MMCIF_READER_HH
00021
00022 #include <map>
00023
00024 #include <ost/geom/geom.hh>
00025 #include <ost/seq/sequence_list.hh>
00026 #include <ost/mol/residue_handle.hh>
00027 #include <ost/mol/chain_type.hh>
00028 #include <ost/conop/compound_lib.hh>
00029 #include <ost/io/mol/io_profile.hh>
00030 #include <ost/io/io_exception.hh>
00031 #include <ost/io/mol/star_parser.hh>
00032 #include <ost/io/mol/mmcif_info.hh>
00033
00034 namespace ost { namespace io {
00035
/// Reader class derived from StarParser. It is constructed with a
/// mol::EntityHandle and an IOProfile (both held by reference, see the
/// ent_handle_ and profile_ members) and owns an MMCifInfo object that is
/// exposed through GetInfo(). Only the declarations and a handful of inline
/// members are visible in this header; all other member definitions live
/// elsewhere.
00059 class DLLEXPORT_OST_IO MMCifReader : public StarParser {
00060 public:
00066
/// Construct a reader from an input stream.
/// \param stream      input stream to read from
/// \param ent_handle  entity handle; the class stores a reference member of
///                    this type (ent_handle_), so the referenced object
///                    presumably has to outlive the reader - TODO confirm
/// \param profile     IO profile; likewise stored as a const reference member
00067 MMCifReader(std::istream& stream, mol::EntityHandle& ent_handle,
00068 const IOProfile& profile);
00069
/// Construct a reader from a file name. Parameters ent_handle and profile
/// have the same types as in the stream-based constructor.
00075 MMCifReader(const String& filename, mol::EntityHandle& ent_handle,
00076 const IOProfile& profile);
00077
/// Initialisation routine (definition not visible in this header).
00080 void Init();
00081
/// Presumably resets the parsing state of the reader; the definition is not
/// visible in this header.
00083 void ClearState();
00084
/// Setter taking a chain restriction string; presumably the counterpart of
/// GetRestrictChains() (definition not visible in this header).
00088 void SetRestrictChains(const String& restrict_chains);
00089
/// Store \p flag in seqres_can_.
/// NOTE(review): judging by the name and by the
/// PDBX_SEQ_ONE_LETTER_CODE_CAN item in EntityPolyItems, this selects the
/// canonical one-letter sequence for SEQRES - confirm in the implementation.
00095 void SetReadCanonicalSeqRes(bool flag)
00096 {
00097 seqres_can_ = flag;
00098 }
00099
/// \return const reference to the stored restrict_chains_ string.
00100 const String& GetRestrictChains() const
00101 {
00102 return restrict_chains_;
00103 }
00104
/// Store \p id in auth_chain_id_.
/// NOTE(review): presumably toggles between author and label chain
/// identifiers - confirm in the implementation.
00109 void SetAuthChainID(bool id)
00110 {
00111 auth_chain_id_ = id;
00112 }
00113
/// The four virtual On* members below look like the parser callbacks of the
/// StarParser base class (data block begin, loop begin, data row, data block
/// end) - base class is not visible here, TODO confirm they override.
/// \param data_name name of the data block
00120 virtual bool OnBeginData(const StringRef& data_name);
00121
/// \param header description of the loop that is starting
00127 virtual bool OnBeginLoop(const StarLoopDesc& header);
00128
/// \param header  description of the loop the row belongs to
/// \param columns the values of one data row
00133 virtual void OnDataRow(const StarLoopDesc& header,
00134 const std::vector<StringRef>& columns);
00135
00137 virtual void OnEndData();
00138
/// \return the seqres_ sequence list, returned by value.
00142 seq::SequenceList GetSeqRes() const {
00143 return seqres_;
00144 }
00145
/// Store \p flag in read_seqres_.
00149 void SetReadSeqRes(bool flag)
00150 {
00151 read_seqres_ = flag;
00152 }
00153
/// \return the current value of read_seqres_.
00157 bool GetReadSeqRes() const
00158 {
00159 return read_seqres_;
00160 }
00161
/// \return const reference to the info_ member.
/// NOTE(review): this getter is not const-qualified although it only returns
/// a const reference.
00165 const MMCifInfo& GetInfo() { return info_; }
00166
00167 protected:
/// Look up \p item in \p header and store its index in indices_[mapping].
/// \param mapping slot in indices_ to write to; it is not range-checked here,
///                so it must be smaller than MAX_ITEMS_IN_ROW
/// \param item    name of the item to look up
/// \param header  loop description that is queried via GetIndex()
/// \throws IOException if GetIndex() returns -1; the message is built with
///         FormatDiagnostic() using STAR_DIAG_ERROR and the current line
///         number
00174 void TryStoreIdx(const int mapping,
00175 const String& item,
00176 const StarLoopDesc& header)
00177 {
00178 indices_[mapping] = header.GetIndex(item);
00179
00180 if (indices_[mapping] == -1) {
00181 throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
00182 "No item '" + item +
00183 "' found in '" +
00184 header.GetCategory()+
00185 "' header",
00186 this->GetCurrentLinenum()));
00187 }
00188 }
00189
/// Check a PDB identifier (definition not visible here; presumably related
/// to the PDBID_LEN constant below).
00195 bool IsValidPDBIdent(const StringRef& pdbid);
00196
/// Extract the identifying fields of an atom from one data row. All
/// non-const reference parameters are outputs. The meaning of the bool
/// return value is not visible in this header.
00210 bool ParseAtomIdent(const std::vector<StringRef>& columns,
00211 String& auth_chain_name,
00212 String& cif_chain_name,
00213 StringRef& res_name,
00214 mol::ResNum& resnum,
00215 bool& valid_res_num,
00216 StringRef& atom_name,
00217 char& alt_loc);
00218
/// The Parse* members that follow each take the values of one data row.
/// Their definitions are not part of this header; which category each one
/// handles is only suggested by its name and by the MMCifCategory
/// enumerators further down.
00222 void ParseAndAddAtom(const std::vector<StringRef>& columns);
00223
00227 void ParseEntity(const std::vector<StringRef>& columns);
00228
00232 void ParseEntityPoly(const std::vector<StringRef>& columns);
00233
00237 void ParseCitation(const std::vector<StringRef>& columns);
00238
/// \return const reference to the struct_refs_ member.
00239 const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; }
/// Convert a SEQRES string with the help of a compound library (definition
/// not visible here).
00248 String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib);
00252 void ParseCitationAuthor(const std::vector<StringRef>& columns);
00253
00255 void ParseStructRef(const std::vector<StringRef>& columns);
00256
00258 void ParseStructRefSeq(const std::vector<StringRef>& columns);
00259
00261 void ParseStructRefSeqDif(const std::vector<StringRef>& columns);
00265 void ParseExptl(const std::vector<StringRef>& columns);
00266
00270 void ParseRefine(const std::vector<StringRef>& columns);
00271
00275 void ParsePdbxStructAssembly(const std::vector<StringRef>& columns);
00276
00280 void ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns);
00281
/// Turn an operator expression into a list of lists of strings. Presumably
/// used together with StoreExpression()/StoreRange() below; definitions are
/// not visible here.
/// NOTE(review): "Experession" is misspelled, but the name is part of the
/// interface and therefore left untouched.
00282 std::vector<std::vector<String> > UnPackOperExperession(StringRef expression);
00283
00284 void StoreExpression(const char* l, const char* s,
00285 bool& is_range, int lborder,
00286 std::vector<String>& single_block);
00287
/// Unlike StoreExpression(), this takes \p l and \p lborder by non-const
/// reference, so both may be modified by the call.
00288 void StoreRange(const char*& l, const char* s, bool& is_range, int& lborder,
00289 std::vector<String>& single_block);
00290
00294 void ParsePdbxStructOperList(const std::vector<StringRef>& columns);
00295
00299 void ParseDatabasePDBRev(const std::vector<StringRef>& columns);
00300
00304 void ParseStruct(const std::vector<StringRef>& columns);
00305
00309 void ParseStructConf(const std::vector<StringRef>& columns);
00310
00314 void ParseStructSheetRange(const std::vector<StringRef>& columns);
00315
00319 void ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& columns);
00320
/// Kinds of secondary structure elements returned by
/// DetermineSecStructType().
00322 typedef enum {
00323 MMCIF_HELIX,
00324 MMCIF_STRAND,
00325 MMCIF_TURN
00326 } MMCifSecStructElement;
00327
/// Map a type string to one of the MMCifSecStructElement values (definition
/// not visible here).
00331 MMCifSecStructElement DetermineSecStructType(const StringRef& type) const;
00332
/// Presumably applies the collected helix_list_/strand_list_ entries to
/// \p ent - TODO confirm in the implementation.
00336 void AssignSecStructure(mol::EntityHandle ent);
00337
00338 private:
/// Numeric constants of the reader. MAX_ITEMS_IN_ROW is the size of the
/// indices_ array; PDBID_LEN is presumably the length of a PDB identifier.
00340 typedef enum {
00341 PDBID_LEN=4,
00342 MAX_ITEMS_IN_ROW=18,
00343 } MMCifMagicNos;
00344
/// The *Items enumerations below list the items known per category. Their
/// values have the right shape to serve as the "mapping" argument of
/// TryStoreIdx(), i.e. as slots in indices_ - the call sites are not visible
/// here. Every enumeration must therefore have at most MAX_ITEMS_IN_ROW
/// enumerators.
///
/// This first one has 18 enumerators, exactly MAX_ITEMS_IN_ROW; adding one
/// requires raising that constant.
00346 typedef enum {
00347 AUTH_ASYM_ID,
00348 AS_ID,
00349 LABEL_ALT_ID,
00350 LABEL_ASYM_ID,
00351 LABEL_ATOM_ID,
00352 LABEL_COMP_ID,
00353 LABEL_ENTITY_ID,
00354 LABEL_SEQ_ID,
00355 AUTH_SEQ_ID,
00356 TYPE_SYMBOL,
00357 CARTN_X,
00358 CARTN_Y,
00359 CARTN_Z,
00360 OCCUPANCY,
00361 B_ISO_OR_EQUIV,
00362 PDBX_PDB_INS_CODE,
00363 GROUP_PDB,
00364 PDBX_PDB_MODEL_NUM
00365 } AtomSiteItems;
00366
00368 typedef enum {
00369 E_ID,
00370 E_TYPE,
00371 PDBX_DESCRIPTION
00372 } EntityItems;
00373
00375 typedef enum {
00376 ENTITY_ID,
00377 EP_TYPE,
00378 PDBX_SEQ_ONE_LETTER_CODE,
00379 PDBX_SEQ_ONE_LETTER_CODE_CAN
00380 } EntityPolyItems;
00381
00383 typedef enum {
00384 CITATION_ID,
00385 ABSTRACT_ID_CAS,
00386 BOOK_ID_ISBN,
00387 BOOK_TITLE,
00388 JOURNAL_ABBREV,
00389 JOURNAL_VOLUME,
00390 PAGE_FIRST,
00391 PAGE_LAST,
00392 PDBX_DATABASE_ID_DOI,
00393 PDBX_DATABASE_ID_PUBMED,
00394 YEAR,
00395 TITLE
00396 } CitationItems;
00397
00399 typedef enum {
00400 AUTHOR_CITATION_ID,
00401 AUTHOR_NAME,
00402 ORDINAL
00403 } CitationAuthorItems;
00404
00406 typedef enum {
00407 EXPTL_ENTRY_ID,
00408 METHOD
00409 } ExptlItems;
00410
00412 typedef enum {
00413 REFINE_ENTRY_ID,
00414 LS_D_RES_HIGH,
00415 LS_D_RES_LOW
00416 } RefineItems;
00417
00419 typedef enum {
00420 PSA_DETAILS,
00421 PSA_ID,
00422 METHOD_DETAILS
00423 } PdbxStructAssemblyItems;
00424
00425
00426 typedef enum {
00427 SR_ENTITY_ID,
00428 SR_ID,
00429 SR_DB_CODE,
00430 SR_DB_NAME,
00431 SR_DB_ACCESS
00432 } StructRefItems;
00433
00435 typedef enum {
00436 SRS_ALIGN_ID,
00437 SRS_STRUCT_REF_ID,
00438 SRS_PDBX_STRAND_ID,
00439 SRS_DB_ALIGN_BEG,
00440 SRS_DB_ALIGN_END,
00441 SRS_ENT_ALIGN_BEG,
00442 SRS_ENT_ALIGN_END
00443 } StructRefSeqItems;
00444
00446 typedef enum {
00447 SRSD_ALIGN_ID,
00448 SRSD_SEQ_RNUM,
00449 SRSD_DB_RNUM,
00450 SRSD_DETAILS
00451 } StructRefSeqDifItems;
00452
00454 typedef enum {
00455 ASSEMBLY_ID,
00456 ASYM_ID_LIST,
00457 OPER_EXPRESSION
00458 } PdbxStructAssemblyGenItems;
00459
00461 typedef enum {
00462 PSOL_ID,
00463 PSOL_TYPE,
00464 VECTOR_1,
00465 VECTOR_2,
00466 VECTOR_3,
00467 MATRIX_1_1,
00468 MATRIX_1_2,
00469 MATRIX_1_3,
00470 MATRIX_2_1,
00471 MATRIX_2_2,
00472 MATRIX_2_3,
00473 MATRIX_3_1,
00474 MATRIX_3_2,
00475 MATRIX_3_3
00476 } PdbxStructOperListItems;
00477
00479 typedef enum {
00480 STRUCT_ENTRY_ID,
00481 PDBX_CASP_FLAG,
00482 PDBX_DESCRIPTOR,
00483 PDBX_FORMULA_WEIGHT,
00484 PDBX_FORMULA_WEIGHT_METHOD,
00485 PDBX_MODEL_DETAILS,
00486 PDBX_MODEL_TYPE_DETAILS,
00487 STRUCT_TITLE
00488 } StructItems;
00489
00491 typedef enum {
00492 SC_BEG_AUTH_ASYM_ID,
00493 SC_BEG_LABEL_ASYM_ID,
00494 SC_BEG_LABEL_COMP_ID,
00495 SC_BEG_LABEL_SEQ_ID,
00496 SC_CONF_TYPE_ID,
00497 SC_END_AUTH_ASYM_ID,
00498 SC_END_LABEL_ASYM_ID,
00499 SC_END_LABEL_COMP_ID,
00500 SC_END_LABEL_SEQ_ID,
00501 SC_ID,
00502 } StructConfItems;
00503
00505 typedef enum {
00506 SSR_BEG_LABEL_ASYM_ID,
00507 SSR_BEG_LABEL_COMP_ID,
00508 SSR_BEG_LABEL_SEQ_ID,
00509 SSR_END_LABEL_ASYM_ID,
00510 SSR_END_LABEL_COMP_ID,
00511 SSR_END_LABEL_SEQ_ID,
00512 SSR_SHEET_ID,
00513 SSR_ID,
00514 SSR_BEG_AUTH_ASYM_ID,
00515 SSR_END_AUTH_ASYM_ID,
00516 } StructSheetRangeItems;
00517
00519 typedef enum {
00520 DATE,
00521 PDPOS_ID,
00522 PDB_ID,
00523 REPLACE_PDB_ID,
00524 } PdbxDatabasePDBObsSpr;
00525
00527 typedef enum {
00528 DPI_NUM,
00529 DPI_DATE,
00530 DPI_DATE_ORIGINAL,
00531 DPI_STATUS,
00532 } DatabasePDBRevItems;
00533
/// Categories distinguished by the reader; category_ holds one of these
/// values. DONT_KNOW has to stay the last enumerator because
/// category_counts_ is dimensioned as DONT_KNOW+1.
00535 typedef enum {
00536 ATOM_SITE,
00537 ENTITY,
00538 ENTITY_POLY,
00539 CITATION,
00540 CITATION_AUTHOR,
00541 EXPTL,
00542 REFINE,
00543 PDBX_STRUCT_ASSEMBLY,
00544 PDBX_STRUCT_ASSEMBLY_GEN,
00545 PDBX_STRUCT_OPER_LIST,
00546 STRUCT,
00547 STRUCT_CONF,
00548 STRUCT_SHEET_RANGE,
00549 PDBX_DATABASE_PDB_OBS_SPR,
00550 STRUCT_REF,
00551 STRUCT_REF_SEQ,
00552 STRUCT_REF_SEQ_DIF,
00553 DATABASE_PDB_REV,
00554 DONT_KNOW
00555 } MMCifCategory;
00556
/// Per-entity record: chain type, a details string and a seqres string.
/// Stored in a map keyed by a String (presumably the entity id - TODO
/// confirm).
00558 typedef struct {
00559 mol::ChainType type;
00560 String details;
00561 String seqres;
00562 } MMCifEntityDesc;
00563 typedef std::map<String, MMCifEntityDesc> MMCifEntityDescMap;
00564
/// Intermediate record for a biounit assembly: a biounit id, a list of chain
/// names and a list of lists of operation strings (the same shape that
/// UnPackOperExperession() returns).
00566 typedef struct {
00567 String biounit_id;
00568 std::vector<String> chains;
00569
00570 std::vector<std::vector<String> > operations;
00571
00572 } MMCifBioUAssembly;
00573 typedef std::vector<MMCifBioUAssembly> MMCifBioUAssemblyVector;
00574
/// Map from a String key to a pair of (vector of int, vector of String).
/// NOTE(review): presumably citation id -> (author ordinals, author names),
/// going by CitationAuthorItems - confirm in the implementation.
00575 typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > >
00576 MMCifCitationAuthorMap;
00577
/// Residue range (start/end residue number) on a named chain; used as the
/// element type of helix_list_ and strand_list_.
00579 typedef struct {
00580 mol::ResNum start;
00581 mol::ResNum end;
00582 String chain_name;
00583 } MMCifHSEntry;
00584 typedef std::vector<MMCifHSEntry> MMCifHSVector;
00585
/// Pair of details/method_details strings, stored in a map keyed by a String
/// (presumably the assembly id, compare PdbxStructAssemblyItems - TODO
/// confirm).
00587 typedef struct {
00588 String details;
00589 String method_details;
00590 } MMCifPSAEntry;
00591 typedef std::map<String, MMCifPSAEntry> MMCifPSAMap;
00592
00593
/// Data members. category_counts_ has one slot per MMCifCategory value and
/// indices_ has MAX_ITEMS_IN_ROW slots (written by TryStoreIdx()). profile_
/// and ent_handle_ are reference members, so they refer to objects owned
/// outside of this class.
00594 MMCifCategory category_;
00595 int category_counts_[DONT_KNOW+1];
00596 int indices_[MAX_ITEMS_IN_ROW];
00597 const IOProfile& profile_;
00598 mol::EntityHandle& ent_handle_;
00599 String restrict_chains_;
00600 bool auth_chain_id_;
00601 bool seqres_can_;
00602 mol::ChainHandle curr_chain_;
00603 mol::ResidueHandle curr_residue_;
00604 int chain_count_;
00605 int residue_count_;
00606 int atom_count_;
00607 bool warned_name_mismatch_;
00608 bool warned_rule_based_;
00609 String subst_res_id_;
00610 bool has_model_;
00611 int curr_model_;
00612 std::vector<std::pair<mol::ChainHandle, String> > chain_id_pairs_;
00614 MMCifEntityDescMap entity_desc_map_;
00615 seq::SequenceList seqres_;
00616 bool read_seqres_;
00617 MMCifInfo info_;
00618 MMCifCitationAuthorMap authors_map_;
00619 MMCifBioUAssemblyVector bu_assemblies_;
00620 MMCifPSAMap bu_origin_map_;
00621 MMCifHSVector helix_list_;
00622 MMCifHSVector strand_list_;
00623 MMCifInfoStructRefs struct_refs_;
00624 };
00625
00626 }}
00627
00628 #endif