/* $Id$ */ /******************************************************************** * Copyright (c) 1995 - 2004, EMBL, Peter Keller * * CCIF: a library to access and output data conforming to the * CIF (Crystallographic Information File) and mmCIF (Macromolecular * CIF) specifications, and other related specifications. * * This library is free software: you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * version 3, modified in accordance with the provisions of the * license to address the requirements of UK law. * * You should have received a copy of the modified GNU Lesser General * Public License along with this library. If not, copies may be * downloaded from http://www.ccp4.ac.uk/ccp4license.php * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * ********************************************************************/ /* Dictionary stuff. This was derived originally from TJP's PCCTS symbol table support code, but has been heavily modified for mmCIF information. 'Sym' is a bit of a misnomer, now, since this stuff involves things other than the symbol table, but I'm not going to change it. */ #ifndef ZZSYM_H #define ZZSYM_H /* Need SList typedef */ #include "sorlist.h" #ifdef ESV #define TOLOWER_ADV_PTR(c) (char) (isupper(*c)?tolower(*c++):*c++) #else #define TOLOWER_ADV_PTR(c) tolower(*c++) #endif /* define some hash function, and make it case-independent */ /* This version of TJP's original, makes the hash function * case-independent, and immune to overflow */ #ifndef HASH #define HASH(p, h) while ( *p ) h = (h<<1) ^ TOLOWER_ADV_PTR(p) ; #endif enum item_mandatory_code { undefined = 0, no = 1, yes = 2, implicit = 3 }; /* Use this union to store pointers to other records (those which are derived from * mmcifdic information - head, next and prev don't need this treatment). After id's * are set up, can set id = rec->id + 1 before dumping, and the reverse after undumping. * Use rec->id + 1, so that a NULL pointer can be used as a null value. */ union _sym_ptr { struct _sym *rec; int id; }; /* Important porting note: arrange the members of this structure in order of size, more or less, * with the widest member last. This ensures that member alignment: * * (1) Does not inflate the size more than necessary * (2) The size of this structure is an exact multiple of the size of the widest member, i.e. * we don't have alignment problems when dereferencing pointers to members, when the * symbol table is memory-mapped. * * A sensible ordering is probably: * short < int <= size_t <= pointer types <= double * * If this doesn't work, it may be necessary to specify alignment explicitly as a compiler option * (e.g. -Zp4 under OSF1). */ typedef struct _sym { int width; /* Minimum field width for item on output. Set at the same * time as fmt below */ int no_in_cat; /* Index of item in category. If record is a category, * this is the number of items which it contains. * Fill this in for aliased items as well - this will * save a step when processing looped data. */ enum item_mandatory_code mandatory_code; int type_code[2]; /* This is an index to the appropriate member of item_type_list. * (Keep item_type_list[0][0] as NULL's, for unknown.) * * First index initially set negative; made positive by routine which * stores default value. So, check ( item->type_code[0] > 0 ) * for existence of valid default value. */ int id; /* struct id used to restore pointers after an undump. * Once table is undumped, this item is used to store * the number of elements which have been allocated to * node, loop_offset and rows. */ int esd; /* If item can contain an esd in ()'s, this is the number of * the subexpression in _item_type_list.construct which * contains the esd. * * ('number' in the Posix sense of P1003.2/D11.2 section B.5.2 * page 788). This means that the first subexpression ('number 0') * corresponds to the entire construct, so '0' can be used as a * null value for this element of Sym. */ unsigned int hash; size_t symbol; struct _sym *next, *prev, **head; /* hash, symbol, next, prev. head are required for * the symbol table routines. symbol is now an * offset into the string table, to allow it to be enlarged * by realloc if necessary */ union _sym_ptr scope; /* This is a change from the original TJP form, to allow them * to be dumped easily. next, prev and head can easily be restored * during the undump, so don't bother changing them */ union _sym_ptr /* Pointers to other records in the symbol table */ mandatory_scope, /* If record is a CIF category, this item is used to indicate * the scope which contains the mandatory items (yes or implicit) * in the category */ key, /* Link in list of key items within a category. This is a circular list, which includes item->category->key.rec */ item_list, /* link in list of all items within a category. The first item * in the list is pointed to by item->category->item_list.rec */ true_name, /* If symbol is an alias, * point this at the record for the true name. */ parent, /* Point this at parent item */ category, associated_esd; /* Record for item which can store value of esd */ AST **node, **last; union { AST **loop; SList **sort_tree; } l; int *loop_offset; /* After setting up symbol table, point these at * the appropriate occurrences of the item/category * in input data block (during the CIF parse): * * Data name records: * node: node containing Data_item * or Loop token as appropriate. * l.sort_tree: SList of sort_tree_index's which * item is involved with in this * block. * last: not used * loop_offset: offset of data name from first * column of loop * * Data category records: * node, last: First-occurring/last-occuring data * structures of category in data block * l.loop: Loop node for looped data in this category * (can only be one per data block), otherwise NULL. * loop_offset: If one or more data names that do not appear in * the dictionary are part of a category that does * appear in the dictionary (as determined by the part * of the data name before '.'), loop_offset[block] * is set to an offset into the temporary string table * where the first such item name in the category is * held. See also ccif_add_value and ccif_new_context. * * * As data blocks are processed, more memory is allocated * for these pointers (this is done in ccif_add_value). * The id element is used to keep track of how * many elements these pointers have. */ char *fmt; /* Items: Set by applications to the C sprintf string used to format * output of data */ union _defval { size_t dtext; /* for all character/string types */ double dfloat; int dint; } defval ; } Sym, *SymPtr; /* This is a structure to keep the contents of the ITEM_TYPE_LIST category. code, primitive_code and construct are all offsets in the string table. */ typedef struct _item_type_list { int single_line, /* 0 for types which may span lines, non-0 for others */ block, /* Block of dictionary in which category row is found - used for scoping */ basic_type; /* 0 for undefined (perhaps because of problems with construct ?), 1 for text/char, 2 for int (i.e. converted by atoi), 3 for float (i.e. converted by atof). Further types may be added by the fortran interface routines, to indicate data type to f_interface routine which should be used to retrieve this type of data */ size_t code, primitive_code, construct; regex_t *preg; } ItemTypeList ; /* This is a structure to keep the contents of the DICTIONARY category */ typedef struct _dictionary { int block; /* Block of dictionary in which category row is found - used for scoping */ size_t title, version, datablock_id; /* Offsets into string table */ } Dictionary; #if defined __STDC__ || defined (_AIX) void zzs_init(int, int); void zzs_done(void); void zzs_add(char *, Sym *); Sym *zzs_get(const char * const); void zzs_del(Sym *); void zzs_keydel(char *); Sym **zzs_scope(Sym **); void zzs_init_scope ( register Sym **scope , const int block); void zzs_add_to_scope (Sym *); Sym *zzs_rmscope(Sym **); void zzs_stat(FILE *symdump); Sym *zzs_new(char *); Sym *zzs_newadd(char *); Sym *zzs_tmpadd(char *); size_t zzs_strdup(char *); size_t zzs_tmpstrdup(char *); size_t zzs_strcdup(char *); int zzs_keycmp(const char *, const char *); int zzs_keyncmp(const char *, const char *, const size_t); char * zzs_symname( const size_t offset); char * zzs_tmpsymname( const size_t offset); char *zzs_recname (const Sym * const rec ); ItemTypeList * zzs_get_item_type ( const Sym * const ); void zzs_noscope(void); void zzs_dump( char *filename, FILE *symdump); void zzs_undump( const char * const, const char * const, const int); void zzs_put_string_offset(size_t off); void zzs_expand_item_record(Sym * item, const int blocks); Sym * zzs_start_of_bucket(const int i); int zzs_hsize(void); #else void zzs_init(); void zzs_done(); void zzs_add(); Sym *zzs_get(); void zzs_del(); void zzs_keydel(); Sym **zzs_scope(); void zzs_init_scope ( ); void zzs_add_to_scope ( ); Sym *zzs_rmscope(); void zzs_stat(); Sym *zzs_new(); Sym *zzs_newadd(); Sym *zzs_tmpadd(); size_t zzs_strdup(); size_t zzs_tmpstrdup(); size_t zzs_strcdup(); int zzs_keycmp(); int zzs_keyncmp(); char * zzs_symname(); char * zzs_tmpsymname(); char *zzs_recname (); ItemTypeList * zzs_get_item_type ( ); void zzs_noscope(); void zzs_dump( ); void zzs_undump( ); void zzs_put_string_offset(); void zzs_expand_item_record(); Sym * zzs_start_of_bucket(); int zzs_hsize(); #endif #endif /* #ifndef ZZSYM_H */