00001 /** 00002 * @file odb.h 00003 * This file contains various definitions and interface for management 00004 * of in-memory, through mmaped file, growable hash table, that stores 00005 * sample files. 00006 * 00007 * @remark Copyright 2002 OProfile authors 00008 * @remark Read the file COPYING 00009 * 00010 * @author Philippe Elie 00011 */ 00012 00013 #ifndef ODB_HASH_H 00014 #define ODB_HASH_H 00015 00016 #include <stddef.h> 00017 #include <stdint.h> 00018 00019 #include "op_list.h" 00020 00021 /** the type of key. 64-bit because CG needs 32-bit pair {from,to} */ 00022 typedef uint64_t odb_key_t; 00023 /** the type of an information in the database */ 00024 typedef unsigned int odb_value_t; 00025 /** the type of index (node number), list are implemented through index */ 00026 typedef unsigned int odb_index_t; 00027 /** the type store node number */ 00028 typedef odb_index_t odb_node_nr_t; 00029 /** store the hash mask, hash table size are always power of two */ 00030 typedef odb_index_t odb_hash_mask_t; 00031 00032 /* there is (bucket factor * nr node) entry in hash table, this can seem 00033 * excessive but hash coding eip don't give a good distributions and our 00034 * goal is to get a O(1) amortized insert time. bucket factor must be a 00035 * power of two. FIXME: see big comment in odb_hash_add_node, you must 00036 * re-enable zeroing hash table if BUCKET_FACTOR > 2 (roughly exact, you 00037 * want to read the comment in odb_hash_add_node() if you tune this define) 00038 */ 00039 #define BUCKET_FACTOR 1 00040 00041 /** a db hash node */ 00042 typedef struct { 00043 odb_key_t key; /**< eip */ 00044 odb_value_t value; /**< samples count */ 00045 odb_index_t next; /**< next entry for this bucket */ 00046 } odb_node_t; 00047 00048 /** the minimal information which must be stored in the file to reload 00049 * properly the data base, following this header is the node array then 00050 * the hash table (when growing we avoid to copy node array) 00051 */ 00052 typedef struct { 00053 odb_node_nr_t size; /**< in node nr (power of two) */ 00054 odb_node_nr_t current_size; /**< nr used node + 1, node 0 unused */ 00055 int padding[6]; /**< for padding and future use */ 00056 } odb_descr_t; 00057 00058 /** a "database". this is an in memory only description. 00059 * 00060 * We allow to manage a database inside a mapped file with an "header" of 00061 * unknown size so odb_open get a parameter to specify the size of this header. 00062 * A typical use is: 00063 * 00064 * struct header { int etc; ... }; 00065 * odb_open(&hash, filename, ODB_RW, sizeof(header)); 00066 * so on this library have no dependency on the header type. 00067 * 00068 * the internal memory layout from base_memory is: 00069 * the unknown header (sizeof_header) 00070 * odb_descr_t 00071 * the node array: (descr->size * sizeof(odb_node_t) entries 00072 * the hash table: array of odb_index_t indexing the node array 00073 * (descr->size * BUCKET_FACTOR) entries 00074 */ 00075 typedef struct odb_data { 00076 odb_node_t * node_base; /**< base memory area of the page */ 00077 odb_index_t * hash_base; /**< base memory of hash table */ 00078 odb_descr_t * descr; /**< the current state of database */ 00079 odb_hash_mask_t hash_mask; /**< == descr->size - 1 */ 00080 unsigned int sizeof_header; /**< from base_memory to odb header */ 00081 unsigned int offset_node; /**< from base_memory to node array */ 00082 void * base_memory; /**< base memory of the maped memory */ 00083 int fd; /**< mmaped memory file descriptor */ 00084 char * filename; /**< full path name of sample file */ 00085 int ref_count; /**< reference count */ 00086 struct list_head list; /**< hash bucket list */ 00087 } odb_data_t; 00088 00089 typedef struct { 00090 odb_data_t * data; 00091 } odb_t; 00092 00093 #ifdef __cplusplus 00094 extern "C" { 00095 #endif 00096 00097 /* db_manage.c */ 00098 00099 /** how to open the DB file */ 00100 enum odb_rw { 00101 ODB_RDONLY = 0, /**< open for read only */ 00102 ODB_RDWR = 1 /**< open for read and/or write */ 00103 }; 00104 00105 /** 00106 * odb_init - initialize a DB file 00107 * @param odb the DB file to init 00108 */ 00109 void odb_init(odb_t * odb); 00110 00111 /** 00112 * odb_open - open a DB file 00113 * @param odb the data base object to setup 00114 * @param filename the filename where go the maped memory 00115 * @param rw \enum ODB_RW if opening for writing, else \enum ODB_RDONLY 00116 * @param sizeof_header size of the file header if any 00117 * 00118 * The sizeof_header parameter allows the data file to have a header 00119 * at the start of the file which is skipped. 00120 * odb_open() always preallocate a few number of pages. 00121 * returns 0 on success, errno on failure 00122 */ 00123 int odb_open(odb_t * odb, char const * filename, 00124 enum odb_rw rw, size_t sizeof_header); 00125 00126 /** Close the given ODB file */ 00127 void odb_close(odb_t * odb); 00128 00129 /** return the number of times this sample file is open */ 00130 int odb_open_count(odb_t const * odb); 00131 00132 /** return the start of the mapped data */ 00133 void * odb_get_data(odb_t * odb); 00134 00135 /** issue a msync on the used size of the mmaped file */ 00136 void odb_sync(odb_t const * odb); 00137 00138 /** 00139 * grow the hashtable in such way current_size is the index of the first free 00140 * node. Take care all node pointer can be invalidated by this call. 00141 * 00142 * Node allocation is done in a two step way 1st) ensure a free node exist 00143 * eventually, caller can setup it, 2nd) commit the node allocation with 00144 * odb_commit_reservation(). 00145 * This is done in this way to ensure node setup is visible from another 00146 * process like pp tools in an atomic way. 00147 * 00148 * returns 0 on success, non zero on failure in this case this function do 00149 * nothing and errno is set by the first libc call failure allowing to retry 00150 * after cleanup some program resource. 00151 */ 00152 int odb_grow_hashtable(odb_data_t * data); 00153 /** 00154 * commit a previously successfull node reservation. This can't fail. 00155 */ 00156 static __inline void odb_commit_reservation(odb_data_t * data) 00157 { 00158 ++data->descr->current_size; 00159 } 00160 00161 /** "immpossible" node number to indicate an error from odb_hash_add_node() */ 00162 #define ODB_NODE_NR_INVALID ((odb_node_nr_t)-1) 00163 00164 /* db_debug.c */ 00165 /** check that the hash is well built */ 00166 int odb_check_hash(odb_t const * odb); 00167 00168 /* db_stat.c */ 00169 typedef struct odb_hash_stat_t odb_hash_stat_t; 00170 odb_hash_stat_t * odb_hash_stat(odb_t const * odb); 00171 void odb_hash_display_stat(odb_hash_stat_t const * stats); 00172 void odb_hash_free_stat(odb_hash_stat_t * stats); 00173 00174 /* db_insert.c */ 00175 /** update info at key by incrementing its associated value by one, 00176 * if the key does not exist a new node is created and the value associated 00177 * is set to one. 00178 * 00179 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure 00180 */ 00181 int odb_update_node(odb_t * odb, odb_key_t key); 00182 00183 /** 00184 * odb_update_node_with_offset 00185 * @param odb the data base object to setup 00186 * @param key the hash key 00187 * @param offset the offset to be added 00188 * 00189 * update info at key by adding the specified offset to its associated value, 00190 * if the key does not exist a new node is created and the value associated 00191 * is set to offset. 00192 * 00193 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure 00194 */ 00195 int odb_update_node_with_offset(odb_t * odb, 00196 odb_key_t key, 00197 unsigned long int offset); 00198 00199 /** Add a new node w/o regarding if a node with the same key already exists 00200 * 00201 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure 00202 */ 00203 int odb_add_node(odb_t * odb, odb_key_t key, odb_value_t value); 00204 00205 /* db_travel.c */ 00206 /** 00207 * return a base pointer to the node array and number of node in this array 00208 * caller then will iterate through: 00209 * 00210 * odb_node_nr_t node_nr, pos; 00211 * odb_node_t * node = odb_get_iterator(odb, &node_nr); 00212 * for ( pos = 0 ; pos < node_nr ; ++pos) 00213 * // do something 00214 * 00215 * note than caller does not need to filter nil key as it's a valid key, 00216 * The returned range is all valid (i.e. should never contain zero value). 00217 */ 00218 odb_node_t * odb_get_iterator(odb_t const * odb, odb_node_nr_t * nr); 00219 00220 static __inline unsigned int 00221 odb_do_hash(odb_data_t const * data, odb_key_t value) 00222 { 00223 /* FIXME: better hash for eip value, needs to instrument code 00224 * and do a lot of tests ... */ 00225 /* trying to combine high order bits his a no-op: inside a binary image 00226 * high order bits don't vary a lot, hash table start with 7 bits mask 00227 * so this hash coding use bits 0-7, 8-15. Hash table is stored in 00228 * files avoiding to rebuilding them at profiling re-start so 00229 * on changing do_hash() change the file format! 00230 */ 00231 uint32_t temp = (value >> 32) ^ value; 00232 return ((temp << 0) ^ (temp >> 8)) & data->hash_mask; 00233 } 00234 00235 #ifdef __cplusplus 00236 } 00237 #endif 00238 00239 #endif /* !ODB_H */
1.6.1