hashmap: add API to disable item counting when threaded

This is to address concerns raised by ThreadSanitizer on the mailing list
about threaded unprotected R/W access to map.size with my previous "disallow
rehash" change (0607e10009).

See:
https://public-inbox.org/git/adb37b70139fd1e2bac18bfd22c8b96683ae18eb.1502780344.git.martin.agren@gmail.com/

Add API to hashmap to disable item counting and thus automatic rehashing.
Also include API to later re-enable them.

When item counting is disabled, the map.size field is invalid.  So to
prevent accidents, the field has been renamed and an accessor function
hashmap_get_size() has been added.  All direct references to this
field have been updated.  The field is now named
map.private_size to communicate this.

Here is the relevant output from ThreadSanitizer showing the problem:

WARNING: ThreadSanitizer: data race (pid=10554)
  Read of size 4 at 0x00000082d488 by thread T2 (mutexes: write M16):
    #0 hashmap_add hashmap.c:209
    #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302
    #2 handle_range_dir name-hash.c:347
    #3 handle_range_1 name-hash.c:415
    #4 lazy_dir_thread_proc name-hash.c:471
    #5 <null> <null>

  Previous write of size 4 at 0x00000082d488 by thread T1 (mutexes: write M31):
    #0 hashmap_add hashmap.c:209
    #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302
    #2 handle_range_dir name-hash.c:347
    #3 handle_range_1 name-hash.c:415
    #4 handle_range_dir name-hash.c:380
    #5 handle_range_1 name-hash.c:415
    #6 lazy_dir_thread_proc name-hash.c:471
    #7 <null> <null>

Martin gives instructions for running TSan on test t3008 in this post:
https://public-inbox.org/git/CAN0heSoJDL9pWELD6ciLTmWf-a=oyxe4EXXOmCKvsG5MSuzxsA@mail.gmail.com/

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff Hostetler
2017-09-06 15:43:48 +00:00
committed by Junio C Hamano
parent 238e487ea9
commit 8b604d1951
6 changed files with 88 additions and 40 deletions

View File

@ -183,7 +183,7 @@ struct hashmap {
const void *cmpfn_data;
/* total number of entries (0 means the hashmap is empty) */
unsigned int size;
unsigned int private_size; /* use hashmap_get_size() */
/*
* tablesize is the allocated size of the hash table. A non-0 value
@ -196,8 +196,7 @@ struct hashmap {
unsigned int grow_at;
unsigned int shrink_at;
/* See `hashmap_disallow_rehash`. */
unsigned disallow_rehash : 1;
unsigned int do_count_items : 1;
};
/* hashmap functions */
@ -252,6 +251,18 @@ static inline void hashmap_entry_init(void *entry, unsigned int hash)
e->next = NULL;
}
/*
 * Accessor for the number of entries stored in `map`.
 *
 * The count is only meaningful while item counting is enabled
 * (map->do_count_items is set). Asking for the size while counting is
 * disabled is reported through BUG(); the trailing `return 0` is only
 * reached if BUG() returns to its caller.
 */
static inline unsigned int hashmap_get_size(struct hashmap *map)
{
	if (!map->do_count_items) {
		BUG("hashmap_get_size: size not set");
		return 0;
	}
	return map->private_size;
}
/*
* Returns the hashmap entry for the specified key, or NULL if not found.
*
@ -344,24 +355,6 @@ extern void *hashmap_remove(struct hashmap *map, const void *key,
*/
int hashmap_bucket(const struct hashmap *map, unsigned int hash);
/*
 * Turn rehashing of the hashmap off (non-zero low bit in `value`) or
 * back on (zero low bit).
 *
 * Intended for callers that know the hashmap will see multi-threaded
 * access: it only keeps the table from being unexpectedly re-mapped.
 * Guarding/locking of searches and inserts remains the caller's
 * responsibility, as does initializing the hashmap to a reasonable size
 * so that performance does not suffer.
 *
 * Re-allowing rehashing does not trigger a rehash by itself; one may
 * happen on the next insert or delete.
 */
static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
{
	/* disallow_rehash is a one-bit field, so only the low bit is kept. */
	map->disallow_rehash = value & 1u;
}
/*
* Used to iterate over all entries of a hashmap. Note that it is
* not safe to add or remove entries to the hashmap while
@ -387,6 +380,43 @@ static inline void *hashmap_iter_first(struct hashmap *map,
return hashmap_iter_next(iter);
}
/*
 * Stop tracking the number of items in `map`, and with it the automatic
 * rehashing that happens when items are added or removed.
 *
 * By default the hashmap maintains an item count and uses it to resize
 * the table dynamically. Neither the counting nor the resizing is safe
 * for threaded callers: the hashmap code has no knowledge of the
 * caller's locking strategy, so it cannot protect the "private_size"
 * counter on their behalf.
 *
 * While counting is disabled, hashmap_get_size() must not be used.
 */
static inline void hashmap_disable_item_counting(struct hashmap *map)
{
	map->do_count_items = 0;
}
/*
* Re-enable item couting when adding/removing items.
* If counting is currently disabled, it will force count them.
* It WILL NOT automatically rehash them.
*/
static inline void hashmap_enable_item_counting(struct hashmap *map)
{
void *item;
unsigned int n = 0;
struct hashmap_iter iter;
if (map->do_count_items)
return;
hashmap_iter_init(map, &iter);
while ((item = hashmap_iter_next(&iter)))
n++;
map->do_count_items = 1;
map->private_size = n;
}
/* String interning */
/*