DPDK20.05 hash表1 – rte_hash_create 与 rte_hash_free
1、数据结构
rte_hash_create() 函数用来创建一个hash表,涉及到如下数据结构。
1.1 struct rte_hash_parameters
1 2 3 4 5 6 7 8 9 10 | struct rte_hash_parameters { const char *name; /**< hash表的名字 */ uint32_t entries; /**< hash 表的 entries总数 */ uint32_t reserved; /**< 暂未使用,应设置为0 */ uint32_t key_len; /**< hash key的长度(字节) */ rte_hash_function hash_func; /**< 用来计算hash的主要哈希函数 */ uint32_t hash_func_init_val; /**< hash函数使用的初始值 */ int socket_id; /**< 内存所在的 socket id */ uint8_t extra_flag; /**< 指定是否需要额外的参数 */ }; |
1.2 struct lcore_cache
1 2 3 4 | struct lcore_cache { unsigned len; /**< Cache 长度 */ uint32_t objs[LCORE_CACHE_SIZE]; /**< Cache 对象 */ } __rte_cache_aligned; |
1.3 struct rte_hash_bucket
1 2 3 4 5 6 | struct rte_hash_bucket { uint16_t sig_current[RTE_HASH_BUCKET_ENTRIES]; uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES]; uint8_t flag[RTE_HASH_BUCKET_ENTRIES]; void *next; } __rte_cache_aligned; |
其中 RTE_HASH_BUCKET_ENTRIES 的值定义为8。
1.4 struct rte_hash
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | struct rte_hash { char name[RTE_HASH_NAMESIZE]; /**< hash的名字 */ uint32_t entries; /**< Total table entries. */ uint32_t num_buckets; /**< hash表中buckets的数量 */ struct rte_ring *free_slots; /**< 用来保存key表中空闲slots的所有索引的ring */ struct lcore_cache *local_free_slots; /**< 每个lcore的本地cache,保存空闲slots的一些索引 */ /* 函数中使用的成员变量 */ uint32_t key_len __rte_cache_aligned; /**< hash key 的长度 */ uint8_t hw_trans_mem_support; /**< 是否启用了 hardware transactional memory */ uint8_t use_local_cache; /**< 如果启用了多写者支持,则使用本地cache来分配 key-store slots */ uint8_t readwrite_concur_support; /**< 是否启用了读者-写者并发 */ uint8_t ext_table_support; /**< 启用可扩展 bucket表 */ uint8_t no_free_on_del; /**< 在调用rte_hash_del_xxx函数时是否释放key 索引。 * 如果设置了该值,要释放关联到被删除entry的key index, * 必须调用rte_hash_free_key_with_position() 函数。 * 默认会启用该值。 */ uint8_t readwrite_concur_lf_support; /**< 是否启用无锁的读者-写者并发支持 */ uint8_t writer_takes_lock; /**< 指定写者线程是否需要加锁 */ rte_hash_function hash_func; /**< 用来计算hash值的函数 */ uint32_t hash_func_init_val; /**< Init value used by hash_func. */ rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; /**< 用来比较 keys 的自定义函数 */ enum cmp_jump_table_case cmp_jump_table_idx; /**< 指定使用哪个比较函数 */ enum rte_hash_sig_compare_function sig_cmp_fn; /**< 指定使用哪个 signature 比较函数 */ uint32_t bucket_bitmask; /**< 从hash signature得到bucket索引时使用的Bitmask */ uint32_t key_entry_size; /**< 每个 key entry 的大小 */ void *key_store; /**< 指向用来保存所有keys和数据的内存空间 */ struct rte_hash_bucket *buckets; /**< 指向保存所有hash值和key索引到key table */ rte_rwlock_t *readwrite_lock; /**< 读者-写者锁线程安全 */ struct rte_hash_bucket *buckets_ext; /**< 指向扩展bucket 数组所在的内存 */ struct rte_ring *free_ext_bkts; /**< 空闲 buckets 的索引的ring */ /* 存储空的扩展bkt的索引,会在调用rte_hash_del_xxx后回收。 * 当无锁读者-写者并发启用时,空的扩展bkt不能直接被插入到空闲列表中 *(因为可能仍有读者在使用它)。因此对扩展bkt的释放就相当于对key索引的释放 */ uint32_t *ext_bkt_to_free; uint32_t *tbl_chng_cnt; /**< 指出上次读之后hash表是否已经被修改 */ } __rte_cache_aligned; |
DPDK的hash表采用了Cuckoo 哈希算法,这种hash 表中有两个表来解决hash冲突,一个主hash表一个次hash表。插入key的时候先去主hash表中查找,如果有空间就插入,如果没有再去次hash表中查找空闲位置。相关原理可以参考这篇文章:
https://coolshell.cn/articles/17225.html
2、rte_hash_create() 函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | struct rte_hash *rte_hash_create(const struct rte_hash_parameters *params) { struct rte_hash *h = NULL; struct rte_tailq_entry *te = NULL; struct rte_hash_list *hash_list; struct rte_ring *r = NULL; struct rte_ring *r_ext = NULL; char hash_name[RTE_HASH_NAMESIZE]; void *k = NULL; void *buckets = NULL; void *buckets_ext = NULL; char ring_name[RTE_RING_NAMESIZE]; char ext_ring_name[RTE_RING_NAMESIZE]; unsigned num_key_slots; unsigned int hw_trans_mem_support = 0, use_local_cache = 0; unsigned int ext_table_support = 0; unsigned int readwrite_concur_support = 0; unsigned int writer_takes_lock = 0; unsigned int no_free_on_del = 0; uint32_t *ext_bkt_to_free = NULL; uint32_t *tbl_chng_cnt = NULL; unsigned int readwrite_concur_lf_support = 0; uint32_t i; rte_hash_function default_hash_func = (rte_hash_function)rte_jhash; hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); if (params == NULL) { RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n"); return NULL; } /* hash entries 的数量不能大于 1<<30,不能小于 8个,key的长度不能为0 */ if ((params->entries > RTE_HASH_ENTRIES_MAX) || (params->entries < RTE_HASH_BUCKET_ENTRIES) || (params->key_len == 0)) { rte_errno = EINVAL; RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n"); return NULL; } /* 如果设置了读者写者并发,同时又设置了支持无锁的读写并发。这两个不能同时设置*/ if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) && (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) { rte_errno = EINVAL; RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or " "rw concurrency lock free\n"); return NULL; } /* 如果启用了 Hardware transactional memory 支持 */ /* 如果设置了该flag且硬件支持,读者写者锁会使用hardware transactional memory *(例如[email protected])来保证线程安全。如果平台支持[email protected],建议设置该flag, * 可以使对表的并发操作加速。否则,由于软件锁机制本身的消耗,其并发会变慢。 */ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT) hw_trans_mem_support = 1; /* 插入的默认行为,支持单写者和多写者 */ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) { use_local_cache = 1; writer_takes_lock = 1; } /* 设置了支持读者写者并发 */ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) { readwrite_concur_support = 1; writer_takes_lock = 1; } /* 支持额外的bucket table */ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE) ext_table_support = 1; /* 在删除hash表项时不会释放其内存 */ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL) no_free_on_del = 1; /* 支持无锁的读者写者并发,支持单写者与多写者 */ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) { readwrite_concur_lf_support = 1; /* 在删除 hash 项时不会释放内部内存或索引 */ no_free_on_del = 1; } /* 保存所有关键字,将第一个留空,给lookup_bulk 使用 */ if (use_local_cache) /* 根据可以保存在本地cache的索引的总数,增加slots的数量,排除第一个cache */ num_key_slots = params->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1) + 1; else /* 由于h->key_store 数组中需要留空一个,因此这里需要补一个 */ num_key_slots = params->entries + 1; snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name); /* Create ring (Dummy slot index is not enqueued) */ r = rte_ring_create_elem(ring_name, sizeof(uint32_t), rte_align32pow2(num_key_slots), params->socket_id, 0); if (r == NULL) { RTE_LOG(ERR, HASH, "memory allocation failed\n"); goto err; } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | /* 每个 bucket 中不超过 8 个元素 */ const uint32_t num_buckets = rte_align32pow2(params->entries) / RTE_HASH_BUCKET_ENTRIES; /* 为可扩展buckets 创建 ring */ if (ext_table_support) { snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s", params->name); r_ext = rte_ring_create_elem(ext_ring_name, sizeof(uint32_t), rte_align32pow2(num_buckets + 1), params->socket_id, 0); if (r_ext == NULL) { RTE_LOG(ERR, HASH, "ext buckets memory allocation " "failed\n"); goto err; } } snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name); rte_mcfg_tailq_write_lock(); /* 确保不存在,通常在前面ring创建时会检查 */ TAILQ_FOREACH(te, hash_list, next) { h = (struct rte_hash *) te->data; if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0) break; } h = NULL; if (te != NULL) { /* 已经存在 */ rte_errno = EEXIST; te = NULL; goto err_unlock; } te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0); if (te == NULL) { RTE_LOG(ERR, HASH, "tailq entry allocation failed\n"); goto err_unlock; } /* 创建 rte_hash 结构 */ h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash), RTE_CACHE_LINE_SIZE, params->socket_id); if (h == NULL) { RTE_LOG(ERR, HASH, "memory allocation failed\n"); goto err_unlock; } /* 创建 num_buckets 个 bucket */ buckets = rte_zmalloc_socket(NULL, num_buckets * sizeof(struct rte_hash_bucket), RTE_CACHE_LINE_SIZE, params->socket_id); if (buckets == NULL) { RTE_LOG(ERR, HASH, "buckets memory allocation failed\n"); goto err_unlock; } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | /* 分配相同数量的可扩展 buckets */ if (ext_table_support) { buckets_ext = rte_zmalloc_socket(NULL, num_buckets * sizeof(struct rte_hash_bucket), RTE_CACHE_LINE_SIZE, params->socket_id); if (buckets_ext == NULL) { RTE_LOG(ERR, HASH, "ext buckets memory allocation " "failed\n"); goto err_unlock; } /* Populate ext bkt ring. We reserve 0 similar to the * key-data slot, just in case in future we want to * use bucket index for the linked list and 0 means NULL * for next bucket */ for (i = 1; i <= num_buckets; i++) rte_ring_sp_enqueue_elem(r_ext, &i, sizeof(uint32_t)); if (readwrite_concur_lf_support) { ext_bkt_to_free = rte_zmalloc(NULL, sizeof(uint32_t) * num_key_slots, 0); if (ext_bkt_to_free == NULL) { RTE_LOG(ERR, HASH, "ext bkt to free memory allocation " "failed\n"); goto err_unlock; } } } /* 计算每个key entry 的大小 */ const uint32_t key_entry_size = RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len, KEY_ALIGNMENT); /* 根据key entry 的大小和数量,计算其占用的空间 */ const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots; /* 为key 分配内存空间 */ k = rte_zmalloc_socket(NULL, key_tbl_size, RTE_CACHE_LINE_SIZE, params->socket_id); if (k == NULL) { RTE_LOG(ERR, HASH, "memory allocation failed\n"); goto err_unlock; } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | /* 用来表示在上次读取完hash表之后,hash表是否有修改 */ tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t), RTE_CACHE_LINE_SIZE, params->socket_id); if (tbl_chng_cnt == NULL) { RTE_LOG(ERR, HASH, "memory allocation failed\n"); goto err_unlock; } /* 如果使用x86架构,选择对应的比较函数,会使用 x86 instrinsics 指令,否则使用 memcmp 函数 */ #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) /* 根据key的长度,选择比较函数的索引 */ switch (params->key_len) { case 16: h->cmp_jump_table_idx = KEY_16_BYTES; break; case 32: h->cmp_jump_table_idx = KEY_32_BYTES; break; case 48: h->cmp_jump_table_idx = KEY_48_BYTES; break; case 64: h->cmp_jump_table_idx = KEY_64_BYTES; break; case 80: h->cmp_jump_table_idx = KEY_80_BYTES; break; case 96: h->cmp_jump_table_idx = KEY_96_BYTES; break; case 112: h->cmp_jump_table_idx = KEY_112_BYTES; break; case 128: h->cmp_jump_table_idx = KEY_128_BYTES; break; default: /* 如果 key 的长度不是16的总数,则使用常规的 memcmp 函数 */ h->cmp_jump_table_idx = KEY_OTHER_BYTES; } #else h->cmp_jump_table_idx = KEY_OTHER_BYTES; #endif if (use_local_cache) { h->local_free_slots = rte_zmalloc_socket(NULL, sizeof(struct lcore_cache) * RTE_MAX_LCORE, RTE_CACHE_LINE_SIZE, params->socket_id); } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | /* 设置默认的hash函数 */ #if defined(RTE_ARCH_X86) default_hash_func = (rte_hash_function)rte_hash_crc; #elif defined(RTE_ARCH_ARM64) if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) default_hash_func = (rte_hash_function)rte_hash_crc; #endif /* 初始化 rte_hash 结构,设置hash上下文 */ strlcpy(h->name, params->name, sizeof(h->name)); /* hash 表的名字 */ h->entries = params->entries; h->key_len = params->key_len; h->key_entry_size = key_entry_size; h->hash_func_init_val = params->hash_func_init_val; h->num_buckets = num_buckets; h->bucket_bitmask = h->num_buckets - 1; h->buckets = buckets; h->buckets_ext = buckets_ext; h->free_ext_bkts = r_ext; h->hash_func = (params->hash_func == NULL) ? default_hash_func : params->hash_func; h->key_store = k; h->free_slots = r; h->ext_bkt_to_free = ext_bkt_to_free; h->tbl_chng_cnt = tbl_chng_cnt; /* 表示上次读取之后是否有修改 */ *h->tbl_chng_cnt = 0; h->hw_trans_mem_support = hw_trans_mem_support; h->use_local_cache = use_local_cache; h->readwrite_concur_support = readwrite_concur_support; h->ext_table_support = ext_table_support; h->writer_takes_lock = writer_takes_lock; h->no_free_on_del = no_free_on_del; h->readwrite_concur_lf_support = readwrite_concur_lf_support; #if defined(RTE_ARCH_X86) if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) h->sig_cmp_fn = RTE_HASH_COMPARE_SSE; else #elif defined(RTE_ARCH_ARM64) if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) h->sig_cmp_fn = RTE_HASH_COMPARE_NEON; else #endif h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR; /* 写线程需要在以下情况使用锁: * (1) 使用了 RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 标记,或者 * (2) 使用了 RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 标记 * 即设置了读者写者并发,或者支持多写者 */ if (h->writer_takes_lock) { /* 如果使用锁,则为锁分配内存并初始化 */ h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t), RTE_CACHE_LINE_SIZE); if (h->readwrite_lock == NULL) goto err_unlock; rte_rwlock_init(h->readwrite_lock); } /* 填充空闲slots ring。k 的第0个entry 保留作为key比较失败时使用 */ for (i = 1; i < num_key_slots; i++) rte_ring_sp_enqueue_elem(r, &i, sizeof(uint32_t)); te->data = (void *) h; /* 将新创建的 hash 表与 hash_list 关联 */ TAILQ_INSERT_TAIL(hash_list, te, next); rte_mcfg_tailq_write_unlock(); return h; err_unlock: rte_mcfg_tailq_write_unlock(); err: rte_ring_free(r); rte_ring_free(r_ext); rte_free(te); rte_free(h); rte_free(buckets); rte_free(buckets_ext); rte_free(k); rte_free(tbl_chng_cnt); rte_free(ext_bkt_to_free); return NULL; } |
3、rte_hash_free() 函数
该函数用来释放之前创建的 rte_hash 结构。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | void rte_hash_free(struct rte_hash *h) { struct rte_tailq_entry *te; struct rte_hash_list *hash_list; if (h == NULL) return; hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); rte_mcfg_tailq_write_lock(); /* 在 hash_list 找到对应的 hash 结构 */ TAILQ_FOREACH(te, hash_list, next) { if (te->data == (void *) h) break; } if (te == NULL) { rte_mcfg_tailq_write_unlock(); return; } /* 从 hash_list 中移除 */ TAILQ_REMOVE(hash_list, te, next); rte_mcfg_tailq_write_unlock(); if (h->use_local_cache) rte_free(h->local_free_slots); if (h->writer_takes_lock) rte_free(h->readwrite_lock); rte_ring_free(h->free_slots); rte_ring_free(h->free_ext_bkts); rte_free(h->key_store); rte_free(h->buckets); rte_free(h->buckets_ext); rte_free(h->tbl_chng_cnt); rte_free(h->ext_bkt_to_free); rte_free(h); rte_free(te); } |
————————————————————
原创文章,转载请注明: 转载自孙希栋的博客
本文链接地址: 《DPDK20.05 hash表1 – rte_hash_create 与 rte_hash_free》