當前位置：首頁 > 人文社科 > 生活经验 >内容正文

生活经验

Redis源码解析——字典基本操作

發布時間：2023/11/27 生活经验 26 豆豆

生活随笔收集整理的這篇文章主要介紹了 Redis源码解析——字典基本操作小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

? ? ? ? 有了《Redis源碼解析——字典結構》的基礎，我們便可以對dict的實現進行展開分析。（轉載請指明出于breaksoftware的csdn博客）

創建字典

? ? ? ? 一般字典創建時，都是沒有數據的，但是字典類型需要確定，所以我們看到Redis字典創建主要需要定義數據操作的dictType對象：

static void _dictReset(dictht *ht)
{ht->table = NULL;ht->size = 0;ht->sizemask = 0;ht->used = 0;
}/* Create a new hash table */
dict *dictCreate(dictType *type,void *privDataPtr)
{dict *d = zmalloc(sizeof(*d));_dictInit(d,type,privDataPtr);return d;
}/* Initialize the hash table */
int _dictInit(dict *d, dictType *type,void *privDataPtr)
{_dictReset(&d->ht[0]);_dictReset(&d->ht[1]);d->type = type;d->privdata = privDataPtr;d->rehashidx = -1;d->iterators = 0;return DICT_OK;
}

? ? ? ? dictCreate的privaDataPtr一般都傳Null。但是這個變量的設計是有原因的，因為作者希望提供一種能力，在框架調用一些使用者提供的方法時，能夠將一些他們可能關心的數據透傳回去。這種數據可能不一定是簡單的數據，也可能是個函數指針。如果是個函數指針的話，那么在框架調用相關函數時，使用者通過privaDataPtr傳遞進來的函數指針將被回傳，并在用戶自定義的方法中執行。比如調用用戶提供的對比數據的函數：

#define dictCompareKeys(d, key1, key2) \(((d)->type->keyCompare) ? \(d)->type->keyCompare((d)->privdata, key1, key2) : \(key1) == (key2))

? ? ? ? 還有一個需要注意的是rehashidx。因為剛創建的初始字典不需要rehash，所以rehashidx為-1。

刪除字典

? ? ? ? 字典刪除操作也非常簡單，其主要處理的就是兩個dictht對象。因為這兩個對象中有dictEntry數組，而每個數組元素均為一條鏈的首地址，于是刪除操作既有鏈表釋放，也有動態數組釋放操作。

int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {unsigned long i;/* Free all the elements */for (i = 0; i < ht->size && ht->used > 0; i++) {dictEntry *he, *nextHe;if (callback && (i & 65535) == 0) callback(d->privdata);if ((he = ht->table[i]) == NULL) continue;while(he) {nextHe = he->next;dictFreeKey(d, he);dictFreeVal(d, he);zfree(he);ht->used--;he = nextHe;}}/* Free the table and the allocated cache structure */zfree(ht->table);/* Re-initialize the table */_dictReset(ht);return DICT_OK; /* never fails */
}/* Clear & Release the hash table */
void dictRelease(dict *d)
{_dictClear(d,&d->ht[0],NULL);_dictClear(d,&d->ht[1],NULL);zfree(d);
}

? ? ? ? 上面函數中dictFreeKey和dictFreeValue實則是調用dictType中傳入的數據釋放函數

#define dictFreeVal(d, entry) \if ((d)->type->valDestructor) \(d)->type->valDestructor((d)->privdata, (entry)->v.val)#define dictFreeKey(d, entry) \if ((d)->type->keyDestructor) \(d)->type->keyDestructor((d)->privdata, (entry)->key)

字典擴容和縮容

? ? ? ? 我們知道Redis的字典是通過數組和鏈表相結合的方式實現的。理論上說，如果數組長度不變，鏈表長度改變則可以達到字典內容增減的目的。但是為什么還要設計擴容和縮容呢？首先說明下，這兒講解的兩個概念是針dictht的table的——即針對數組結構的。那么有了《Redis源碼解析——字典結構》知識，我們可以得知，針對數組長度的增減是為了：在鏈表過長影響查找效率時，擴大數組長度以減小鏈表長度，達到性能優化。在數據過于稀疏的情況下，減小數組長度以使得無效數組指針變少，從而達到節約空間的目的。

? ? ? ? 我們先看看擴容的計算：

/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
{/* Incremental rehashing already in progress. Return. */if (dictIsRehashing(d)) return DICT_OK;/* If the hash table is empty expand it to the initial size. */if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);/* If we reached the 1:1 ratio, and we are allowed to resize the hash* table (global setting) or we should avoid it but the ratio between* elements/buckets is over the "safe" threshold, we resize doubling* the number of buckets. */if (d->ht[0].used >= d->ht[0].size &&(dict_can_resize ||d->ht[0].used/d->ht[0].size > dict_force_resize_ratio)){return dictExpand(d, d->ht[0].used*2);}return DICT_OK;
}

? ? ? ? 其中最核心的是檢查ht[0]中元素個數和保存鏈表首地址的數組長度的商是否大于dict_force_resize_ratio——5。這個公式是計算鏈表的平均長度（數組中NULL意味著該對應的鏈表長度為0）。如果平均長度大于5，則需要通過dictExpand方法讓數組去擴容

int dictExpand(dict *d, unsigned long size)
{dictht n; /* the new hash table */unsigned long realsize = _dictNextPower(size);/* the size is invalid if it is smaller than the number of* elements already inside the hash table */if (dictIsRehashing(d) || d->ht[0].used > size)return DICT_ERR;/* Rehashing to the same table size is not useful. */if (realsize == d->ht[0].size) return DICT_ERR;/* Allocate the new hash table and initialize all pointers to NULL */n.size = realsize;n.sizemask = realsize-1;n.table = zcalloc(realsize*sizeof(dictEntry*));n.used = 0;/* Is this the first initialization? If so it's not really a rehashing* we just set the first hash table so that it can accept keys. */if (d->ht[0].table == NULL) {d->ht[0] = n;return DICT_OK;}/* Prepare a second hash table for incremental rehashing */d->ht[1] = n;d->rehashidx = 0;return DICT_OK;
}

? ? ? ? 至于擴容的大小要看下面的實現

static unsigned long _dictNextPower(unsigned long size) {unsigned long i = DICT_HT_INITIAL_SIZE;if (size >= LONG_MAX) return LONG_MAX;while(1) {if (i >= size)return i;i *= 2;}
}

? ? ? ? 可以見的_dictNextPower是獲取最近接size的，但是比size大的2的N次冪。這樣就可以讓鏈表平均長度降低到5/4~5/2之間（1.24~2.5）。

? ? ? ? 我們再注意下dictExpand函數，它最后將分配的空間賦值給ht[1]。如果進入這個場景，就意味著要進行rehash操作了——因為ht[1]就是為了臨時保存rehash結果的。

? ? ? ? 接下來看看縮容計算：

/* Resize the table to the minimal size that contains all the elements,* but with the invariant of a USED/BUCKETS ratio near to <= 1 */
int dictResize(dict *d)
{int minimal;if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;minimal = d->ht[0].used;if (minimal < DICT_HT_INITIAL_SIZE)minimal = DICT_HT_INITIAL_SIZE;return dictExpand(d, minimal);
}

? ? ? ? 函數注釋寫的很清楚：在平均鏈表長度低于1時要縮容了。但是作者并沒有在字典內容減少時檢測是否需要縮容，甚至沒有設計一個檢測是否需要縮容的函數，而是將這個方法暴露給用戶去做。我想是因為這種場景不影響字典的執行效率，而內存問題可能更多是應該讓用戶去考慮。

Rehash操作 ? ? ? ?

? ? ? ? Rehash操作是Dict庫的重要算法，好在邏輯我們已經在《Redis源碼解析——字典結構》講清楚了，現在我們就看看它的實現

int dictRehash(dict *d, int n) {

? ? ? ? 該函數需要傳入字典指針d和步進長度n，返回0或者1。這兒的步進長度需要說明下，因為Redis的字典rehash操作是漸進的分步來完成，所以每步需要漸進多少距離需要指定。然后dictht的dictEntry數組可能存在連續的空指針，這些空指針沒有數據鏈，因此不需要rehash，所以不用對它們進行操作。于是步進距離只是針對有效的數組指針，比如我們針對下圖結構進行rehash

? ? ? ? 我們假設步進長度為1，則對上面進行rehash時，ht[0].table的前兩個元素均被跳過，第三個元素所指向的鏈上數據將被rehash。因為步進長度為1，且已經rehash了數組中第三條鏈的數據，所以認為該次步進結束。

    int empty_visits = n*10; /* Max number of empty buckets to visit. */if (!dictIsRehashing(d)) return 0;while(n-- && d->ht[0].used != 0) {dictEntry *de, *nextde;/* Note that rehashidx can't overflow as we are sure there are more* elements because ht[0].used != 0 */assert(d->ht[0].size > (unsigned long)d->rehashidx);while(d->ht[0].table[d->rehashidx] == NULL) {d->rehashidx++;if (--empty_visits == 0) return 1;}

? ? ? ? 但是作者認為數組中有效步進長度內，過多的空指針也是會影響rehash效率。于是作者定義了empty_visits的值為步進長度10倍，如果有效步進長度內空指針數大于empty_visits的值，則需要提前跳出rehash操作，并返回1。可能有讀者會疑問，跳過空指針又不耗費時間，干嘛要做這個限制呢？其實問題不出在空指針上，而是因為數組中有過多空指針的話，意味著數據向數據鏈上堆積，于是每步進一次，需要rehash該鏈上的數據也會相對較多，時間消耗也會變長。所以限制空數據鏈的實質是優化步進的操作耗時的不確定性。

? ? ? ? 通過while我們可以看出，如果達到步進長度，或者ht[0]上的數據已經全被rehash到ht[1]上去了，rehash操作就完成了。我們再看戲rehash的具體操作：

        de = d->ht[0].table[d->rehashidx];/* Move all the keys in this bucket from the old to the new hash HT */while(de) {unsigned int h;nextde = de->next;/* Get the index in the new hash table */h = dictHashKey(d, de->key) & d->ht[1].sizemask;de->next = d->ht[1].table[h];d->ht[1].table[h] = de;d->ht[0].used--;d->ht[1].used++;de = nextde;}d->ht[0].table[d->rehashidx] = NULL;d->rehashidx++;}

? ? ? ? 這個過程就是不停的對ht[0].table上數組進行遍歷，如果數組元素不為空，則遍歷并rehash該元素指向的鏈表上的元素。如果ht[0]上數據已經全rehash到ht[1]上，則其used參數為0。這個時候則讓ht[0]等于ht[1]，而ht[1]自身釋放掉，從而達到在ht[0]中的數據被全部rehash的目的。

    /* Check if we already rehashed the whole table... */if (d->ht[0].used == 0) {zfree(d->ht[0].table);d->ht[0] = d->ht[1];_dictReset(&d->ht[1]);d->rehashidx = -1;return 0;}/* More to rehash... */return 1;
}

? ? ? ? 總結下dictRehash操作：它是通過用戶指定有效步進長度，并結合實際數據分布情況，將ht[0]上數據重新rehash到ht[1]上。如果ht[0].table數組全部被遍歷過，則認為rehash完成并返回0，否則返回1。

Rehash的時機

? ? ? ? 之前我們講過為什么要rehash，現在我們探討下分步rehash的時機。

? ? ? ? 當一個Redis字典需要rehash時，它沒有采用一次性完成的方案，而是采用漸進式。于是保持在中間狀態的字典又是在何時被繼續rehash的呢？Redis的字典庫提供了兩個時機，一個是在對字典進行更新或者查找操作時；另一個則是提供給使用者一個接口，由其決定決定何時去rehash。

? ? ? ??因為查找或者更新操作都是需要耗費一定時間，所以此時的rehash也不應該“蹭”過多的時間，于是步進設置為1。

static void _dictRehashStep(dict *d) {if (d->iterators == 0) dictRehash(d,1);
}

? ? ? ? 另一種是是提供給用戶觸發的，但是作者還是希望盡量保證其操作時間不可以過長，所以提供了下面的方法：

/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
int dictRehashMilliseconds(dict *d, int ms) {long long start = timeInMilliseconds();int rehashes = 0;while(dictRehash(d,100)) {rehashes += 100;if (timeInMilliseconds()-start > ms) break;}return rehashes;
}

? ? ? ? 此時rehash操作的步進長度為100，這樣相對于步進長度為1的情況，算是批量操作，可以省去函數調用和返回的時間消耗。相應的，還需要使用者提供時間進行約束。至于時長多少，使用者需要自己權衡了。

增加元素

? ? ? ? 新增元素通過下面函數實現

int dictAdd(dict *d, void *key, void *val)
{dictEntry *entry = dictAddRaw(d,key);if (!entry) return DICT_ERR;dictSetVal(d, entry, val);return DICT_OK;
}

? ? ? ??dictAddRaw方法獲取一個新的dictEntry指針，然后通過用于傳入的函數指針，將value復制到dictEntry所指向的對象的值上

#define dictSetVal(d, entry, _val_) do { \if ((d)->type->valDup) \entry->v.val = (d)->type->valDup((d)->privdata, _val_); \else \entry->v.val = (_val_); \
} while(0)

? ? ? ??dictAddRaw方法的實現我們需要注意下。首先它會檢測該字典是否處在rehash的狀態，如果是，則讓其rehash一步

dictEntry *dictAddRaw(dict *d, void *key)
{int index;dictEntry *entry;dictht *ht;if (dictIsRehashing(d)) _dictRehashStep(d);

? ? ? ? 然后檢測key是否已經在map中存在，如果存在則不能新增；否則返回key所在的指針數組的下標。（dictHashKey(ht, key) & ht->sizemask;）

    /* Get the index of the new element, or -1 if* the element already exists. */if ((index = _dictKeyIndex(d, key)) == -1)return NULL;

? ? ? ? 由于字典可能處在rehash的中間狀態，數據一部分在ht[0]中，有一部分在ht[1]中。這個時候就需要判斷新增的dictEntry是要加到哪個dictht上：如果在rehash，則新增到ht[1]上。因為如果新增到ht[0]上，此時rehashidx可能已經越過剛新增key對應的索引，導致數據丟失；如果不在rehash狀態，則新增到ht[0]上。

    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];entry = zmalloc(sizeof(*entry));entry->next = ht->table[index];ht->table[index] = entry;ht->used++;/* Set the hash entry fields. */dictSetKey(d, entry, key);return entry;
}

刪除元素

? ? ? ? 刪除元素時，需要在ht[0]和ht[1]中查找并刪除，所以會遍歷兩個table

static int dictGenericDelete(dict *d, const void *key, int nofree)
{unsigned int h, idx;dictEntry *he, *prevHe;int table;if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */if (dictIsRehashing(d)) _dictRehashStep(d);h = dictHashKey(d, key);for (table = 0; table <= 1; table++) {

? ? ? ? 然后找到key對應的指針數組的下標

        idx = h & d->ht[table].sizemask;

? ? ? ? sizemask是數組長度減去1。上面這步與操作，相當于讓hash值向數組長度取余數。比如我們hash值是5（0x101），數組長度是4（0x100），則sizemask為3（0x011）。5和3進行與運算，得出的是0x001，即5%4的結果。

? ? ? ? 找到指針下標后，則對該下標所指向的鏈表進行遍歷。找到元素就將其從鏈表中摘除。至于是否需要通過用戶提供的析構函數將key和value析構掉，要視傳入的force值決定。

        he = d->ht[table].table[idx];prevHe = NULL;while(he) {if (key==he->key || dictCompareKeys(d, key, he->key)) {/* Unlink the element from the list */if (prevHe)prevHe->next = he->next;elsed->ht[table].table[idx] = he->next;if (!nofree) {dictFreeKey(d, he);dictFreeVal(d, he);}zfree(he);d->ht[table].used--;return DICT_OK;}prevHe = he;he = he->next;}if (!dictIsRehashing(d)) break;}return DICT_ERR; /* not found */
}

? ? ? ? Redis字典庫對上面方法進行封裝，提供了下面這兩個函數：

int dictDelete(dict *ht, const void *key) {return dictGenericDelete(ht,key,0);
}int dictDeleteNoFree(dict *ht, const void *key) {return dictGenericDelete(ht,key,1);
}

查找元素

? ? ? ? 查找元素也需要考慮字典是否在rehash的過程中，于是查找也要視情況看看在ht[0]中查找，還是也要在ht[1]中查找：

dictEntry *dictFind(dict *d, const void *key)
{dictEntry *he;unsigned int h, idx, table;if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */if (dictIsRehashing(d)) _dictRehashStep(d);h = dictHashKey(d, key);for (table = 0; table <= 1; table++) {idx = h & d->ht[table].sizemask;he = d->ht[table].table[idx];while(he) {if (key==he->key || dictCompareKeys(d, key, he->key))return he;he = he->next;}if (!dictIsRehashing(d)) return NULL;}return NULL;
}void *dictFetchValue(dict *d, const void *key) {dictEntry *he;he = dictFind(d,key);return he ? dictGetVal(he) : NULL;
}

修改（無時新增）元素

? ? ? ? Redis的字典庫，會先嘗試往字典里新增該key，然后再查找到該key，讓其value變成需要替換的值，最后還要將原來的value釋放掉

int dictReplace(dict *d, void *key, void *val)
{dictEntry *entry, auxentry;/* Try to add the element. If the key* does not exists dictAdd will suceed. */if (dictAdd(d, key, val) == DICT_OK)return 1;/* It already exists, get the entry */entry = dictFind(d, key);/* Set the new value and free the old one. Note that it is important* to do that in this order, as the value may just be exactly the same* as the previous one. In this context, think to reference counting,* you want to increment (set), and then decrement (free), and not the* reverse. */auxentry = *entry;dictSetVal(d, entry, val);dictFreeVal(d, &auxentry);return 0;
}

總結

以上是生活随笔為你收集整理的Redis源码解析——字典基本操作的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。