[转载]memcache源码分析之items

[转载]memcache源码分析之items – 先贝夜话 – 博客园.

items是memcache用来管理item的封装,采用的hash表和LRU链的形式,关于hash表的操作见我前几天的文章  memcache源码分析之assoc

关于item内容的存储机制简介

item的内容存储是在slab中管理的。为了对内存进行有效的管理,slab按照不同的块大小分桶来存储item的内容。简单举例解释一下:初始化时会有不同块大小的桶,比如桶1里面的内存块都是80b的,专门用来存储内容大小接近80b的item;桶2的内存块是100b的,专门用来存储内容大小接近100b的item;桶3是120b的,用来存储大小接近120b的item,等等。所以,如果有一个item的内容大小是90b,那它只能存储在100b的桶内,不能存储在其他桶里面,120b的也不可以。具体详细介绍请见我后续关于slab的文章。

问题:当100b的桶存储满的时候,memcache怎么办呢?

这个问题的答案就在本文介绍的内容里面。

为一个item分配存储空间的时候,具体的操作是这样的:

1、首先,计算该item占用的空间大小,只有知道了它的大小,才能知道它需要存储在哪个桶中。一个item的大小是它的item结构体大小部分、名字长度部分、状态标识部分、内容大小部分等的总和。具体计算方法请看下面的代码分析中 item_make_header 函数。

2、然后寻找合适的slab用于存储,这一部分主要是比较item 和各slab桶的大小,寻找最合适的slab,此部分代码是文件  slabs.c 中的  slabs_clsid 函数,具体内容我后续关于slab的文章会详细分析。

3、从对应slab的tail队列中寻找是否存在过期的item,如果有,清除掉,此处操作最多尝试50次。

4、如果第3步操作失败,并且在对应slab中分配空间失败,那么从slab对应的tail队列中删除没有被引用的item,且最多也是尝试50次。

5、尝试从slab中分配空间。

6、如果第5步失败,会从slab对应的tail队列中删除3个小时(默认)之前的正在引用的item。

7、然后尝试从slab中分配空间。如果失败,返回NULL,成功则会设置item对应的一些信息,返回成功标识。

item的删除过程:

1、设置已被删除状态,并从hash表中删除。此部分代码调用的是  memcache源码分析之assoc 中介绍到的函数assoc_delete

2、从LRU链中删除。函数item_unlink_q。

3、如果要清除item占用的资源,则调用函数do_item_remove和item_free,释放占用内存空间。


另外还提供了一些其他操作,分别包括,获取某个item(会判断是否过期),获取某个item(不判断是否过期),客户端通过flush_all操作清空所有过期item,item的新值替换,访问时间更新等。

当然,有item的删除操作,就要有相应的加入hash表和LRU链的操作。

另外,还提供了一些item和slab状态函数。


想了解详细代码的同学可以看一下下面的简要分析。有错误之处请指正。


items.h

01 /* See items.c */
02 uint64_t get_cas_id(void); /* returns the next value of an incrementing CAS counter */
03
04 /*@null@*/
05 item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes); /* NULL when no slab class fits or no memory can be obtained; on success refcount is 1 */
06 void item_free(item *it); /* hands the item back to the slab allocator; asserts it is unlinked with refcount 0 */
07 bool item_size_ok(const size_t nkey, const int flags, const int nbytes); /* true when slabs_clsid() finds a class for the computed size */
08
09 int do_item_link(item *it); /** may fail if transgresses limits (the implementation in items.c always returns 1) */
10 void do_item_unlink(item *it); /* no-op unless ITEM_LINKED; removes from hash table and LRU, frees when refcount is 0 */
11 void do_item_remove(item *it); /* drops one reference; frees the item once it is unreferenced and unlinked */
12 void do_item_update(item *it); /** update LRU time to current and reposition */
13 int do_item_replace(item *it, item *new_it); /* unlinks it, links new_it, returns do_item_link()'s result */
14
15 /*@null@*/
16 char *do_item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes); /* malloc'ed text dump of one LRU, NULL on allocation failure; presumably freed by the caller - confirm */
17 void do_item_stats(ADD_STAT add_stats, void *c); /* emits "items:<class>:<name>" statistics for every non-empty LRU */
18 /*@null@*/
19 void do_item_stats_sizes(ADD_STAT add_stats, void *c); /* emits a histogram of item sizes in 32-byte buckets */
20 void do_item_flush_expired(void); /* unlinks items whose time is >= settings.oldest_live */
21
22 item *do_item_get(const char *key, const size_t nkey); /* lookup that unlinks and hides flushed or expired items; takes a reference on success */
23 item *do_item_get_nocheck(const char *key, const size_t nkey); /* lookup without flush/expiry checks; takes a reference on success */
24 void item_stats_reset(void); /* zeroes the per-class item statistics under cache_lock */
25 extern pthread_mutex_t cache_lock; /* locked by item_stats_reset(); comments in do_item_get() state it is held there */

items.c

001 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
002 #include "memcached.h"
003 #include <sys/stat.h>
004 #include <sys/socket.h>
005 #include <sys/signal.h>
006 #include <sys/resource.h>
007 #include <fcntl.h>
008 #include <netinet/in.h>
009 #include <errno.h>
010 #include <stdlib.h>
011 #include <stdio.h>
012 #include <string.h>
013 #include <time.h>
014 #include <assert.h>
015
016 /* Forward Declarations */
017 static void item_link_q(item *it);
018 static void item_unlink_q(item *it);
019
020 /*
021 * We only reposition items in the LRU queue if they haven't been repositioned
022 * in this many seconds. That saves us from churning on frequently-accessed
023 * items.
024 */
025 #define ITEM_UPDATE_INTERVAL 60
026
027 #define LARGEST_ID POWER_LARGEST
028
029 // Per-slab-class item statistics; the counters are updated in do_item_alloc().
030 typedef struct {
031 unsigned int evicted; // unexpired, unreferenced items unlinked to make room
032 unsigned int evicted_nonzero; // evictions of items whose exptime was not 0
033 rel_time_t evicted_time; // current_time - time of the most recently evicted item
034 unsigned int reclaimed; // expired items reused or unlinked during allocation
035 unsigned int outofmemory; // allocation attempts that found no memory
036 unsigned int tailrepairs; // referenced items force-unlinked after TAIL_REPAIR_TIME
037 } itemstats_t;
038
039 static item *heads[LARGEST_ID]; // head of each slab class's LRU list (new items are linked here)
040 static item *tails[LARGEST_ID]; // tail of each slab class's LRU list (eviction scans start here)
041 static itemstats_t itemstats[LARGEST_ID];
042 static unsigned int sizes[LARGEST_ID];// number of items currently linked in each slab class's LRU list
043
044 void item_stats_reset(void) {
045 pthread_mutex_lock(&cache_lock);
046 memset(itemstats, 0, sizeof(itemstats));
047 pthread_mutex_unlock(&cache_lock);
048 }
049
050
/*
 * Return the next CAS value.
 *
 * The counter is a function-local static that starts at 0 and is bumped
 * before being returned, so the first value handed out is 1.
 */
uint64_t get_cas_id(void) {
    static uint64_t last_issued = 0;

    last_issued += 1;
    return last_issued;
}
056
/*
 * Reference-count tracing hook.
 *
 * Change "#if 0" to "#if 1" to print one line to stderr for every
 * reference-count operation: the item address, the operation character,
 * the current refcount, and 'L' / 'S' markers for the ITEM_LINKED and
 * ITEM_SLABBED flags. When disabled the macro expands to an empty
 * statement and evaluates none of its arguments.
 *
 * The macro is spelled DEBUG_REFCNT to match its call sites; the trace
 * format has exactly one conversion per argument and prints the pointer
 * with %p.
 */
#if 0
# define DEBUG_REFCNT(it,op) \
    fprintf(stderr, "item %p refcnt(%c) %d %c%c\n", \
            (void *)(it), (op), (it)->refcount, \
            ((it)->it_flags & ITEM_LINKED) ? 'L' : ' ', \
            ((it)->it_flags & ITEM_SLABBED) ? 'S' : ' ')
#else
# define DEBUG_REFCNT(it,op) do { } while (0)
#endif
067
068 /**
069 * Generates the variable-sized part of the header for an object.
070 *
071 * key     - The key
072 * nkey    - The length of the key
073 * flags   - key flags
074 * nbytes  - Number of bytes to hold value and addition CRLF terminator
075 * suffix  - Buffer for the "VALUE" line suffix (flags, size).
076 * nsuffix - The length of the suffix is stored here.
077 *
078 * Returns the total size of the header.
079 */
080 //计算item占用空间大小
081 static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes,char *suffix, uint8_t *nsuffix) {
082 /* suffix is defined at 40 chars elsewhere.. */
083 *nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2);
084 return sizeof(item) + nkey + *nsuffix + nbytes;
085 }
086
087
088 // Allocate and initialise an item for the given key, flags, expiry time and nbytes of data.
// Tries, in order: reusing an expired item near the LRU tail, a plain slab allocation,
// evicting an unreferenced item, and finally force-unlinking a stale referenced item.
// Returns NULL when no slab class fits the size or when no memory could be obtained;
// on success the returned item has refcount 1 and is not yet linked.
089 item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
090 uint8_t nsuffix;
091 item *it = NULL;
092 char suffix[40];
093 size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);// total size the item will occupy (also fills in suffix)
094 if (settings.use_cas) {
095 ntotal += sizeof(uint64_t);
096 }
097
098 unsigned int id = slabs_clsid(ntotal);// pick the slab class for this size; 0 means no class fits
099 if (id == 0)
100 return 0;
101
102 /* do a quick check if we have any expired items in the tail.. */
103 int tries = 50;
104 item *search;
105
106 for (search = tails[id];tries > 0 && search != NULL;tries--, search=search->prev) {
107 if (search->refcount == 0 && (search->exptime != 0 && search->exptime < current_time)) {// unreferenced and already expired
108 it = search;
109 /* I don't want to actually free the object, just steal
110 * the item to avoid to grab the slab mutex twice ;-)
111 */
112 STATS_LOCK();
113 stats.reclaimed++;
114 STATS_UNLOCK();
115 itemstats[id].reclaimed++;
116 it->refcount = 1;
117 do_item_unlink(it);// remove from hash table and LRU; refcount 1 keeps do_item_unlink from freeing it
118 /* Initialize the item block: */
119 it->slabs_clsid = 0;
120 it->refcount = 0;
121 break;
122 }
123 }
124
125 if (it == NULL && (it = slabs_alloc(ntotal, id)) == NULL) {// nothing expired was reclaimed and the slab allocation failed
126
127 tries = 50;
128
129 /* If requested to not push old items out of cache when memory runs out,
130 * we're out of luck at this point...
131 */
132
133 if (settings.evict_to_free == 0) {
134 itemstats[id].outofmemory++;
135 return NULL;
136 }
137
138 /*
139 * try to get one off the right LRU
140 * don't necessariuly unlink the tail because it may be locked: refcount>0
141 * search up from tail an item with refcount==0 and unlink it; give up after 50
142 * tries
143 */
144
145 if (tails[id] == 0) {
146 itemstats[id].outofmemory++;
147 return NULL;
148 }
149
150 for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
151 if (search->refcount == 0) {// unreferenced: unlink it, counting it as evicted or reclaimed
152 if (search->exptime == 0 || search->exptime > current_time) {
153 itemstats[id].evicted++;
154 itemstats[id].evicted_time = current_time - search->time;
155 if (search->exptime != 0)
156 itemstats[id].evicted_nonzero++;
157 STATS_LOCK();
158 stats.evictions++;
159 STATS_UNLOCK();
160 } else {
161 itemstats[id].reclaimed++;
162 STATS_LOCK();
163 stats.reclaimed++;
164 STATS_UNLOCK();
165 }
166 do_item_unlink(search);
167 break;
168 }
169 }
170 it = slabs_alloc(ntotal, id);
171 if (it == 0) {
172 itemstats[id].outofmemory++;
173 /* Last ditch effort. There is a very rare bug which causes
174 * refcount leaks. We've fixed most of them, but it still happens,
175 * and it may happen in the future.
176 * We can reasonably assume no item can stay locked for more than
177 * three hours, so if we find one in the tail which is that old,
178 * free it anyway.
179 */
180 tries = 50;
181 for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
182 if (search->refcount != 0 && search->time + TAIL_REPAIR_TIME < current_time) {// still referenced (refcount != 0) but last touched more than TAIL_REPAIR_TIME ago
183 itemstats[id].tailrepairs++;
184 search->refcount = 0;
185 do_item_unlink(search);
186 break;
187 }
188 }
189 it = slabs_alloc(ntotal, id);
190 if (it == 0) {
191 return NULL;
192 }
193 }
194 }
195
196 assert(it->slabs_clsid == 0);
197
198 it->slabs_clsid = id;
199
200 assert(it != heads[it->slabs_clsid]);
201
202 it->next = it->prev = it->h_next = 0;
203 it->refcount = 1; /* the caller will have a reference */
204 DEBUG_REFCNT(it, '*');
205 it->it_flags = settings.use_cas ? ITEM_CAS : 0;
206 it->nkey = nkey;
207 it->nbytes = nbytes;
208 memcpy(ITEM_key(it), key, nkey); /* copies exactly nkey bytes; no terminator is written here */
209 it->exptime = exptime;
210 memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
211 it->nsuffix = nsuffix;
212 return it;
213 }
214
215
216 //释放item
217 void item_free(item *it) {
218 size_t ntotal = ITEM_ntotal(it);
219 unsigned int clsid;
220 assert((it->it_flags & ITEM_LINKED) == 0);//没有在hash表和LRU链中
221 assert(it != heads[it->slabs_clsid]);
222 assert(it != tails[it->slabs_clsid]);
223 assert(it->refcount == 0);
224
225 /* so slab size changer can tell later if item is already free or not */
226 clsid = it->slabs_clsid;
227 it->slabs_clsid = 0;
228 it->it_flags |= ITEM_SLABBED;//内存空闲交给slab
229 DEBUG_REFCNT(it, 'F');
230 slabs_free(it, ntotal, clsid);
231 }
232
233
234 //检验某item是否有适合的slab来存储
235 bool item_size_ok(const size_t nkey, const int flags, const int nbytes) {
236 char prefix[40];
237 uint8_t nsuffix;
238
239 return slabs_clsid(item_make_header(nkey + 1, flags, nbytes,prefix, &nsuffix)) != 0;
240 }
241
242
243 //加入LRU队列,成为新的head
244 static void item_link_q(item *it) { /* item is the new head */
245 item **head, **tail;
246 assert(it->slabs_clsid < LARGEST_ID);//判断所设置slab是否有效
247 assert((it->it_flags & ITEM_SLABBED) == 0);//判断状态
248
249 head = &heads[it->slabs_clsid];
250 tail = &tails[it->slabs_clsid];
251 assert(it != *head);
252 assert((*head && *tail) || (*head == 0 && *tail == 0));
253 it->prev = 0;
254 it->next = *head;
255 if (it->next) it->next->prev = it;
256 *head = it;
257 if (*tail == 0) *tail = it;//只有tail为空时才加入?
258 sizes[it->slabs_clsid]++;
259 return;
260 }
261
262
263 //从对应的slab的LRU链上删除
264 static void item_unlink_q(item *it) {
265 item **head, **tail;
266 assert(it->slabs_clsid < LARGEST_ID);
267 head = &heads[it->slabs_clsid];
268 tail = &tails[it->slabs_clsid];
269
270 if (*head == it) {
271 assert(it->prev == 0);
272 *head = it->next;
273 }
274 if (*tail == it) {
275 assert(it->next == 0);
276 *tail = it->prev;
277 }
278 assert(it->next != it);
279 assert(it->prev != it);
280
281 if (it->next) it->next->prev = it->prev;
282 if (it->prev) it->prev->next = it->next;
283 sizes[it->slabs_clsid]--;
284 return;
285 }
286
287
288 //将item加入到hashtable和LRU链中
289 int do_item_link(item *it) {
290 MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);//ITEM_key在memcached.h中定义
291 assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);//判断状态,既没有在hash表LRU链中或被释放
292 it->it_flags |= ITEM_LINKED;//设置linked状态
293 it->time = current_time;//设置最近访问时间
294 assoc_insert(it);//插入hashtable   assoc.c
295
296 STATS_LOCK();
297 stats.curr_bytes += ITEM_ntotal(it);//增加每个item所需要的字节大小,包括item结构体和item内容大小
298 stats.curr_items += 1;
299 stats.total_items += 1;
300 STATS_UNLOCK();
301
302 /* Allocate a new CAS ID on link. */
303 ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);//设置新CAS,CAS是memcache用来处理并发请求的一种机制
304
305 item_link_q(it);//加入LRU链
306
307 return 1;
308 }
309
310
311 //从hash表和LRU链中删除item
312 void do_item_unlink(item *it) {
313 MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
314 if ((it->it_flags & ITEM_LINKED) != 0) {
315 it->it_flags &= ~ITEM_LINKED;//设置为非linked
316 STATS_LOCK();
317 stats.curr_bytes -= ITEM_ntotal(it);
318 stats.curr_items -= 1;
319 STATS_UNLOCK();
320 assoc_delete(ITEM_key(it), it->nkey);//从hash表中删除
321 item_unlink_q(it);//从LRU链中删除
322 if (it->refcount == 0) item_free(it);
323 }
324 }
325
326
327 //remove item
328 void do_item_remove(item *it) {
329 MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
330 assert((it->it_flags & ITEM_SLABBED) == 0);
331 if (it->refcount != 0) {
332 it->refcount--;
333 DEBUG_REFCNT(it, '-');
334 }
335 if (it->refcount == 0 && (it->it_flags & ITEM_LINKED) == 0) {//没有人在引用并且没有在hash表和LEU链中
336 item_free(it);
337 }
338 }
339
340
341 //更新item最后访问时间
342 void do_item_update(item *it) {
343 MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
344 if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
345 assert((it->it_flags & ITEM_SLABBED) == 0);//没有被释放
346
347 if ((it->it_flags & ITEM_LINKED) != 0) {
348 item_unlink_q(it);
349 it->time = current_time;
350 item_link_q(it);
351 }
352 }
353 }
354
355
356 //item替换
357 int do_item_replace(item *it, item *new_it) {
358 MEMCACHED_ITEM_REPLACE(ITEM_key(it), it->nkey, it->nbytes,ITEM_key(new_it), new_it->nkey, new_it->nbytes);
359 assert((it->it_flags & ITEM_SLABBED) == 0);//确保没有被释放
360
361 do_item_unlink(it);
362 return do_item_link(new_it);
363 }
364
365
366 /*@null@*/
367 char *do_item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) {
368 unsigned int memlimit = 2 * 1024 * 1024; /* 2MB max response size */
369 char *buffer;
370 unsigned int bufcurr;
371 item *it;
372 unsigned int len;
373 unsigned int shown = 0;
374 char key_temp[KEY_MAX_LENGTH + 1];
375 char temp[512];
376
377 it = heads[slabs_clsid];
378
379 buffer = malloc((size_t)memlimit);
380 if (buffer == 0) return NULL;
381 bufcurr = 0;
382
383 while (it != NULL && (limit == 0 || shown < limit)) {
384 assert(it->nkey <= KEY_MAX_LENGTH);
385 /* Copy the key since it may not be null-terminated in the struct */
386 strncpy(key_temp, ITEM_key(it), it->nkey);
387 key_temp[it->nkey] = 0x00; /* terminate */
388 len = snprintf(temp, sizeof(temp), "ITEM %s [%d b; %lu s]\r\n",key_temp, it->nbytes - 2,(unsigned long)it->exptime + process_started);
389 if (bufcurr + len + 6 > memlimit) /* 6 is END\r\n\0 */
390 break;
391 memcpy(buffer + bufcurr, temp, len);
392 bufcurr += len;
393 shown++;
394 it = it->next;
395 }
396
397 memcpy(buffer + bufcurr, "END\r\n", 6);
398 bufcurr += 5;
399
400 *bytes = bufcurr;
401 return buffer;
402 }
403
404
405 //slab状态信息
406 void do_item_stats(ADD_STAT add_stats, void *c) {
407 int i;
408 for (i = 0; i < LARGEST_ID; i++) {
409 if (tails[i] != NULL) {
410 const char *fmt = "items:%d:%s";
411 char key_str[STAT_KEY_LEN];
412 char val_str[STAT_VAL_LEN];
413 int klen = 0, vlen = 0;
414
415 APPEND_NUM_FMT_STAT(fmt, i, "number", "%u", sizes[i]);
416
417 APPEND_NUM_FMT_STAT(fmt, i, "age", "%u", tails[i]->time);
418
419 APPEND_NUM_FMT_STAT(fmt, i, "evicted","%u", itemstats[i].evicted);
420
421 APPEND_NUM_FMT_STAT(fmt, i, "evicted_nonzero","%u", itemstats[i].evicted_nonzero);
422
423 APPEND_NUM_FMT_STAT(fmt, i, "evicted_time","%u", itemstats[i].evicted_time);
424
425 APPEND_NUM_FMT_STAT(fmt, i, "outofmemory","%u", itemstats[i].outofmemory);
426
427 APPEND_NUM_FMT_STAT(fmt, i, "tailrepairs","%u", itemstats[i].tailrepairs);;
428
429 APPEND_NUM_FMT_STAT(fmt, i, "reclaimed","%u", itemstats[i].reclaimed);;
430 }
431 }
432
433 /* getting here means both ascii and binary terminators fit */
434 add_stats(NULL, 0, NULL, 0, c);
435 }
436
437
438 /** dumps out a list of objects of each size, with granularity of 32 bytes */
439 /*@null@*/
440 void do_item_stats_sizes(ADD_STAT add_stats, void *c) {
441
442 /* max 1MB object, divided into 32 bytes size buckets */
443 const int num_buckets = 32768;
444 unsigned int *histogram = calloc(num_buckets, sizeof(int));
445
446 if (histogram != NULL) {
447 int i;
448
449 /* build the histogram */
450 for (i = 0; i < LARGEST_ID; i++) {
451 item *iter = heads[i];
452 while (iter) {
453 int ntotal = ITEM_ntotal(iter);
454 int bucket = ntotal / 32;
455 if ((ntotal % 32) != 0) bucket++;
456 if (bucket < num_buckets) histogram[bucket]++;
457 iter = iter->next;
458 }
459 }
460
461 /* write the buffer */
462 for (i = 0; i < num_buckets; i++) {
463 if (histogram[i] != 0) {
464 char key[8];
465 int klen = 0;
466 klen = snprintf(key, sizeof(key), "%d", i * 32);
467 assert(klen < sizeof(key));
468 APPEND_STAT(key, "%u", histogram[i]);
469 }
470 }
471 free(histogram);
472 }
473 add_stats(NULL, 0, NULL, 0, c);
474 }
475
476
477 //获取item
478 item *do_item_get(const char *key, const size_t nkey) {
479 item *it = assoc_find(key, nkey);
480 int was_found = 0;
481
482 if (settings.verbose > 2) {//输出调试信息
483 if (it == NULL) {
484 fprintf(stderr, "> NOT FOUND %s", key);
485 } else {
486 fprintf(stderr, "> FOUND KEY %s", ITEM_key(it));
487 was_found++;
488 }
489 }
490
491 //忽略比设置日期早的item
492 if (it != NULL && settings.oldest_live != 0 && settings.oldest_live <= current_time && it->time <= settings.oldest_live) {
493 do_item_unlink(it); /* MTSAFE - cache_lock held */
494 it = NULL;
495 }
496
497 if (it == NULL && was_found) {
498 fprintf(stderr, " -nuked by flush");//被忽略错误信息
499 was_found--;
500 }
501
502 if (it != NULL && it->exptime != 0 && it->exptime <= current_time) {//过期
503 do_item_unlink(it); /* MTSAFE - cache_lock held */
504 it = NULL;
505 }
506
507 if (it == NULL && was_found) {
508 fprintf(stderr, " -nuked by expire");//过期错误
509 was_found--;
510 }
511
512 if (it != NULL) {
513 it->refcount++;
514 DEBUG_REFCNT(it, '+');
515 }
516
517 if (settings.verbose > 2)
518 fprintf(stderr, "\n");
519
520 return it;
521 }
522
523
524 //获取一个item,不论过期与否
525 item *do_item_get_nocheck(const char *key, const size_t nkey) {
526 item *it = assoc_find(key, nkey);
527 if (it) {
528 it->refcount++;
529 DEBUG_REFCNT(it, '+');
530 }
531 return it;
532 }
533
534
535 //flush all items
536 void do_item_flush_expired(void) {
537 int i;
538 item *iter, *next;
539 if (settings.oldest_live == 0)
540 return;
541 for (i = 0; i < LARGEST_ID; i++) {
542 /* The LRU is sorted in decreasing time order, and an item's timestamp
543 * is never newer than its last access time, so we only need to walk
544 * back until we hit an item older than the oldest_live time.
545 * The oldest_live checking will auto-expire the remaining items.
546 */
547 for (iter = heads[i]; iter != NULL; iter = next) {
548 if (iter->time >= settings.oldest_live) {
549 next = iter->next;
550 if ((iter->it_flags & ITEM_SLABBED) == 0) {//没有被释放,unlink
551 do_item_unlink(iter);
552 }
553 } else {
554 break;
555 }
556 }
557 }
558 }
赞(0) 打赏
分享到: 更多 (0)

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏