[转载]memcached源码分析之slabs

baacloud免费翻墙vpn注册使用

[转载]memcache源码分析之slabs – 先贝夜话 – 博客园.

slab是memcache用来管理item的内容存储部分。

分配内存时,memcache把我们通过参数m设置的内存大小分配到每个slab中

1、slab默认最多为200个,但是由于item的最大为1MB,而且每个slab里面存储的item的尺寸是根据factor来确定的,所以能够分配的slab的个数小于200。

2、关于增长因子factor参数(配置时参数名为f),默认为1.25,即每个slab所能存储的item的大小是根据factor的大小来变化的。

3、每个slab中含有一个或多个trunk,trunk中存储的就是item,item的最大为1M,所以trunk最大为1M

4、每个slab中会有一个item空闲列表,当新的item需要存储时,首先会考虑空闲列表,从中取出一个位置用来存储。当空闲列表满时,系统会去自动扩充。

5、每个slab中有二个参数为end_page_ptr、end_page_free,前者指向当前空闲的trunk指针,后者当前 trunk指向空闲处,当4中的空闲列表为空时,如果end_page_ptr和end_page_free不为空,则会在此trunk中存储item。 如果没有多余的trunk可用,系统会自动扩充trunk。

采用这种方式管理内存的好处是最大程度的减少了内存碎片的产生,提高了存储和读取效率。

下面是一些源码注释

slabs.h

1 /* stats */
2 void stats_prefix_init(void);
3 void stats_prefix_clear(void);
4 void stats_prefix_record_get(const char *key, const size_t nkey, const bool is_hit);
5 void stats_prefix_record_delete(const char *key, const size_t nkey);
6 void stats_prefix_record_set(const char *key, const size_t nkey);
7 /*@null@*/
8 char *stats_prefix_dump(int *length);

slabs.c

001 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
002 /*
003 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
004 * and are divided into chunks. The chunk sizes start off at the size of the
005 * "item" structure plus space for a small key and value. They increase by
006 * a multiplier factor from there, up to half the maximum slab size. The last
007 * slab size is always 1MB, since that's the maximum item size allowed by the
008 * memcached protocol.
009 */
010 #include "memcached.h"
011 #include <sys/stat.h>
012 #include <sys/socket.h>
013 #include <sys/signal.h>
014 #include <sys/resource.h>
015 #include <fcntl.h>
016 #include <netinet/in.h>
017 #include <errno.h>
018 #include <stdlib.h>
019 #include <stdio.h>
020 #include <string.h>
021 #include <assert.h>
022 #include <pthread.h>
023
024 /* powers-of-N allocation structures */
025
026 typedef struct {
027 unsigned int size; /* item的大小 */
028 unsigned int perslab; /* 每个trunk有多少item */
029
030 void **slots; //空闲item列表
031 unsigned int sl_total; //空闲总量
032 unsigned int sl_curr; //当前空闲处
033
034 void *end_page_ptr; //当前trunk空闲处
035 unsigned int end_page_free; //当前trunk空闲个数
036
037 unsigned int slabs; //已分配chunk数目
038
039 void **slab_list; //trunk指针
040 unsigned int list_size; //trunk数目
041
042 unsigned int killing; /* index+1 of dying slab, or zero if none */
043 size_t requested; //已分配总内存大小
044 } slabclass_t;
045
046 static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];
047 static size_t mem_limit = 0;//内存限制大小
048 static size_t mem_malloced = 0;//已分配大小
049 static int power_largest;
050
051 static void *mem_base = NULL;
052 static void *mem_current = NULL;//内存使用当前地址
053 static size_t mem_avail = 0;//剩余内存
054
055 /**
056 * slab 线程锁
057 */
058 static pthread_mutex_t slabs_lock = PTHREAD_MUTEX_INITIALIZER;
059
060 /*
061 * Forward Declarations
062 */
063 static int do_slabs_newslab(const unsigned int id);
064 static void *memory_allocate(size_t size);
065
066 #ifndef DONT_PREALLOC_SLABS
067 /* Preallocate as many slab pages as possible (called from slabs_init)
068 on start-up, so users don't get confused out-of-memory errors when
069 they do have free (in-slab) space, but no space to make new slabs.
070 if maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all
071 slab types can be made.  if max memory is less than 18 MB, only the
072 smaller ones will be made.  */
073 static void slabs_preallocate (const unsigned int maxslabs);
074 #endif
075
076
077 //寻找适合给定大小的item存储的slab
078 unsigned int slabs_clsid(const size_t size) {
079 int res = POWER_SMALLEST;
080
081 if (size == 0)
082 return 0;
083 while (size > slabclass[res].size)//找到第一个比item size大的slab
084 if (res++ == power_largest)
085 return 0;
086 return res;
087 }
088
089
090 /* slab初始化*/
091 /* limit:内存大小(字节);factor:增长因子;prealloc:是否一次性分配内存*/
092 void slabs_init(const size_t limit, const double factor, const bool prealloc) {
093 int i = POWER_SMALLEST - 1;//0
094 unsigned int size = sizeof(item) + settings.chunk_size;//chunk_size 最小分配空间
095
096 mem_limit = limit;
097
098 if (prealloc) {//一次分配所有设置的内存
099 /* Allocate everything in a big chunk with malloc */
100 mem_base = malloc(mem_limit);
101 if (mem_base != NULL) {
102 mem_current = mem_base;
103 mem_avail = mem_limit;
104 } else {
105 fprintf(stderr, "Warning: Failed to allocate requested memory in one large chunk.\nWill allocate in smaller chunks\n");
106 }
107 }
108
109 memset(slabclass, 0, sizeof(slabclass));
110
111 while (++i < POWER_LARGEST && size <= settings.item_size_max / factor) {
112 /* Make sure items are always n-byte aligned */
113 if (size % CHUNK_ALIGN_BYTES)//字节数为8的倍数
114 size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
115
116 slabclass[i].size = size;//item大小
117 slabclass[i].perslab = settings.item_size_max / slabclass[i].size;//item数目
118 size *= factor;//乘以增长因子
119 if (settings.verbose > 1) {
120 fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",i, slabclass[i].size, slabclass[i].perslab);
121 }
122 }
123
124 power_largest = i;
125 slabclass[power_largest].size = settings.item_size_max;
126 slabclass[power_largest].perslab = 1;//最大的只能存储一个item
127 if (settings.verbose > 1) {
128 fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",i, slabclass[i].size, slabclass[i].perslab);
129 }
130
131 /* for the test suite:  faking of how much we've already malloc'd */
132 {
133 char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
134 if (t_initial_malloc) {
135 mem_malloced = (size_t)atol(t_initial_malloc);
136 }
137
138 }
139
140 #ifndef DONT_PREALLOC_SLABS
141 {
142 char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC");
143
144 if (pre_alloc == NULL || atoi(pre_alloc) != 0) {
145 slabs_preallocate(power_largest);
146 }
147 }
148 #endif
149 }
150
151
152 //新分配trunk
153 #ifndef DONT_PREALLOC_SLABS
154 static void slabs_preallocate (const unsigned int maxslabs) {
155 int i;
156 unsigned int prealloc = 0;
157
158 /* pre-allocate a 1MB slab in every size class so people don't get
159 confused by non-intuitive "SERVER_ERROR out of memory"
160 messages.  this is the most common question on the mailing
161 list.  if you really don't want this, you can rebuild without
162 these three lines.  */
163
164 for (i = POWER_SMALLEST; i <= POWER_LARGEST; i++) {
165 if (++prealloc > maxslabs)
166 return;
167 do_slabs_newslab(i);
168 }
169
170 }
171 #endif
172
173
174 //扩充trunk数目
175 static int grow_slab_list (const unsigned int id) {
176 slabclass_t *p = &slabclass[id];
177 if (p->slabs == p->list_size) {
178 size_t new_size =  (p->list_size != 0) ? p->list_size * 2 : 16;
179 void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
180 if (new_list == 0) return 0;
181 p->list_size = new_size;
182 p->slab_list = new_list;
183 }
184 return 1;
185 }
186
187
188
189 //分配trunk
190 static int do_slabs_newslab(const unsigned int id) {
191 slabclass_t *p = &slabclass[id];
192 int len = p->size * p->perslab;//1MB
193 char *ptr;
194
195 if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0) || (grow_slab_list(id) == 0) || ((ptr = memory_allocate((size_t)len)) == 0)) {
196 MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
197 return 0;
198 }
199
200 memset(ptr, 0, (size_t)len);
201 p->end_page_ptr = ptr;
202 p->end_page_free = p->perslab;
203
204 p->slab_list[p->slabs++] = ptr;
205 mem_malloced += len;
206 MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);
207
208 return 1;
209 }
210
211
212
213 /*@存储item@*/
214 static void *do_slabs_alloc(const size_t size, unsigned int id) {
215 slabclass_t *p;
216 void *ret = NULL;
217
218 if (id < POWER_SMALLEST || id > power_largest) {
219 MEMCACHED_SLABS_ALLOCATE_FAILED(size, 0);
220 return NULL;
221 }
222
223 p = &slabclass[id];
224 assert(p->sl_curr == 0 || ((item *)p->slots[p->sl_curr - 1])->slabs_clsid == 0);
225
226 #ifdef USE_SYSTEM_MALLOC
227 if (mem_limit && mem_malloced + size > mem_limit) {
228 MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
229 return 0;
230 }
231 mem_malloced += size;
232 ret = malloc(size);
233 MEMCACHED_SLABS_ALLOCATE(size, id, 0, ret);
234 return ret;
235 #endif
236
237 /* fail unless we have space at the end of a recently allocated page,
238 we have something on our freelist, or we could allocate a new page */
239 if (! (p->end_page_ptr != 0 || p->sl_curr != 0 || do_slabs_newslab(id) != 0)) {//没有空闲 也不能扩展
240 ret = NULL;
241 } else if (p->sl_curr != 0) {
242 /* return off our freelist */
243 ret = p->slots[--p->sl_curr];
244 } else {
245 /* if we recently allocated a whole page, return from that */
246 assert(p->end_page_ptr != NULL);
247 ret = p->end_page_ptr;
248 if (--p->end_page_free != 0) {
249 p->end_page_ptr = ((caddr_t)p->end_page_ptr) + p->size;
250 } else {
251 p->end_page_ptr = 0;
252 }
253 }
254
255 if (ret) {
256 p->requested += size;
257 MEMCACHED_SLABS_ALLOCATE(size, id, p->size, ret);
258 } else {
259 MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
260 }
261
262 return ret;
263 }
264
265
266 //释放item内存
267 static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
268 slabclass_t *p;
269
270 assert(((item *)ptr)->slabs_clsid == 0);
271 assert(id >= POWER_SMALLEST && id <= power_largest);
272 if (id < POWER_SMALLEST || id > power_largest)
273 return;
274
275 MEMCACHED_SLABS_FREE(size, id, ptr);
276 p = &slabclass[id];
277
278 #ifdef USE_SYSTEM_MALLOC
279 mem_malloced -= size;
280 free(ptr);
281 return;
282 #endif
283
284 if (p->sl_curr == p->sl_total) { //需要扩充空闲列表
285 int new_size = (p->sl_total != 0) ? p->sl_total * 2 : 16; /* 16 is arbitrary */
286 void **new_slots = realloc(p->slots, new_size * sizeof(void *));
287 if (new_slots == 0)
288 return;
289 p->slots = new_slots;
290 p->sl_total = new_size;
291 }
292 p->slots[p->sl_curr++] = ptr;
293 p->requested -= size;
294 return;
295 }
296
297
298 static int nz_strcmp(int nzlength, const char *nz, const char *z) {
299 int zlength=strlen(z);
300 return (zlength == nzlength) && (strncmp(nz, z, zlength) == 0) ? 0 : -1;
301 }
302
303
304 //获取状态
305 bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) {
306 bool ret = true;
307
308 if (add_stats != NULL) {
309 if (!stat_type) {
310 /* prepare general statistics for the engine */
311 STATS_LOCK();
312 APPEND_STAT("bytes", "%llu", (unsigned long long)stats.curr_bytes);
313 APPEND_STAT("curr_items", "%u", stats.curr_items);
314 APPEND_STAT("total_items", "%u", stats.total_items);
315 APPEND_STAT("evictions", "%llu",(unsigned long long)stats.evictions);
316 APPEND_STAT("reclaimed", "%llu",(unsigned long long)stats.reclaimed);
317 STATS_UNLOCK();
318 } else if (nz_strcmp(nkey, stat_type, "items") == 0) {
319 item_stats(add_stats, c);
320 } else if (nz_strcmp(nkey, stat_type, "slabs") == 0) {
321 slabs_stats(add_stats, c);
322 } else if (nz_strcmp(nkey, stat_type, "sizes") == 0) {
323 item_stats_sizes(add_stats, c);
324 } else {
325 ret = false;
326 }
327 } else {
328 ret = false;
329 }
330
331 return ret;
332 }
333
334
335 /*状态*/
336 static void do_slabs_stats(ADD_STAT add_stats, void *c) {
337 int i, total;
338 /* Get the per-thread stats which contain some interesting aggregates */
339 struct thread_stats thread_stats;
340 threadlocal_stats_aggregate(&thread_stats);
341
342 total = 0;
343 for(i = POWER_SMALLEST; i <= power_largest; i++) {
344 slabclass_t *p = &slabclass[i];
345 if (p->slabs != 0) {
346 uint32_t perslab, slabs;
347 slabs = p->slabs;
348 perslab = p->perslab;
349
350 char key_str[STAT_KEY_LEN];
351 char val_str[STAT_VAL_LEN];
352 int klen = 0, vlen = 0;
353
354 APPEND_NUM_STAT(i, "chunk_size", "%u", p->size);
355 APPEND_NUM_STAT(i, "chunks_per_page", "%u", perslab);
356 APPEND_NUM_STAT(i, "total_pages", "%u", slabs);
357 APPEND_NUM_STAT(i, "total_chunks", "%u", slabs * perslab);
358 APPEND_NUM_STAT(i, "used_chunks", "%u",slabs*perslab - p->sl_curr - p->end_page_free);
359 APPEND_NUM_STAT(i, "free_chunks", "%u", p->sl_curr);
360 APPEND_NUM_STAT(i, "free_chunks_end", "%u", p->end_page_free);
361 APPEND_NUM_STAT(i, "mem_requested", "%llu",(unsigned long long)p->requested);
362 APPEND_NUM_STAT(i, "get_hits", "%llu",(unsigned long long)thread_stats.slab_stats[i].get_hits);
363 APPEND_NUM_STAT(i, "cmd_set", "%llu",(unsigned long long)thread_stats.slab_stats[i].set_cmds);
364 APPEND_NUM_STAT(i, "delete_hits", "%llu",(unsigned long long)thread_stats.slab_stats[i].delete_hits);
365 APPEND_NUM_STAT(i, "incr_hits", "%llu",(unsigned long long)thread_stats.slab_stats[i].incr_hits);
366 APPEND_NUM_STAT(i, "decr_hits", "%llu",(unsigned long long)thread_stats.slab_stats[i].decr_hits);
367 APPEND_NUM_STAT(i, "cas_hits", "%llu",(unsigned long long)thread_stats.slab_stats[i].cas_hits);
368 APPEND_NUM_STAT(i, "cas_badval", "%llu",(unsigned long long)thread_stats.slab_stats[i].cas_badval);
369
370 total++;
371 }
372 }
373
374 /* add overall slab stats and append terminator */
375
376 APPEND_STAT("active_slabs", "%d", total);
377 APPEND_STAT("total_malloced", "%llu", (unsigned long long)mem_malloced);
378 add_stats(NULL, 0, NULL, 0, c);
379 }
380
381
382 //为item分配内存
383 static void *memory_allocate(size_t size) {
384 void *ret;
385
386 if (mem_base == NULL) {
387 /* We are not using a preallocated large memory chunk */
388 ret = malloc(size);
389 } else {
390 ret = mem_current;
391
392 if (size > mem_avail) {
393 return NULL;
394 }
395
396 /* mem_current pointer _must_ be aligned!!! */
397 if (size % CHUNK_ALIGN_BYTES) {
398 size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
399 }
400
401 mem_current = ((char*)mem_current) + size;
402 if (size < mem_avail) {
403 mem_avail -= size;
404 } else {
405 mem_avail = 0;
406 }
407 }
408
409 return ret;
410 }
411
412
413 //存储
414 void *slabs_alloc(size_t size, unsigned int id) {
415 void *ret;
416
417 pthread_mutex_lock(&slabs_lock);
418 ret = do_slabs_alloc(size, id);
419 pthread_mutex_unlock(&slabs_lock);
420 return ret;
421 }
422
423
424 //释放
425 void slabs_free(void *ptr, size_t size, unsigned int id) {
426 pthread_mutex_lock(&slabs_lock);
427 do_slabs_free(ptr, size, id);
428 pthread_mutex_unlock(&slabs_lock);
429 }
430
431
432 //状态
433 void slabs_stats(ADD_STAT add_stats, void *c) {
434 pthread_mutex_lock(&slabs_lock);
435 do_slabs_stats(add_stats, c);
436 pthread_mutex_unlock(&slabs_lock);
437 }
赞(0) 打赏
分享到: 更多 (0)

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏