1 /* Redis Object implementation.
2 *
3 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of Redis nor the names of its contributors may be used
15 * to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "server.h"
32 #include <math.h>
33 #include <ctype.h>
34
35 #ifdef __CYGWIN__
36 #define strtold(a,b) ((long double)strtod((a),(b)))
37 #endif
38
39 /* ===================== Creation and parsing of objects ==================== */
40
createObject(int type,void * ptr)41 robj *createObject(int type, void *ptr) {
42 robj *o = zmalloc(sizeof(*o));
43 o->type = type;
44 o->encoding = OBJ_ENCODING_RAW;
45 o->ptr = ptr;
46 o->refcount = 1;
47
48 /* Set the LRU to the current lruclock (minutes resolution), or
49 * alternatively the LFU counter. */
50 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
51 o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
52 } else {
53 o->lru = LRU_CLOCK();
54 }
55 return o;
56 }
57
58 /* Set a special refcount in the object to make it "shared":
59 * incrRefCount and decrRefCount() will test for this special refcount
60 * and will not touch the object. This way it is free to access shared
61 * objects such as small integers from different threads without any
62 * mutex.
63 *
64 * A common patter to create shared objects:
65 *
66 * robj *myobject = makeObjectShared(createObject(...));
67 *
68 */
makeObjectShared(robj * o)69 robj *makeObjectShared(robj *o) {
70 serverAssert(o->refcount == 1);
71 o->refcount = OBJ_SHARED_REFCOUNT;
72 return o;
73 }
74
75 /* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
76 * string object where o->ptr points to a proper sds string. */
createRawStringObject(const char * ptr,size_t len)77 robj *createRawStringObject(const char *ptr, size_t len) {
78 return createObject(OBJ_STRING, sdsnewlen(ptr,len));
79 }
80
81 /* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
82 * an object where the sds string is actually an unmodifiable string
83 * allocated in the same chunk as the object itself. */
createEmbeddedStringObject(const char * ptr,size_t len)84 robj *createEmbeddedStringObject(const char *ptr, size_t len) {
85 robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
86 struct sdshdr8 *sh = (void*)(o+1);
87
88 o->type = OBJ_STRING;
89 o->encoding = OBJ_ENCODING_EMBSTR;
90 o->ptr = sh+1;
91 o->refcount = 1;
92 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
93 o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
94 } else {
95 o->lru = LRU_CLOCK();
96 }
97
98 sh->len = len;
99 sh->alloc = len;
100 sh->flags = SDS_TYPE_8;
101 if (ptr == SDS_NOINIT)
102 sh->buf[len] = '\0';
103 else if (ptr) {
104 memcpy(sh->buf,ptr,len);
105 sh->buf[len] = '\0';
106 } else {
107 memset(sh->buf,0,len+1);
108 }
109 return o;
110 }
111
112 /* Create a string object with EMBSTR encoding if it is smaller than
113 * OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
114 * used.
115 *
116 * The current limit of 44 is chosen so that the biggest string object
117 * we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
118 #define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
createStringObject(const char * ptr,size_t len)119 robj *createStringObject(const char *ptr, size_t len) {
120 if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
121 return createEmbeddedStringObject(ptr,len);
122 else
123 return createRawStringObject(ptr,len);
124 }
125
126 /* Create a string object from a long long value. When possible returns a
127 * shared integer object, or at least an integer encoded one.
128 *
129 * If valueobj is non zero, the function avoids returning a shared
130 * integer, because the object is going to be used as value in the Redis key
131 * space (for instance when the INCR command is used), so we want LFU/LRU
132 * values specific for each key. */
createStringObjectFromLongLongWithOptions(long long value,int valueobj)133 robj *createStringObjectFromLongLongWithOptions(long long value, int valueobj) {
134 robj *o;
135
136 if (server.maxmemory == 0 ||
137 !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS))
138 {
139 /* If the maxmemory policy permits, we can still return shared integers
140 * even if valueobj is true. */
141 valueobj = 0;
142 }
143
144 if (value >= 0 && value < OBJ_SHARED_INTEGERS && valueobj == 0) {
145 incrRefCount(shared.integers[value]);
146 o = shared.integers[value];
147 } else {
148 if (value >= LONG_MIN && value <= LONG_MAX) {
149 o = createObject(OBJ_STRING, NULL);
150 o->encoding = OBJ_ENCODING_INT;
151 o->ptr = (void*)((long)value);
152 } else {
153 o = createObject(OBJ_STRING,sdsfromlonglong(value));
154 }
155 }
156 return o;
157 }
158
159 /* Wrapper for createStringObjectFromLongLongWithOptions() always demanding
160 * to create a shared object if possible. */
createStringObjectFromLongLong(long long value)161 robj *createStringObjectFromLongLong(long long value) {
162 return createStringObjectFromLongLongWithOptions(value,0);
163 }
164
165 /* Wrapper for createStringObjectFromLongLongWithOptions() avoiding a shared
166 * object when LFU/LRU info are needed, that is, when the object is used
167 * as a value in the key space, and Redis is configured to evict based on
168 * LFU/LRU. */
createStringObjectFromLongLongForValue(long long value)169 robj *createStringObjectFromLongLongForValue(long long value) {
170 return createStringObjectFromLongLongWithOptions(value,1);
171 }
172
173 /* Create a string object from a long double. If humanfriendly is non-zero
174 * it does not use exponential format and trims trailing zeroes at the end,
175 * however this results in loss of precision. Otherwise exp format is used
176 * and the output of snprintf() is not modified.
177 *
178 * The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
createStringObjectFromLongDouble(long double value,int humanfriendly)179 robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
180 char buf[MAX_LONG_DOUBLE_CHARS];
181 int len = ld2string(buf,sizeof(buf),value,humanfriendly? LD_STR_HUMAN: LD_STR_AUTO);
182 return createStringObject(buf,len);
183 }
184
185 /* Duplicate a string object, with the guarantee that the returned object
186 * has the same encoding as the original one.
187 *
188 * This function also guarantees that duplicating a small integer object
189 * (or a string object that contains a representation of a small integer)
190 * will always result in a fresh object that is unshared (refcount == 1).
191 *
192 * The resulting object always has refcount set to 1. */
dupStringObject(const robj * o)193 robj *dupStringObject(const robj *o) {
194 robj *d;
195
196 serverAssert(o->type == OBJ_STRING);
197
198 switch(o->encoding) {
199 case OBJ_ENCODING_RAW:
200 return createRawStringObject(o->ptr,sdslen(o->ptr));
201 case OBJ_ENCODING_EMBSTR:
202 return createEmbeddedStringObject(o->ptr,sdslen(o->ptr));
203 case OBJ_ENCODING_INT:
204 d = createObject(OBJ_STRING, NULL);
205 d->encoding = OBJ_ENCODING_INT;
206 d->ptr = o->ptr;
207 return d;
208 default:
209 serverPanic("Wrong encoding.");
210 break;
211 }
212 }
213
createQuicklistObject(void)214 robj *createQuicklistObject(void) {
215 quicklist *l = quicklistCreate();
216 robj *o = createObject(OBJ_LIST,l);
217 o->encoding = OBJ_ENCODING_QUICKLIST;
218 return o;
219 }
220
createZiplistObject(void)221 robj *createZiplistObject(void) {
222 unsigned char *zl = ziplistNew();
223 robj *o = createObject(OBJ_LIST,zl);
224 o->encoding = OBJ_ENCODING_ZIPLIST;
225 return o;
226 }
227
createSetObject(void)228 robj *createSetObject(void) {
229 dict *d = dictCreate(&setDictType,NULL);
230 robj *o = createObject(OBJ_SET,d);
231 o->encoding = OBJ_ENCODING_HT;
232 return o;
233 }
234
createIntsetObject(void)235 robj *createIntsetObject(void) {
236 intset *is = intsetNew();
237 robj *o = createObject(OBJ_SET,is);
238 o->encoding = OBJ_ENCODING_INTSET;
239 return o;
240 }
241
createHashObject(void)242 robj *createHashObject(void) {
243 unsigned char *zl = ziplistNew();
244 robj *o = createObject(OBJ_HASH, zl);
245 o->encoding = OBJ_ENCODING_ZIPLIST;
246 return o;
247 }
248
createZsetObject(void)249 robj *createZsetObject(void) {
250 zset *zs = zmalloc(sizeof(*zs));
251 robj *o;
252
253 zs->dict = dictCreate(&zsetDictType,NULL);
254 zs->zsl = zslCreate();
255 o = createObject(OBJ_ZSET,zs);
256 o->encoding = OBJ_ENCODING_SKIPLIST;
257 return o;
258 }
259
createZsetZiplistObject(void)260 robj *createZsetZiplistObject(void) {
261 unsigned char *zl = ziplistNew();
262 robj *o = createObject(OBJ_ZSET,zl);
263 o->encoding = OBJ_ENCODING_ZIPLIST;
264 return o;
265 }
266
createStreamObject(void)267 robj *createStreamObject(void) {
268 stream *s = streamNew();
269 robj *o = createObject(OBJ_STREAM,s);
270 o->encoding = OBJ_ENCODING_STREAM;
271 return o;
272 }
273
createModuleObject(moduleType * mt,void * value)274 robj *createModuleObject(moduleType *mt, void *value) {
275 moduleValue *mv = zmalloc(sizeof(*mv));
276 mv->type = mt;
277 mv->value = value;
278 return createObject(OBJ_MODULE,mv);
279 }
280
/* Release the payload of a string object. Only RAW encoded strings own a
 * separately allocated sds string; nothing is released here for the other
 * encodings. The object itself is not freed by this function. */
void freeStringObject(robj *o) {
    if (o->encoding != OBJ_ENCODING_RAW) return;

    sdsfree(o->ptr);
}
286
/* Release the payload of a list object. Only the quicklist encoding is
 * handled; any other encoding triggers a panic. The object itself is not
 * freed by this function. */
void freeListObject(robj *o) {
    if (o->encoding != OBJ_ENCODING_QUICKLIST) {
        serverPanic("Unknown list encoding type");
    } else {
        quicklistRelease(o->ptr);
    }
}
294
/* Release the payload of a set object, according to its encoding (hash
 * table or intset). Panics on any other encoding. The object itself is
 * not freed by this function. */
void freeSetObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dictRelease((dict*) o->ptr);
    } else if (o->encoding == OBJ_ENCODING_INTSET) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown set encoding type");
    }
}
307
/* Release the payload of a sorted set object, according to its encoding.
 * For the skiplist encoding the dict, the skiplist and the zset structure
 * are all released; for the ziplist encoding the blob is freed. Panics on
 * any other encoding. The object itself is not freed by this function. */
void freeZsetObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *sorted = o->ptr;

        dictRelease(sorted->dict);
        zslFree(sorted->zsl);
        zfree(sorted);
    } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown sorted set encoding");
    }
}
324
/* Release the payload of a hash object, according to its encoding (hash
 * table or ziplist). Panics on any other encoding. The object itself is
 * not freed by this function. */
void freeHashObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dictRelease((dict*) o->ptr);
    } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown hash encoding type");
    }
}
338
/* Release the payload of a module object: the wrapped value is released
 * through the free callback of its module type, then the moduleValue
 * wrapper is freed. The object itself is not freed by this function. */
void freeModuleObject(robj *o) {
    moduleValue *wrapper = o->ptr;
    moduleType *mt = wrapper->type;

    mt->free(wrapper->value);
    zfree(wrapper);
}
344
/* Release the stream held by a stream object. The object itself is not
 * freed by this function. */
void freeStreamObject(robj *o) {
    stream *s = o->ptr;

    freeStream(s);
}
348
/* Take a new reference to the object.
 *
 * Reference counts below OBJ_FIRST_SPECIAL_REFCOUNT are incremented.
 * Objects carrying OBJ_SHARED_REFCOUNT are left untouched, while trying
 * to retain an object carrying OBJ_STATIC_REFCOUNT triggers a panic. */
void incrRefCount(robj *o) {
    if (o->refcount < OBJ_FIRST_SPECIAL_REFCOUNT) {
        o->refcount++;
        return;
    }

    /* Shared objects: this refcount is immutable, nothing to do. */
    if (o->refcount == OBJ_SHARED_REFCOUNT) return;

    if (o->refcount == OBJ_STATIC_REFCOUNT) {
        serverPanic("You tried to retain an object allocated in the stack");
    }
}
360
/* Drop a reference to the object.
 *
 * When the last reference goes away (refcount == 1) the payload is
 * released with the free function matching the object type, then the
 * object itself is freed. Otherwise the count is decremented, unless the
 * object carries OBJ_SHARED_REFCOUNT, in which case it is left untouched.
 * Panics on a refcount <= 0 or on an unknown object type. */
void decrRefCount(robj *o) {
    if (o->refcount != 1) {
        if (o->refcount <= 0) serverPanic("decrRefCount against refcount <= 0");
        if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount--;
        return;
    }

    /* Last reference: release the payload, then the object itself. */
    switch(o->type) {
    case OBJ_STRING:
        freeStringObject(o);
        break;
    case OBJ_LIST:
        freeListObject(o);
        break;
    case OBJ_SET:
        freeSetObject(o);
        break;
    case OBJ_ZSET:
        freeZsetObject(o);
        break;
    case OBJ_HASH:
        freeHashObject(o);
        break;
    case OBJ_MODULE:
        freeModuleObject(o);
        break;
    case OBJ_STREAM:
        freeStreamObject(o);
        break;
    default:
        serverPanic("Unknown object type");
        break;
    }
    zfree(o);
}
379
380 /* This variant of decrRefCount() gets its argument as void, and is useful
381 * as free method in data structures that expect a 'void free_object(void*)'
382 * prototype for the free method. */
decrRefCountVoid(void * o)383 void decrRefCountVoid(void *o) {
384 decrRefCount(o);
385 }
386
387 /* This function set the ref count to zero without freeing the object.
388 * It is useful in order to pass a new object to functions incrementing
389 * the ref count of the received object. Example:
390 *
391 * functionThatWillIncrementRefCount(resetRefCount(CreateObject(...)));
392 *
393 * Otherwise you need to resort to the less elegant pattern:
394 *
395 * *obj = createObject(...);
396 * functionThatWillIncrementRefCount(obj);
397 * decrRefCount(obj);
398 */
resetRefCount(robj * obj)399 robj *resetRefCount(robj *obj) {
400 obj->refcount = 0;
401 return obj;
402 }
403
/* Check that the object 'o' has the expected 'type'.
 *
 * Returns 0 when the type matches. On a mismatch the shared "wrong type"
 * error is sent to the client 'c' and 1 is returned. */
int checkType(client *c, robj *o, int type) {
    if (o->type == type) return 0;

    addReply(c,shared.wrongtypeerr);
    return 1;
}
411
/* Try to convert the sds string 's' to a long long using string2ll().
 * Returns C_OK when the conversion succeeds, C_ERR otherwise. 'llval' is
 * passed to string2ll() as the destination of the converted value. */
int isSdsRepresentableAsLongLong(sds s, long long *llval) {
    if (string2ll(s,sdslen(s),llval)) return C_OK;

    return C_ERR;
}
415
/* Check whether the string object 'o' can be represented as a long long.
 *
 * Integer encoded objects always qualify: the stored value is written to
 * '*llval' when 'llval' is not NULL. For the other encodings the check is
 * delegated to isSdsRepresentableAsLongLong(). Returns C_OK or C_ERR. */
int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    if (o->encoding != OBJ_ENCODING_INT)
        return isSdsRepresentableAsLongLong(o->ptr,llval);

    if (llval) *llval = (long) o->ptr;
    return C_OK;
}
425
426 /* Optimize the SDS string inside the string object to require little space,
427 * in case there is more than 10% of free space at the end of the SDS
428 * string. This happens because SDS strings tend to overallocate to avoid
429 * wasting too much time in allocations when appending to the string. */
trimStringObjectIfNeeded(robj * o)430 void trimStringObjectIfNeeded(robj *o) {
431 if (o->encoding == OBJ_ENCODING_RAW &&
432 sdsavail(o->ptr) > sdslen(o->ptr)/10)
433 {
434 o->ptr = sdsRemoveFreeSpace(o->ptr);
435 }
436 }
437
438 /* Try to encode a string object in order to save space */
tryObjectEncoding(robj * o)439 robj *tryObjectEncoding(robj *o) {
440 long value;
441 sds s = o->ptr;
442 size_t len;
443
444 /* Make sure this is a string object, the only type we encode
445 * in this function. Other types use encoded memory efficient
446 * representations but are handled by the commands implementing
447 * the type. */
448 serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
449
450 /* We try some specialized encoding only for objects that are
451 * RAW or EMBSTR encoded, in other words objects that are still
452 * in represented by an actually array of chars. */
453 if (!sdsEncodedObject(o)) return o;
454
455 /* It's not safe to encode shared objects: shared objects can be shared
456 * everywhere in the "object space" of Redis and may end in places where
457 * they are not handled. We handle them only as values in the keyspace. */
458 if (o->refcount > 1) return o;
459
460 /* Check if we can represent this string as a long integer.
461 * Note that we are sure that a string larger than 20 chars is not
462 * representable as a 32 nor 64 bit integer. */
463 len = sdslen(s);
464 if (len <= 20 && string2l(s,len,&value)) {
465 /* This object is encodable as a long. Try to use a shared object.
466 * Note that we avoid using shared integers when maxmemory is used
467 * because every object needs to have a private LRU field for the LRU
468 * algorithm to work well. */
469 if ((server.maxmemory == 0 ||
470 !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) &&
471 value >= 0 &&
472 value < OBJ_SHARED_INTEGERS)
473 {
474 decrRefCount(o);
475 incrRefCount(shared.integers[value]);
476 return shared.integers[value];
477 } else {
478 if (o->encoding == OBJ_ENCODING_RAW) {
479 sdsfree(o->ptr);
480 o->encoding = OBJ_ENCODING_INT;
481 o->ptr = (void*) value;
482 return o;
483 } else if (o->encoding == OBJ_ENCODING_EMBSTR) {
484 decrRefCount(o);
485 return createStringObjectFromLongLongForValue(value);
486 }
487 }
488 }
489
490 /* If the string is small and is still RAW encoded,
491 * try the EMBSTR encoding which is more efficient.
492 * In this representation the object and the SDS string are allocated
493 * in the same chunk of memory to save space and cache misses. */
494 if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
495 robj *emb;
496
497 if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
498 emb = createEmbeddedStringObject(s,sdslen(s));
499 decrRefCount(o);
500 return emb;
501 }
502
503 /* We can't encode the object...
504 *
505 * Do the last try, and at least optimize the SDS string inside
506 * the string object to require little space, in case there
507 * is more than 10% of free space at the end of the SDS string.
508 *
509 * We do that only for relatively large strings as this branch
510 * is only entered if the length of the string is greater than
511 * OBJ_ENCODING_EMBSTR_SIZE_LIMIT. */
512 trimStringObjectIfNeeded(o);
513
514 /* Return the original object. */
515 return o;
516 }
517
518 /* Get a decoded version of an encoded object (returned as a new object).
519 * If the object is already raw-encoded just increment the ref count. */
getDecodedObject(robj * o)520 robj *getDecodedObject(robj *o) {
521 robj *dec;
522
523 if (sdsEncodedObject(o)) {
524 incrRefCount(o);
525 return o;
526 }
527 if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) {
528 char buf[32];
529
530 ll2string(buf,32,(long)o->ptr);
531 dec = createStringObject(buf,strlen(buf));
532 return dec;
533 } else {
534 serverPanic("Unknown encoding type");
535 }
536 }
537
538 /* Compare two string objects via strcmp() or strcoll() depending on flags.
539 * Note that the objects may be integer-encoded. In such a case we
540 * use ll2string() to get a string representation of the numbers on the stack
541 * and compare the strings, it's much faster than calling getDecodedObject().
542 *
543 * Important note: when REDIS_COMPARE_BINARY is used a binary-safe comparison
544 * is used. */
545
546 #define REDIS_COMPARE_BINARY (1<<0)
547 #define REDIS_COMPARE_COLL (1<<1)
548
compareStringObjectsWithFlags(robj * a,robj * b,int flags)549 int compareStringObjectsWithFlags(robj *a, robj *b, int flags) {
550 serverAssertWithInfo(NULL,a,a->type == OBJ_STRING && b->type == OBJ_STRING);
551 char bufa[128], bufb[128], *astr, *bstr;
552 size_t alen, blen, minlen;
553
554 if (a == b) return 0;
555 if (sdsEncodedObject(a)) {
556 astr = a->ptr;
557 alen = sdslen(astr);
558 } else {
559 alen = ll2string(bufa,sizeof(bufa),(long) a->ptr);
560 astr = bufa;
561 }
562 if (sdsEncodedObject(b)) {
563 bstr = b->ptr;
564 blen = sdslen(bstr);
565 } else {
566 blen = ll2string(bufb,sizeof(bufb),(long) b->ptr);
567 bstr = bufb;
568 }
569 if (flags & REDIS_COMPARE_COLL) {
570 return strcoll(astr,bstr);
571 } else {
572 int cmp;
573
574 minlen = (alen < blen) ? alen : blen;
575 cmp = memcmp(astr,bstr,minlen);
576 if (cmp == 0) return alen-blen;
577 return cmp;
578 }
579 }
580
581 /* Wrapper for compareStringObjectsWithFlags() using binary comparison. */
compareStringObjects(robj * a,robj * b)582 int compareStringObjects(robj *a, robj *b) {
583 return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_BINARY);
584 }
585
586 /* Wrapper for compareStringObjectsWithFlags() using collation. */
collateStringObjects(robj * a,robj * b)587 int collateStringObjects(robj *a, robj *b) {
588 return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_COLL);
589 }
590
591 /* Equal string objects return 1 if the two objects are the same from the
592 * point of view of a string comparison, otherwise 0 is returned. Note that
593 * this function is faster then checking for (compareStringObject(a,b) == 0)
594 * because it can perform some more optimization. */
equalStringObjects(robj * a,robj * b)595 int equalStringObjects(robj *a, robj *b) {
596 if (a->encoding == OBJ_ENCODING_INT &&
597 b->encoding == OBJ_ENCODING_INT){
598 /* If both strings are integer encoded just check if the stored
599 * long is the same. */
600 return a->ptr == b->ptr;
601 } else {
602 return compareStringObjects(a,b) == 0;
603 }
604 }
605
/* Return the length of the string held by a string object. For RAW and
 * EMBSTR encoded objects this is the sds length; for integer encoded
 * objects it is the value returned by sdigits10() for the stored
 * integer. */
size_t stringObjectLen(robj *o) {
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    if (!sdsEncodedObject(o)) return sdigits10((long)o->ptr);

    return sdslen(o->ptr);
}
614
getDoubleFromObject(const robj * o,double * target)615 int getDoubleFromObject(const robj *o, double *target) {
616 double value;
617
618 if (o == NULL) {
619 value = 0;
620 } else {
621 serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
622 if (sdsEncodedObject(o)) {
623 if (!string2d(o->ptr, sdslen(o->ptr), &value))
624 return C_ERR;
625 } else if (o->encoding == OBJ_ENCODING_INT) {
626 value = (long)o->ptr;
627 } else {
628 serverPanic("Unknown string encoding");
629 }
630 }
631 *target = value;
632 return C_OK;
633 }
634
/* Like getDoubleFromObject(), but on failure an error is sent to the
 * client 'c': 'msg' when it is not NULL, a default message otherwise.
 * Returns C_OK after storing the value in '*target', or C_ERR after
 * replying with the error, in which case '*target' is left untouched. */
int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg) {
    double parsed;

    if (getDoubleFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    if (msg == NULL) {
        addReplyError(c,"value is not a valid float");
    } else {
        addReplyError(c,(char*)msg);
    }
    return C_ERR;
}
648
/* Extract a long double from a string object and store it in '*target'.
 *
 * A NULL object yields 0. RAW and EMBSTR encoded strings are parsed with
 * string2ld(), and C_ERR is returned (leaving '*target' untouched) if the
 * parsing fails. Integer encoded strings are converted directly. Returns
 * C_OK on success. Panics on an unknown string encoding. */
int getLongDoubleFromObject(robj *o, long double *target) {
    long double parsed = 0;

    if (o != NULL) {
        serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            if (!string2ld(o->ptr, sdslen(o->ptr), &parsed)) return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            parsed = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }

    *target = parsed;
    return C_OK;
}
668
/* Like getLongDoubleFromObject(), but on failure an error is sent to the
 * client 'c': 'msg' when it is not NULL, a default message otherwise.
 * Returns C_OK after storing the value in '*target', or C_ERR after
 * replying with the error, in which case '*target' is left untouched. */
int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg) {
    long double parsed;

    if (getLongDoubleFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    if (msg == NULL) {
        addReplyError(c,"value is not a valid float");
    } else {
        addReplyError(c,(char*)msg);
    }
    return C_ERR;
}
682
/* Extract a long long from a string object.
 *
 * A NULL object yields 0. RAW and EMBSTR encoded strings are parsed with
 * string2ll(), and C_ERR is returned if the parsing fails. Integer encoded
 * strings are converted directly. On success the value is stored in
 * '*target', when 'target' is not NULL, and C_OK is returned. Panics on an
 * unknown string encoding. */
int getLongLongFromObject(robj *o, long long *target) {
    long long parsed = 0;

    if (o != NULL) {
        serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            if (string2ll(o->ptr,sdslen(o->ptr),&parsed) == 0) return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            parsed = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }

    if (target != NULL) *target = parsed;
    return C_OK;
}
701
/* Like getLongLongFromObject(), but on failure an error is sent to the
 * client 'c': 'msg' when it is not NULL, a default message otherwise.
 * Returns C_OK after storing the value in '*target', or C_ERR after
 * replying with the error, in which case '*target' is left untouched. */
int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) {
    long long parsed;

    if (getLongLongFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    if (msg == NULL) {
        addReplyError(c,"value is not an integer or out of range");
    } else {
        addReplyError(c,(char*)msg);
    }
    return C_ERR;
}
715
/* Extract a long from a string object, replying to the client 'c' with an
 * error on failure.
 *
 * The value is first obtained with getLongLongFromObjectOrReply(), which
 * replies by itself when it fails. The value is then checked against the
 * LONG_MIN..LONG_MAX range: if it does not fit, 'msg' (or a default
 * message when 'msg' is NULL) is sent to the client. Returns C_OK after
 * storing the value in '*target', C_ERR otherwise. */
int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
    long long wide;

    if (getLongLongFromObjectOrReply(c, o, &wide, msg) != C_OK) return C_ERR;

    if (wide >= LONG_MIN && wide <= LONG_MAX) {
        *target = wide;
        return C_OK;
    }

    if (msg == NULL) {
        addReplyError(c,"value is out of range");
    } else {
        addReplyError(c,(char*)msg);
    }
    return C_ERR;
}
731
strEncoding(int encoding)732 char *strEncoding(int encoding) {
733 switch(encoding) {
734 case OBJ_ENCODING_RAW: return "raw";
735 case OBJ_ENCODING_INT: return "int";
736 case OBJ_ENCODING_HT: return "hashtable";
737 case OBJ_ENCODING_QUICKLIST: return "quicklist";
738 case OBJ_ENCODING_ZIPLIST: return "ziplist";
739 case OBJ_ENCODING_INTSET: return "intset";
740 case OBJ_ENCODING_SKIPLIST: return "skiplist";
741 case OBJ_ENCODING_EMBSTR: return "embstr";
742 case OBJ_ENCODING_STREAM: return "stream";
743 default: return "unknown";
744 }
745 }
746
747 /* =========================== Memory introspection ========================= */
748
749
750 /* This is an helper function with the goal of estimating the memory
751 * size of a radix tree that is used to store Stream IDs.
752 *
753 * Note: to guess the size of the radix tree is not trivial, so we
754 * approximate it considering 16 bytes of data overhead for each
755 * key (the ID), and then adding the number of bare nodes, plus some
756 * overhead due by the data and child pointers. This secret recipe
757 * was obtained by checking the average radix tree created by real
758 * workloads, and then adjusting the constants to get numbers that
759 * more or less match the real memory usage.
760 *
761 * Actually the number of nodes and keys may be different depending
762 * on the insertion speed and thus the ability of the radix tree
763 * to compress prefixes. */
streamRadixTreeMemoryUsage(rax * rax)764 size_t streamRadixTreeMemoryUsage(rax *rax) {
765 size_t size;
766 size = rax->numele * sizeof(streamID);
767 size += rax->numnodes * sizeof(raxNode);
768 /* Add a fixed overhead due to the aux data pointer, children, ... */
769 size += rax->numnodes * sizeof(long)*30;
770 return size;
771 }
772
773 /* Returns the size in bytes consumed by the key's value in RAM.
774 * Note that the returned value is just an approximation, especially in the
775 * case of aggregated data types where only "sample_size" elements
776 * are checked and averaged to estimate the total size. */
777 #define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
objectComputeSize(robj * o,size_t sample_size)778 size_t objectComputeSize(robj *o, size_t sample_size) {
779 sds ele, ele2;
780 dict *d;
781 dictIterator *di;
782 struct dictEntry *de;
783 size_t asize = 0, elesize = 0, samples = 0;
784
785 if (o->type == OBJ_STRING) {
786 if(o->encoding == OBJ_ENCODING_INT) {
787 asize = sizeof(*o);
788 } else if(o->encoding == OBJ_ENCODING_RAW) {
789 asize = sdsZmallocSize(o->ptr)+sizeof(*o);
790 } else if(o->encoding == OBJ_ENCODING_EMBSTR) {
791 asize = sdslen(o->ptr)+2+sizeof(*o);
792 } else {
793 serverPanic("Unknown string encoding");
794 }
795 } else if (o->type == OBJ_LIST) {
796 if (o->encoding == OBJ_ENCODING_QUICKLIST) {
797 quicklist *ql = o->ptr;
798 quicklistNode *node = ql->head;
799 asize = sizeof(*o)+sizeof(quicklist);
800 do {
801 elesize += sizeof(quicklistNode)+ziplistBlobLen(node->zl);
802 samples++;
803 } while ((node = node->next) && samples < sample_size);
804 asize += (double)elesize/samples*ql->len;
805 } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
806 asize = sizeof(*o)+ziplistBlobLen(o->ptr);
807 } else {
808 serverPanic("Unknown list encoding");
809 }
810 } else if (o->type == OBJ_SET) {
811 if (o->encoding == OBJ_ENCODING_HT) {
812 d = o->ptr;
813 di = dictGetIterator(d);
814 asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
815 while((de = dictNext(di)) != NULL && samples < sample_size) {
816 ele = dictGetKey(de);
817 elesize += sizeof(struct dictEntry) + sdsZmallocSize(ele);
818 samples++;
819 }
820 dictReleaseIterator(di);
821 if (samples) asize += (double)elesize/samples*dictSize(d);
822 } else if (o->encoding == OBJ_ENCODING_INTSET) {
823 intset *is = o->ptr;
824 asize = sizeof(*o)+sizeof(*is)+(size_t)is->encoding*is->length;
825 } else {
826 serverPanic("Unknown set encoding");
827 }
828 } else if (o->type == OBJ_ZSET) {
829 if (o->encoding == OBJ_ENCODING_ZIPLIST) {
830 asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
831 } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
832 d = ((zset*)o->ptr)->dict;
833 zskiplist *zsl = ((zset*)o->ptr)->zsl;
834 zskiplistNode *znode = zsl->header->level[0].forward;
835 asize = sizeof(*o)+sizeof(zset)+sizeof(zskiplist)+sizeof(dict)+
836 (sizeof(struct dictEntry*)*dictSlots(d))+
837 zmalloc_size(zsl->header);
838 while(znode != NULL && samples < sample_size) {
839 elesize += sdsZmallocSize(znode->ele);
840 elesize += sizeof(struct dictEntry) + zmalloc_size(znode);
841 samples++;
842 znode = znode->level[0].forward;
843 }
844 if (samples) asize += (double)elesize/samples*dictSize(d);
845 } else {
846 serverPanic("Unknown sorted set encoding");
847 }
848 } else if (o->type == OBJ_HASH) {
849 if (o->encoding == OBJ_ENCODING_ZIPLIST) {
850 asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
851 } else if (o->encoding == OBJ_ENCODING_HT) {
852 d = o->ptr;
853 di = dictGetIterator(d);
854 asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
855 while((de = dictNext(di)) != NULL && samples < sample_size) {
856 ele = dictGetKey(de);
857 ele2 = dictGetVal(de);
858 elesize += sdsZmallocSize(ele) + sdsZmallocSize(ele2);
859 elesize += sizeof(struct dictEntry);
860 samples++;
861 }
862 dictReleaseIterator(di);
863 if (samples) asize += (double)elesize/samples*dictSize(d);
864 } else {
865 serverPanic("Unknown hash encoding");
866 }
867 } else if (o->type == OBJ_STREAM) {
868 stream *s = o->ptr;
869 asize = sizeof(*o);
870 asize += streamRadixTreeMemoryUsage(s->rax);
871
872 /* Now we have to add the listpacks. The last listpack is often non
873 * complete, so we estimate the size of the first N listpacks, and
874 * use the average to compute the size of the first N-1 listpacks, and
875 * finally add the real size of the last node. */
876 raxIterator ri;
877 raxStart(&ri,s->rax);
878 raxSeek(&ri,"^",NULL,0);
879 size_t lpsize = 0, samples = 0;
880 while(samples < sample_size && raxNext(&ri)) {
881 unsigned char *lp = ri.data;
882 lpsize += lpBytes(lp);
883 samples++;
884 }
885 if (s->rax->numele <= samples) {
886 asize += lpsize;
887 } else {
888 if (samples) lpsize /= samples; /* Compute the average. */
889 asize += lpsize * (s->rax->numele-1);
890 /* No need to check if seek succeeded, we enter this branch only
891 * if there are a few elements in the radix tree. */
892 raxSeek(&ri,"$",NULL,0);
893 raxNext(&ri);
894 asize += lpBytes(ri.data);
895 }
896 raxStop(&ri);
897
898 /* Consumer groups also have a non trivial memory overhead if there
899 * are many consumers and many groups, let's count at least the
900 * overhead of the pending entries in the groups and consumers
901 * PELs. */
902 if (s->cgroups) {
903 raxStart(&ri,s->cgroups);
904 raxSeek(&ri,"^",NULL,0);
905 while(raxNext(&ri)) {
906 streamCG *cg = ri.data;
907 asize += sizeof(*cg);
908 asize += streamRadixTreeMemoryUsage(cg->pel);
909 asize += sizeof(streamNACK)*raxSize(cg->pel);
910
911 /* For each consumer we also need to add the basic data
912 * structures and the PEL memory usage. */
913 raxIterator cri;
914 raxStart(&cri,cg->consumers);
915 raxSeek(&cri,"^",NULL,0);
916 while(raxNext(&cri)) {
917 streamConsumer *consumer = cri.data;
918 asize += sizeof(*consumer);
919 asize += sdslen(consumer->name);
920 asize += streamRadixTreeMemoryUsage(consumer->pel);
921 /* Don't count NACKs again, they are shared with the
922 * consumer group PEL. */
923 }
924 raxStop(&cri);
925 }
926 raxStop(&ri);
927 }
928 } else if (o->type == OBJ_MODULE) {
929 moduleValue *mv = o->ptr;
930 moduleType *mt = mv->type;
931 if (mt->mem_usage != NULL) {
932 asize = mt->mem_usage(mv->value);
933 } else {
934 asize = 0;
935 }
936 } else {
937 serverPanic("Unknown object type");
938 }
939 return asize;
940 }
941
942 /* Release data obtained with getMemoryOverheadData(). */
freeMemoryOverheadData(struct redisMemOverhead * mh)943 void freeMemoryOverheadData(struct redisMemOverhead *mh) {
944 zfree(mh->db);
945 zfree(mh);
946 }
947
948 /* Return a struct redisMemOverhead filled with memory overhead
949 * information used for the MEMORY OVERHEAD and INFO command. The returned
950 * structure pointer should be freed calling freeMemoryOverheadData(). */
getMemoryOverheadData(void)951 struct redisMemOverhead *getMemoryOverheadData(void) {
952 int j;
953 size_t mem_total = 0;
954 size_t mem = 0;
955 size_t zmalloc_used = zmalloc_used_memory();
956 struct redisMemOverhead *mh = zcalloc(sizeof(*mh));
957
958 mh->total_allocated = zmalloc_used;
959 mh->startup_allocated = server.initial_memory_usage;
960 mh->peak_allocated = server.stat_peak_memory;
961 mh->total_frag =
962 (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used;
963 mh->total_frag_bytes =
964 server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
965 mh->allocator_frag =
966 (float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated;
967 mh->allocator_frag_bytes =
968 server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated;
969 mh->allocator_rss =
970 (float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
971 mh->allocator_rss_bytes =
972 server.cron_malloc_stats.allocator_resident - server.cron_malloc_stats.allocator_active;
973 mh->rss_extra =
974 (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.allocator_resident;
975 mh->rss_extra_bytes =
976 server.cron_malloc_stats.process_rss - server.cron_malloc_stats.allocator_resident;
977
978 mem_total += server.initial_memory_usage;
979
980 mem = 0;
981 if (server.repl_backlog)
982 mem += zmalloc_size(server.repl_backlog);
983 mh->repl_backlog = mem;
984 mem_total += mem;
985
986 /* Computing the memory used by the clients would be O(N) if done
987 * here online. We use our values computed incrementally by
988 * clientsCronTrackClientsMemUsage(). */
989 mh->clients_slaves = server.stat_clients_type_memory[CLIENT_TYPE_SLAVE];
990 mh->clients_normal = server.stat_clients_type_memory[CLIENT_TYPE_MASTER]+
991 server.stat_clients_type_memory[CLIENT_TYPE_PUBSUB]+
992 server.stat_clients_type_memory[CLIENT_TYPE_NORMAL];
993 mem_total += mh->clients_slaves;
994 mem_total += mh->clients_normal;
995
996 mem = 0;
997 if (server.aof_state != AOF_OFF) {
998 mem += sdsZmallocSize(server.aof_buf);
999 mem += aofRewriteBufferSize();
1000 }
1001 mh->aof_buffer = mem;
1002 mem_total+=mem;
1003
1004 mem = server.lua_scripts_mem;
1005 mem += dictSize(server.lua_scripts) * sizeof(dictEntry) +
1006 dictSlots(server.lua_scripts) * sizeof(dictEntry*);
1007 mem += dictSize(server.repl_scriptcache_dict) * sizeof(dictEntry) +
1008 dictSlots(server.repl_scriptcache_dict) * sizeof(dictEntry*);
1009 if (listLength(server.repl_scriptcache_fifo) > 0) {
1010 mem += listLength(server.repl_scriptcache_fifo) * (sizeof(listNode) +
1011 sdsZmallocSize(listNodeValue(listFirst(server.repl_scriptcache_fifo))));
1012 }
1013 mh->lua_caches = mem;
1014 mem_total+=mem;
1015
1016 for (j = 0; j < server.dbnum; j++) {
1017 redisDb *db = server.db+j;
1018 long long keyscount = dictSize(db->dict);
1019 if (keyscount==0) continue;
1020
1021 mh->total_keys += keyscount;
1022 mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1));
1023 mh->db[mh->num_dbs].dbid = j;
1024
1025 mem = dictSize(db->dict) * sizeof(dictEntry) +
1026 dictSlots(db->dict) * sizeof(dictEntry*) +
1027 dictSize(db->dict) * sizeof(robj);
1028 mh->db[mh->num_dbs].overhead_ht_main = mem;
1029 mem_total+=mem;
1030
1031 mem = dictSize(db->expires) * sizeof(dictEntry) +
1032 dictSlots(db->expires) * sizeof(dictEntry*);
1033 mh->db[mh->num_dbs].overhead_ht_expires = mem;
1034 mem_total+=mem;
1035
1036 mh->num_dbs++;
1037 }
1038
1039 mh->overhead_total = mem_total;
1040 mh->dataset = zmalloc_used - mem_total;
1041 mh->peak_perc = (float)zmalloc_used*100/mh->peak_allocated;
1042
1043 /* Metrics computed after subtracting the startup memory from
1044 * the total memory. */
1045 size_t net_usage = 1;
1046 if (zmalloc_used > mh->startup_allocated)
1047 net_usage = zmalloc_used - mh->startup_allocated;
1048 mh->dataset_perc = (float)mh->dataset*100/net_usage;
1049 mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : 0;
1050
1051 return mh;
1052 }
1053
1054 /* Helper for "MEMORY allocator-stats", used as a callback for the jemalloc
1055 * stats output. */
inputCatSds(void * result,const char * str)1056 void inputCatSds(void *result, const char *str) {
1057 /* result is actually a (sds *), so re-cast it here */
1058 sds *info = (sds *)result;
1059 *info = sdscat(*info, str);
1060 }
1061
1062 /* This implements MEMORY DOCTOR. An human readable analysis of the Redis
1063 * memory condition. */
getMemoryDoctorReport(void)1064 sds getMemoryDoctorReport(void) {
1065 int empty = 0; /* Instance is empty or almost empty. */
1066 int big_peak = 0; /* Memory peak is much larger than used mem. */
1067 int high_frag = 0; /* High fragmentation. */
1068 int high_alloc_frag = 0;/* High allocator fragmentation. */
1069 int high_proc_rss = 0; /* High process rss overhead. */
1070 int high_alloc_rss = 0; /* High rss overhead. */
1071 int big_slave_buf = 0; /* Slave buffers are too big. */
1072 int big_client_buf = 0; /* Client buffers are too big. */
1073 int many_scripts = 0; /* Script cache has too many scripts. */
1074 int num_reports = 0;
1075 struct redisMemOverhead *mh = getMemoryOverheadData();
1076
1077 if (mh->total_allocated < (1024*1024*5)) {
1078 empty = 1;
1079 num_reports++;
1080 } else {
1081 /* Peak is > 150% of current used memory? */
1082 if (((float)mh->peak_allocated / mh->total_allocated) > 1.5) {
1083 big_peak = 1;
1084 num_reports++;
1085 }
1086
1087 /* Fragmentation is higher than 1.4 and 10MB ?*/
1088 if (mh->total_frag > 1.4 && mh->total_frag_bytes > 10<<20) {
1089 high_frag = 1;
1090 num_reports++;
1091 }
1092
1093 /* External fragmentation is higher than 1.1 and 10MB? */
1094 if (mh->allocator_frag > 1.1 && mh->allocator_frag_bytes > 10<<20) {
1095 high_alloc_frag = 1;
1096 num_reports++;
1097 }
1098
1099 /* Allocator rss is higher than 1.1 and 10MB ? */
1100 if (mh->allocator_rss > 1.1 && mh->allocator_rss_bytes > 10<<20) {
1101 high_alloc_rss = 1;
1102 num_reports++;
1103 }
1104
1105 /* Non-Allocator rss is higher than 1.1 and 10MB ? */
1106 if (mh->rss_extra > 1.1 && mh->rss_extra_bytes > 10<<20) {
1107 high_proc_rss = 1;
1108 num_reports++;
1109 }
1110
1111 /* Clients using more than 200k each average? */
1112 long numslaves = listLength(server.slaves);
1113 long numclients = listLength(server.clients)-numslaves;
1114 if (mh->clients_normal / numclients > (1024*200)) {
1115 big_client_buf = 1;
1116 num_reports++;
1117 }
1118
1119 /* Slaves using more than 10 MB each? */
1120 if (numslaves > 0 && mh->clients_slaves / numslaves > (1024*1024*10)) {
1121 big_slave_buf = 1;
1122 num_reports++;
1123 }
1124
1125 /* Too many scripts are cached? */
1126 if (dictSize(server.lua_scripts) > 1000) {
1127 many_scripts = 1;
1128 num_reports++;
1129 }
1130 }
1131
1132 sds s;
1133 if (num_reports == 0) {
1134 s = sdsnew(
1135 "Hi Sam, I can't find any memory issue in your instance. "
1136 "I can only account for what occurs on this base.\n");
1137 } else if (empty == 1) {
1138 s = sdsnew(
1139 "Hi Sam, this instance is empty or is using very little memory, "
1140 "my issues detector can't be used in these conditions. "
1141 "Please, leave for your mission on Earth and fill it with some data. "
1142 "The new Sam and I will be back to our programming as soon as I "
1143 "finished rebooting.\n");
1144 } else {
1145 s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n");
1146 if (big_peak) {
1147 s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n");
1148 }
1149 if (high_frag) {
1150 s = sdscatprintf(s," * High total RSS: This instance has a memory fragmentation and RSS overhead greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
1151 }
1152 if (high_alloc_frag) {
1153 s = sdscatprintf(s," * High allocator fragmentation: This instance has an allocator external fragmentation greater than 1.1. This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. You can try enabling 'activedefrag' config option.\n\n");
1154 }
1155 if (high_alloc_rss) {
1156 s = sdscatprintf(s," * High allocator RSS overhead: This instance has an RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the allocator is much larger than the sum what the allocator actually holds). This problem is usually due to a large peak memory (check if there is a peak memory entry above in the report), you can try the MEMORY PURGE command to reclaim it.\n\n");
1157 }
1158 if (high_proc_rss) {
1159 s = sdscatprintf(s," * High process RSS overhead: This instance has non-allocator RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the Redis process is much larger than the RSS the allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
1160 }
1161 if (big_slave_buf) {
1162 s = sdscat(s," * Big replica buffers: The replica output buffers in this instance are greater than 10MB for each replica (on average). This likely means that there is some replica instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what replica is not receiving data correctly and why. You can use the INFO output in order to check the replicas delays and the CLIENT LIST command to check the output buffers of each replica.\n\n");
1163 }
1164 if (big_client_buf) {
1165 s = sdscat(s," * Big client buffers: The clients output buffers in this instance are greater than 200K per client (on average). This may result from different causes, like Pub/Sub clients subscribed to channels bot not receiving data fast enough, so that data piles on the Redis instance output buffer, or clients sending commands with large replies or very large sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to investigate the issue if it causes problems in your instance, or to understand better why certain clients are using a big amount of memory.\n\n");
1166 }
1167 if (many_scripts) {
1168 s = sdscat(s," * Many scripts: There seem to be many cached scripts in this instance (more than 1000). This may be because scripts are generated and `EVAL`ed, instead of being parameterized (with KEYS and ARGV), `SCRIPT LOAD`ed and `EVALSHA`ed. Unless `SCRIPT FLUSH` is called periodically, the scripts' caches may end up consuming most of your memory.\n\n");
1169 }
1170 s = sdscat(s,"I'm here to keep you safe, Sam. I want to help you.\n");
1171 }
1172 freeMemoryOverheadData(mh);
1173 return s;
1174 }
1175
1176 /* Set the object LRU/LFU depending on server.maxmemory_policy.
1177 * The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
1178 * The lru_idle and lru_clock args are only relevant if policy
1179 * is MAXMEMORY_FLAG_LRU.
1180 * Either or both of them may be <0, in that case, nothing is set. */
objectSetLRUOrLFU(robj * val,long long lfu_freq,long long lru_idle,long long lru_clock,int lru_multiplier)1181 int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
1182 long long lru_clock, int lru_multiplier) {
1183 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1184 if (lfu_freq >= 0) {
1185 serverAssert(lfu_freq <= 255);
1186 val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
1187 return 1;
1188 }
1189 } else if (lru_idle >= 0) {
1190 /* Provided LRU idle time is in seconds. Scale
1191 * according to the LRU clock resolution this Redis
1192 * instance was compiled with (normally 1000 ms, so the
1193 * below statement will expand to lru_idle*1000/1000. */
1194 lru_idle = lru_idle*lru_multiplier/LRU_CLOCK_RESOLUTION;
1195 long lru_abs = lru_clock - lru_idle; /* Absolute access time. */
1196 /* If the LRU field underflows (since LRU it is a wrapping
1197 * clock), the best we can do is to provide a large enough LRU
1198 * that is half-way in the circlular LRU clock we use: this way
1199 * the computed idle time for this object will stay high for quite
1200 * some time. */
1201 if (lru_abs < 0)
1202 lru_abs = (lru_clock+(LRU_CLOCK_MAX/2)) % LRU_CLOCK_MAX;
1203 val->lru = lru_abs;
1204 return 1;
1205 }
1206 return 0;
1207 }
1208
1209 /* ======================= The OBJECT and MEMORY commands =================== */
1210
1211 /* This is a helper function for the OBJECT command. We need to lookup keys
1212 * without any modification of LRU or other parameters. */
objectCommandLookup(client * c,robj * key)1213 robj *objectCommandLookup(client *c, robj *key) {
1214 return lookupKeyReadWithFlags(c->db,key,LOOKUP_NOTOUCH|LOOKUP_NONOTIFY);
1215 }
1216
/* Same as objectCommandLookup(), but when the lookup yields NULL the given
 * `reply` object is sent to the client. The lookup result, NULL included,
 * is returned to the caller in both cases. */
robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) {
    robj *found = objectCommandLookup(c,key);

    if (found != NULL) return found;
    addReply(c, reply);
    return NULL;
}
1223
1224 /* Object command allows to inspect the internals of a Redis Object.
1225 * Usage: OBJECT <refcount|encoding|idletime|freq> <key> */
objectCommand(client * c)1226 void objectCommand(client *c) {
1227 robj *o;
1228
1229 if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
1230 const char *help[] = {
1231 "ENCODING <key> -- Return the kind of internal representation used in order to store the value associated with a key.",
1232 "FREQ <key> -- Return the access frequency index of the key. The returned integer is proportional to the logarithm of the recent access frequency of the key.",
1233 "IDLETIME <key> -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key.",
1234 "REFCOUNT <key> -- Return the number of references of the value associated with the specified key.",
1235 NULL
1236 };
1237 addReplyHelp(c, help);
1238 } else if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) {
1239 if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
1240 == NULL) return;
1241 addReplyLongLong(c,o->refcount);
1242 } else if (!strcasecmp(c->argv[1]->ptr,"encoding") && c->argc == 3) {
1243 if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
1244 == NULL) return;
1245 addReplyBulkCString(c,strEncoding(o->encoding));
1246 } else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) {
1247 if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
1248 == NULL) return;
1249 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1250 addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1251 return;
1252 }
1253 addReplyLongLong(c,estimateObjectIdleTime(o)/1000);
1254 } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) {
1255 if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
1256 == NULL) return;
1257 if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) {
1258 addReplyError(c,"An LFU maxmemory policy is not selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1259 return;
1260 }
1261 /* LFUDecrAndReturn should be called
1262 * in case of the key has not been accessed for a long time,
1263 * because we update the access time only
1264 * when the key is read or overwritten. */
1265 addReplyLongLong(c,LFUDecrAndReturn(o));
1266 } else {
1267 addReplySubcommandSyntaxError(c);
1268 }
1269 }
1270
1271 /* The memory command will eventually be a complete interface for the
1272 * memory introspection capabilities of Redis.
1273 *
1274 * Usage: MEMORY usage <key> */
memoryCommand(client * c)1275 void memoryCommand(client *c) {
1276 if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
1277 const char *help[] = {
1278 "DOCTOR - Return memory problems reports.",
1279 "MALLOC-STATS -- Return internal statistics report from the memory allocator.",
1280 "PURGE -- Attempt to purge dirty pages for reclamation by the allocator.",
1281 "STATS -- Return information about the memory usage of the server.",
1282 "USAGE <key> [SAMPLES <count>] -- Return memory in bytes used by <key> and its value. Nested values are sampled up to <count> times (default: 5).",
1283 NULL
1284 };
1285 addReplyHelp(c, help);
1286 } else if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc >= 3) {
1287 dictEntry *de;
1288 long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES;
1289 for (int j = 3; j < c->argc; j++) {
1290 if (!strcasecmp(c->argv[j]->ptr,"samples") &&
1291 j+1 < c->argc)
1292 {
1293 if (getLongLongFromObjectOrReply(c,c->argv[j+1],&samples,NULL)
1294 == C_ERR) return;
1295 if (samples < 0) {
1296 addReply(c,shared.syntaxerr);
1297 return;
1298 }
1299 if (samples == 0) samples = LLONG_MAX;;
1300 j++; /* skip option argument. */
1301 } else {
1302 addReply(c,shared.syntaxerr);
1303 return;
1304 }
1305 }
1306 if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) {
1307 addReplyNull(c);
1308 return;
1309 }
1310 size_t usage = objectComputeSize(dictGetVal(de),samples);
1311 usage += sdsZmallocSize(dictGetKey(de));
1312 usage += sizeof(dictEntry);
1313 addReplyLongLong(c,usage);
1314 } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
1315 struct redisMemOverhead *mh = getMemoryOverheadData();
1316
1317 addReplyMapLen(c,25+mh->num_dbs);
1318
1319 addReplyBulkCString(c,"peak.allocated");
1320 addReplyLongLong(c,mh->peak_allocated);
1321
1322 addReplyBulkCString(c,"total.allocated");
1323 addReplyLongLong(c,mh->total_allocated);
1324
1325 addReplyBulkCString(c,"startup.allocated");
1326 addReplyLongLong(c,mh->startup_allocated);
1327
1328 addReplyBulkCString(c,"replication.backlog");
1329 addReplyLongLong(c,mh->repl_backlog);
1330
1331 addReplyBulkCString(c,"clients.slaves");
1332 addReplyLongLong(c,mh->clients_slaves);
1333
1334 addReplyBulkCString(c,"clients.normal");
1335 addReplyLongLong(c,mh->clients_normal);
1336
1337 addReplyBulkCString(c,"aof.buffer");
1338 addReplyLongLong(c,mh->aof_buffer);
1339
1340 addReplyBulkCString(c,"lua.caches");
1341 addReplyLongLong(c,mh->lua_caches);
1342
1343 for (size_t j = 0; j < mh->num_dbs; j++) {
1344 char dbname[32];
1345 snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);
1346 addReplyBulkCString(c,dbname);
1347 addReplyMapLen(c,2);
1348
1349 addReplyBulkCString(c,"overhead.hashtable.main");
1350 addReplyLongLong(c,mh->db[j].overhead_ht_main);
1351
1352 addReplyBulkCString(c,"overhead.hashtable.expires");
1353 addReplyLongLong(c,mh->db[j].overhead_ht_expires);
1354 }
1355
1356 addReplyBulkCString(c,"overhead.total");
1357 addReplyLongLong(c,mh->overhead_total);
1358
1359 addReplyBulkCString(c,"keys.count");
1360 addReplyLongLong(c,mh->total_keys);
1361
1362 addReplyBulkCString(c,"keys.bytes-per-key");
1363 addReplyLongLong(c,mh->bytes_per_key);
1364
1365 addReplyBulkCString(c,"dataset.bytes");
1366 addReplyLongLong(c,mh->dataset);
1367
1368 addReplyBulkCString(c,"dataset.percentage");
1369 addReplyDouble(c,mh->dataset_perc);
1370
1371 addReplyBulkCString(c,"peak.percentage");
1372 addReplyDouble(c,mh->peak_perc);
1373
1374 addReplyBulkCString(c,"allocator.allocated");
1375 addReplyLongLong(c,server.cron_malloc_stats.allocator_allocated);
1376
1377 addReplyBulkCString(c,"allocator.active");
1378 addReplyLongLong(c,server.cron_malloc_stats.allocator_active);
1379
1380 addReplyBulkCString(c,"allocator.resident");
1381 addReplyLongLong(c,server.cron_malloc_stats.allocator_resident);
1382
1383 addReplyBulkCString(c,"allocator-fragmentation.ratio");
1384 addReplyDouble(c,mh->allocator_frag);
1385
1386 addReplyBulkCString(c,"allocator-fragmentation.bytes");
1387 addReplyLongLong(c,mh->allocator_frag_bytes);
1388
1389 addReplyBulkCString(c,"allocator-rss.ratio");
1390 addReplyDouble(c,mh->allocator_rss);
1391
1392 addReplyBulkCString(c,"allocator-rss.bytes");
1393 addReplyLongLong(c,mh->allocator_rss_bytes);
1394
1395 addReplyBulkCString(c,"rss-overhead.ratio");
1396 addReplyDouble(c,mh->rss_extra);
1397
1398 addReplyBulkCString(c,"rss-overhead.bytes");
1399 addReplyLongLong(c,mh->rss_extra_bytes);
1400
1401 addReplyBulkCString(c,"fragmentation"); /* this is the total RSS overhead, including fragmentation */
1402 addReplyDouble(c,mh->total_frag); /* it is kept here for backwards compatibility */
1403
1404 addReplyBulkCString(c,"fragmentation.bytes");
1405 addReplyLongLong(c,mh->total_frag_bytes);
1406
1407 freeMemoryOverheadData(mh);
1408 } else if (!strcasecmp(c->argv[1]->ptr,"malloc-stats") && c->argc == 2) {
1409 #if defined(USE_JEMALLOC)
1410 sds info = sdsempty();
1411 je_malloc_stats_print(inputCatSds, &info, NULL);
1412 addReplyVerbatim(c,info,sdslen(info),"txt");
1413 sdsfree(info);
1414 #else
1415 addReplyBulkCString(c,"Stats not supported for the current allocator");
1416 #endif
1417 } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
1418 sds report = getMemoryDoctorReport();
1419 addReplyVerbatim(c,report,sdslen(report),"txt");
1420 sdsfree(report);
1421 } else if (!strcasecmp(c->argv[1]->ptr,"purge") && c->argc == 2) {
1422 if (jemalloc_purge() == 0)
1423 addReply(c, shared.ok);
1424 else
1425 addReplyError(c, "Error purging dirty pages");
1426 } else {
1427 addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try MEMORY HELP", (char*)c->argv[1]->ptr);
1428 }
1429 }
1430