1 /* Redis Object implementation.
2  *
3  * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  *   * Redistributions of source code must retain the above copyright notice,
10  *     this list of conditions and the following disclaimer.
11  *   * Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *   * Neither the name of Redis nor the names of its contributors may be used
15  *     to endorse or promote products derived from this software without
16  *     specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "server.h"
32 #include <math.h>
33 #include <ctype.h>
34 
35 #ifdef __CYGWIN__
36 #define strtold(a,b) ((long double)strtod((a),(b)))
37 #endif
38 
39 /* ===================== Creation and parsing of objects ==================== */
40 
createObject(int type,void * ptr)41 robj *createObject(int type, void *ptr) {
42     robj *o = zmalloc(sizeof(*o));
43     o->type = type;
44     o->encoding = OBJ_ENCODING_RAW;
45     o->ptr = ptr;
46     o->refcount = 1;
47 
48     /* Set the LRU to the current lruclock (minutes resolution), or
49      * alternatively the LFU counter. */
50     if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
51         o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
52     } else {
53         o->lru = LRU_CLOCK();
54     }
55     return o;
56 }
57 
58 /* Set a special refcount in the object to make it "shared":
59  * incrRefCount and decrRefCount() will test for this special refcount
60  * and will not touch the object. This way it is free to access shared
61  * objects such as small integers from different threads without any
62  * mutex.
63  *
64  * A common patter to create shared objects:
65  *
66  * robj *myobject = makeObjectShared(createObject(...));
67  *
68  */
makeObjectShared(robj * o)69 robj *makeObjectShared(robj *o) {
70     serverAssert(o->refcount == 1);
71     o->refcount = OBJ_SHARED_REFCOUNT;
72     return o;
73 }
74 
75 /* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
76  * string object where o->ptr points to a proper sds string. */
createRawStringObject(const char * ptr,size_t len)77 robj *createRawStringObject(const char *ptr, size_t len) {
78     return createObject(OBJ_STRING, sdsnewlen(ptr,len));
79 }
80 
81 /* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
82  * an object where the sds string is actually an unmodifiable string
83  * allocated in the same chunk as the object itself. */
createEmbeddedStringObject(const char * ptr,size_t len)84 robj *createEmbeddedStringObject(const char *ptr, size_t len) {
85     robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
86     struct sdshdr8 *sh = (void*)(o+1);
87 
88     o->type = OBJ_STRING;
89     o->encoding = OBJ_ENCODING_EMBSTR;
90     o->ptr = sh+1;
91     o->refcount = 1;
92     if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
93         o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
94     } else {
95         o->lru = LRU_CLOCK();
96     }
97 
98     sh->len = len;
99     sh->alloc = len;
100     sh->flags = SDS_TYPE_8;
101     if (ptr == SDS_NOINIT)
102         sh->buf[len] = '\0';
103     else if (ptr) {
104         memcpy(sh->buf,ptr,len);
105         sh->buf[len] = '\0';
106     } else {
107         memset(sh->buf,0,len+1);
108     }
109     return o;
110 }
111 
112 /* Create a string object with EMBSTR encoding if it is smaller than
113  * OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
114  * used.
115  *
116  * The current limit of 44 is chosen so that the biggest string object
117  * we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
118 #define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
createStringObject(const char * ptr,size_t len)119 robj *createStringObject(const char *ptr, size_t len) {
120     if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
121         return createEmbeddedStringObject(ptr,len);
122     else
123         return createRawStringObject(ptr,len);
124 }
125 
126 /* Create a string object from a long long value. When possible returns a
127  * shared integer object, or at least an integer encoded one.
128  *
129  * If valueobj is non zero, the function avoids returning a shared
130  * integer, because the object is going to be used as value in the Redis key
131  * space (for instance when the INCR command is used), so we want LFU/LRU
132  * values specific for each key. */
createStringObjectFromLongLongWithOptions(long long value,int valueobj)133 robj *createStringObjectFromLongLongWithOptions(long long value, int valueobj) {
134     robj *o;
135 
136     if (server.maxmemory == 0 ||
137         !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS))
138     {
139         /* If the maxmemory policy permits, we can still return shared integers
140          * even if valueobj is true. */
141         valueobj = 0;
142     }
143 
144     if (value >= 0 && value < OBJ_SHARED_INTEGERS && valueobj == 0) {
145         incrRefCount(shared.integers[value]);
146         o = shared.integers[value];
147     } else {
148         if (value >= LONG_MIN && value <= LONG_MAX) {
149             o = createObject(OBJ_STRING, NULL);
150             o->encoding = OBJ_ENCODING_INT;
151             o->ptr = (void*)((long)value);
152         } else {
153             o = createObject(OBJ_STRING,sdsfromlonglong(value));
154         }
155     }
156     return o;
157 }
158 
159 /* Wrapper for createStringObjectFromLongLongWithOptions() always demanding
160  * to create a shared object if possible. */
createStringObjectFromLongLong(long long value)161 robj *createStringObjectFromLongLong(long long value) {
162     return createStringObjectFromLongLongWithOptions(value,0);
163 }
164 
165 /* Wrapper for createStringObjectFromLongLongWithOptions() avoiding a shared
166  * object when LFU/LRU info are needed, that is, when the object is used
167  * as a value in the key space, and Redis is configured to evict based on
168  * LFU/LRU. */
createStringObjectFromLongLongForValue(long long value)169 robj *createStringObjectFromLongLongForValue(long long value) {
170     return createStringObjectFromLongLongWithOptions(value,1);
171 }
172 
173 /* Create a string object from a long double. If humanfriendly is non-zero
174  * it does not use exponential format and trims trailing zeroes at the end,
175  * however this results in loss of precision. Otherwise exp format is used
176  * and the output of snprintf() is not modified.
177  *
178  * The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
createStringObjectFromLongDouble(long double value,int humanfriendly)179 robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
180     char buf[MAX_LONG_DOUBLE_CHARS];
181     int len = ld2string(buf,sizeof(buf),value,humanfriendly? LD_STR_HUMAN: LD_STR_AUTO);
182     return createStringObject(buf,len);
183 }
184 
185 /* Duplicate a string object, with the guarantee that the returned object
186  * has the same encoding as the original one.
187  *
188  * This function also guarantees that duplicating a small integer object
189  * (or a string object that contains a representation of a small integer)
190  * will always result in a fresh object that is unshared (refcount == 1).
191  *
192  * The resulting object always has refcount set to 1. */
dupStringObject(const robj * o)193 robj *dupStringObject(const robj *o) {
194     robj *d;
195 
196     serverAssert(o->type == OBJ_STRING);
197 
198     switch(o->encoding) {
199     case OBJ_ENCODING_RAW:
200         return createRawStringObject(o->ptr,sdslen(o->ptr));
201     case OBJ_ENCODING_EMBSTR:
202         return createEmbeddedStringObject(o->ptr,sdslen(o->ptr));
203     case OBJ_ENCODING_INT:
204         d = createObject(OBJ_STRING, NULL);
205         d->encoding = OBJ_ENCODING_INT;
206         d->ptr = o->ptr;
207         return d;
208     default:
209         serverPanic("Wrong encoding.");
210         break;
211     }
212 }
213 
createQuicklistObject(void)214 robj *createQuicklistObject(void) {
215     quicklist *l = quicklistCreate();
216     robj *o = createObject(OBJ_LIST,l);
217     o->encoding = OBJ_ENCODING_QUICKLIST;
218     return o;
219 }
220 
createZiplistObject(void)221 robj *createZiplistObject(void) {
222     unsigned char *zl = ziplistNew();
223     robj *o = createObject(OBJ_LIST,zl);
224     o->encoding = OBJ_ENCODING_ZIPLIST;
225     return o;
226 }
227 
createSetObject(void)228 robj *createSetObject(void) {
229     dict *d = dictCreate(&setDictType,NULL);
230     robj *o = createObject(OBJ_SET,d);
231     o->encoding = OBJ_ENCODING_HT;
232     return o;
233 }
234 
createIntsetObject(void)235 robj *createIntsetObject(void) {
236     intset *is = intsetNew();
237     robj *o = createObject(OBJ_SET,is);
238     o->encoding = OBJ_ENCODING_INTSET;
239     return o;
240 }
241 
createHashObject(void)242 robj *createHashObject(void) {
243     unsigned char *zl = ziplistNew();
244     robj *o = createObject(OBJ_HASH, zl);
245     o->encoding = OBJ_ENCODING_ZIPLIST;
246     return o;
247 }
248 
createZsetObject(void)249 robj *createZsetObject(void) {
250     zset *zs = zmalloc(sizeof(*zs));
251     robj *o;
252 
253     zs->dict = dictCreate(&zsetDictType,NULL);
254     zs->zsl = zslCreate();
255     o = createObject(OBJ_ZSET,zs);
256     o->encoding = OBJ_ENCODING_SKIPLIST;
257     return o;
258 }
259 
createZsetZiplistObject(void)260 robj *createZsetZiplistObject(void) {
261     unsigned char *zl = ziplistNew();
262     robj *o = createObject(OBJ_ZSET,zl);
263     o->encoding = OBJ_ENCODING_ZIPLIST;
264     return o;
265 }
266 
createStreamObject(void)267 robj *createStreamObject(void) {
268     stream *s = streamNew();
269     robj *o = createObject(OBJ_STREAM,s);
270     o->encoding = OBJ_ENCODING_STREAM;
271     return o;
272 }
273 
createModuleObject(moduleType * mt,void * value)274 robj *createModuleObject(moduleType *mt, void *value) {
275     moduleValue *mv = zmalloc(sizeof(*mv));
276     mv->type = mt;
277     mv->value = value;
278     return createObject(OBJ_MODULE,mv);
279 }
280 
/* Release the payload of a string object. Only RAW encoded strings have
 * their sds string freed here: nothing is done for the other encodings. */
void freeStringObject(robj *o) {
    if (o->encoding != OBJ_ENCODING_RAW) return;
    sdsfree(o->ptr);
}
286 
/* Release the payload of a list object. Only the quicklist encoding is
 * handled, any other encoding triggers a panic. */
void freeListObject(robj *o) {
    switch (o->encoding) {
    case OBJ_ENCODING_QUICKLIST:
        quicklistRelease(o->ptr);
        break;
    default:
        serverPanic("Unknown list encoding type");
    }
}
294 
/* Release the payload of a set object: the dictionary for the HT encoding,
 * the intset blob for the INTSET encoding. Any other encoding triggers a
 * panic. */
void freeSetObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dictRelease((dict*) o->ptr);
    } else if (o->encoding == OBJ_ENCODING_INTSET) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown set encoding type");
    }
}
307 
/* Release the payload of a sorted set object. For the SKIPLIST encoding
 * the dictionary, the skiplist and the zset structure itself are freed;
 * for the ZIPLIST encoding the ziplist blob is freed. Any other encoding
 * triggers a panic. */
void freeZsetObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = o->ptr;

        dictRelease(zs->dict);
        zslFree(zs->zsl);
        zfree(zs);
    } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown sorted set encoding");
    }
}
324 
/* Release the payload of a hash object: the dictionary for the HT
 * encoding, the ziplist blob for the ZIPLIST encoding. Any other encoding
 * triggers a panic. */
void freeHashObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dictRelease((dict*) o->ptr);
    } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown hash encoding type");
    }
}
338 
/* Release the payload of a module object: the wrapped value is handed to
 * the 'free' callback of its module type, then the moduleValue wrapper
 * itself is freed. */
void freeModuleObject(robj *o) {
    moduleValue *wrapper = o->ptr;
    void *payload = wrapper->value;

    wrapper->type->free(payload);
    zfree(wrapper);
}
344 
/* Release the payload of a stream object by calling freeStream() on it. */
void freeStreamObject(robj *o) {
    void *payload = o->ptr;
    freeStream(payload);
}
348 
/* Take one more reference to 'o'.
 *
 * Regular reference counts (below OBJ_FIRST_SPECIAL_REFCOUNT) are just
 * incremented. Objects using OBJ_SHARED_REFCOUNT are left untouched, while
 * trying to retain an object using OBJ_STATIC_REFCOUNT is a fatal error. */
void incrRefCount(robj *o) {
    if (o->refcount < OBJ_FIRST_SPECIAL_REFCOUNT) {
        o->refcount++;
        return;
    }

    /* Nothing to do for shared objects: this refcount is immutable. */
    if (o->refcount == OBJ_SHARED_REFCOUNT) return;

    if (o->refcount == OBJ_STATIC_REFCOUNT) {
        serverPanic("You tried to retain an object allocated in the stack");
    }
}
360 
/* Drop one reference to 'o'.
 *
 * When the reference being dropped is the last one (refcount == 1) the
 * payload is released with the free function matching the object type and
 * the object itself is freed. Otherwise the count is decremented, unless
 * the object uses OBJ_SHARED_REFCOUNT, in which case it is left untouched.
 * A refcount <= 0, as well as an unknown object type, triggers a panic. */
void decrRefCount(robj *o) {
    if (o->refcount != 1) {
        if (o->refcount <= 0) serverPanic("decrRefCount against refcount <= 0");
        if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount--;
        return;
    }

    /* Last reference: release the payload, then the object. */
    switch(o->type) {
    case OBJ_STRING:
        freeStringObject(o);
        break;
    case OBJ_LIST:
        freeListObject(o);
        break;
    case OBJ_SET:
        freeSetObject(o);
        break;
    case OBJ_ZSET:
        freeZsetObject(o);
        break;
    case OBJ_HASH:
        freeHashObject(o);
        break;
    case OBJ_MODULE:
        freeModuleObject(o);
        break;
    case OBJ_STREAM:
        freeStreamObject(o);
        break;
    default:
        serverPanic("Unknown object type");
        break;
    }
    zfree(o);
}
379 
/* Same as decrRefCount(), but taking the object as a void pointer. This
 * makes the function usable as free method in data structures expecting a
 * callback with the 'void free_object(void*)' prototype. */
void decrRefCountVoid(void *o) {
    decrRefCount(o);
}
386 
387 /* This function set the ref count to zero without freeing the object.
388  * It is useful in order to pass a new object to functions incrementing
389  * the ref count of the received object. Example:
390  *
391  *    functionThatWillIncrementRefCount(resetRefCount(CreateObject(...)));
392  *
393  * Otherwise you need to resort to the less elegant pattern:
394  *
395  *    *obj = createObject(...);
396  *    functionThatWillIncrementRefCount(obj);
397  *    decrRefCount(obj);
398  */
resetRefCount(robj * obj)399 robj *resetRefCount(robj *obj) {
400     obj->refcount = 0;
401     return obj;
402 }
403 
/* Check that the object 'o' is of the given 'type'.
 *
 * Returns 0 when the type matches. On mismatch the shared "wrong type"
 * error is sent to the client 'c' and 1 is returned. */
int checkType(client *c, robj *o, int type) {
    if (o->type == type) return 0;

    addReply(c,shared.wrongtypeerr);
    return 1;
}
411 
/* Try to convert the sds string 's' into a long long with string2ll().
 * Returns C_OK when the conversion succeeds (the value is passed to
 * string2ll() via 'llval'), C_ERR otherwise. */
int isSdsRepresentableAsLongLong(sds s, long long *llval) {
    if (string2ll(s,sdslen(s),llval)) return C_OK;
    return C_ERR;
}
415 
/* Check if the string object 'o' can be represented as a long long.
 *
 * Integer encoded objects always can: C_OK is returned and the value is
 * stored in '*llval' if 'llval' is not NULL. For the other encodings the
 * answer is delegated to isSdsRepresentableAsLongLong(). */
int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    if (o->encoding != OBJ_ENCODING_INT)
        return isSdsRepresentableAsLongLong(o->ptr,llval);

    if (llval) *llval = (long) o->ptr;
    return C_OK;
}
425 
426 /* Optimize the SDS string inside the string object to require little space,
427  * in case there is more than 10% of free space at the end of the SDS
428  * string. This happens because SDS strings tend to overallocate to avoid
429  * wasting too much time in allocations when appending to the string. */
trimStringObjectIfNeeded(robj * o)430 void trimStringObjectIfNeeded(robj *o) {
431     if (o->encoding == OBJ_ENCODING_RAW &&
432         sdsavail(o->ptr) > sdslen(o->ptr)/10)
433     {
434         o->ptr = sdsRemoveFreeSpace(o->ptr);
435     }
436 }
437 
438 /* Try to encode a string object in order to save space */
tryObjectEncoding(robj * o)439 robj *tryObjectEncoding(robj *o) {
440     long value;
441     sds s = o->ptr;
442     size_t len;
443 
444     /* Make sure this is a string object, the only type we encode
445      * in this function. Other types use encoded memory efficient
446      * representations but are handled by the commands implementing
447      * the type. */
448     serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
449 
450     /* We try some specialized encoding only for objects that are
451      * RAW or EMBSTR encoded, in other words objects that are still
452      * in represented by an actually array of chars. */
453     if (!sdsEncodedObject(o)) return o;
454 
455     /* It's not safe to encode shared objects: shared objects can be shared
456      * everywhere in the "object space" of Redis and may end in places where
457      * they are not handled. We handle them only as values in the keyspace. */
458      if (o->refcount > 1) return o;
459 
460     /* Check if we can represent this string as a long integer.
461      * Note that we are sure that a string larger than 20 chars is not
462      * representable as a 32 nor 64 bit integer. */
463     len = sdslen(s);
464     if (len <= 20 && string2l(s,len,&value)) {
465         /* This object is encodable as a long. Try to use a shared object.
466          * Note that we avoid using shared integers when maxmemory is used
467          * because every object needs to have a private LRU field for the LRU
468          * algorithm to work well. */
469         if ((server.maxmemory == 0 ||
470             !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) &&
471             value >= 0 &&
472             value < OBJ_SHARED_INTEGERS)
473         {
474             decrRefCount(o);
475             incrRefCount(shared.integers[value]);
476             return shared.integers[value];
477         } else {
478             if (o->encoding == OBJ_ENCODING_RAW) {
479                 sdsfree(o->ptr);
480                 o->encoding = OBJ_ENCODING_INT;
481                 o->ptr = (void*) value;
482                 return o;
483             } else if (o->encoding == OBJ_ENCODING_EMBSTR) {
484                 decrRefCount(o);
485                 return createStringObjectFromLongLongForValue(value);
486             }
487         }
488     }
489 
490     /* If the string is small and is still RAW encoded,
491      * try the EMBSTR encoding which is more efficient.
492      * In this representation the object and the SDS string are allocated
493      * in the same chunk of memory to save space and cache misses. */
494     if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
495         robj *emb;
496 
497         if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
498         emb = createEmbeddedStringObject(s,sdslen(s));
499         decrRefCount(o);
500         return emb;
501     }
502 
503     /* We can't encode the object...
504      *
505      * Do the last try, and at least optimize the SDS string inside
506      * the string object to require little space, in case there
507      * is more than 10% of free space at the end of the SDS string.
508      *
509      * We do that only for relatively large strings as this branch
510      * is only entered if the length of the string is greater than
511      * OBJ_ENCODING_EMBSTR_SIZE_LIMIT. */
512     trimStringObjectIfNeeded(o);
513 
514     /* Return the original object. */
515     return o;
516 }
517 
518 /* Get a decoded version of an encoded object (returned as a new object).
519  * If the object is already raw-encoded just increment the ref count. */
getDecodedObject(robj * o)520 robj *getDecodedObject(robj *o) {
521     robj *dec;
522 
523     if (sdsEncodedObject(o)) {
524         incrRefCount(o);
525         return o;
526     }
527     if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) {
528         char buf[32];
529 
530         ll2string(buf,32,(long)o->ptr);
531         dec = createStringObject(buf,strlen(buf));
532         return dec;
533     } else {
534         serverPanic("Unknown encoding type");
535     }
536 }
537 
538 /* Compare two string objects via strcmp() or strcoll() depending on flags.
539  * Note that the objects may be integer-encoded. In such a case we
540  * use ll2string() to get a string representation of the numbers on the stack
541  * and compare the strings, it's much faster than calling getDecodedObject().
542  *
543  * Important note: when REDIS_COMPARE_BINARY is used a binary-safe comparison
544  * is used. */
545 
546 #define REDIS_COMPARE_BINARY (1<<0)
547 #define REDIS_COMPARE_COLL (1<<1)
548 
compareStringObjectsWithFlags(robj * a,robj * b,int flags)549 int compareStringObjectsWithFlags(robj *a, robj *b, int flags) {
550     serverAssertWithInfo(NULL,a,a->type == OBJ_STRING && b->type == OBJ_STRING);
551     char bufa[128], bufb[128], *astr, *bstr;
552     size_t alen, blen, minlen;
553 
554     if (a == b) return 0;
555     if (sdsEncodedObject(a)) {
556         astr = a->ptr;
557         alen = sdslen(astr);
558     } else {
559         alen = ll2string(bufa,sizeof(bufa),(long) a->ptr);
560         astr = bufa;
561     }
562     if (sdsEncodedObject(b)) {
563         bstr = b->ptr;
564         blen = sdslen(bstr);
565     } else {
566         blen = ll2string(bufb,sizeof(bufb),(long) b->ptr);
567         bstr = bufb;
568     }
569     if (flags & REDIS_COMPARE_COLL) {
570         return strcoll(astr,bstr);
571     } else {
572         int cmp;
573 
574         minlen = (alen < blen) ? alen : blen;
575         cmp = memcmp(astr,bstr,minlen);
576         if (cmp == 0) return alen-blen;
577         return cmp;
578     }
579 }
580 
581 /* Wrapper for compareStringObjectsWithFlags() using binary comparison. */
compareStringObjects(robj * a,robj * b)582 int compareStringObjects(robj *a, robj *b) {
583     return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_BINARY);
584 }
585 
586 /* Wrapper for compareStringObjectsWithFlags() using collation. */
collateStringObjects(robj * a,robj * b)587 int collateStringObjects(robj *a, robj *b) {
588     return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_COLL);
589 }
590 
591 /* Equal string objects return 1 if the two objects are the same from the
592  * point of view of a string comparison, otherwise 0 is returned. Note that
593  * this function is faster then checking for (compareStringObject(a,b) == 0)
594  * because it can perform some more optimization. */
equalStringObjects(robj * a,robj * b)595 int equalStringObjects(robj *a, robj *b) {
596     if (a->encoding == OBJ_ENCODING_INT &&
597         b->encoding == OBJ_ENCODING_INT){
598         /* If both strings are integer encoded just check if the stored
599          * long is the same. */
600         return a->ptr == b->ptr;
601     } else {
602         return compareStringObjects(a,b) == 0;
603     }
604 }
605 
/* Return the length of the string object 'o': the sds length for RAW and
 * EMBSTR encoded objects, otherwise the value computed by sdigits10() on
 * the long stored in the pointer field. */
size_t stringObjectLen(robj *o) {
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    if (!sdsEncodedObject(o)) return sdigits10((long)o->ptr);
    return sdslen(o->ptr);
}
614 
getDoubleFromObject(const robj * o,double * target)615 int getDoubleFromObject(const robj *o, double *target) {
616     double value;
617 
618     if (o == NULL) {
619         value = 0;
620     } else {
621         serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
622         if (sdsEncodedObject(o)) {
623             if (!string2d(o->ptr, sdslen(o->ptr), &value))
624                 return C_ERR;
625         } else if (o->encoding == OBJ_ENCODING_INT) {
626             value = (long)o->ptr;
627         } else {
628             serverPanic("Unknown string encoding");
629         }
630     }
631     *target = value;
632     return C_OK;
633 }
634 
/* Like getDoubleFromObject(), but on failure an error is sent to the
 * client 'c' before returning C_ERR: the error is 'msg' if not NULL,
 * otherwise a default message. '*target' is written only on success. */
int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg) {
    double parsed;

    if (getDoubleFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    addReplyError(c, msg != NULL ? (char*)msg : "value is not a valid float");
    return C_ERR;
}
648 
/* Convert the string object 'o' into a long double stored in '*target'.
 *
 * A NULL object is converted to 0. RAW and EMBSTR strings are parsed with
 * string2ld(), integer encoded strings are converted from the stored long.
 *
 * Returns C_OK on success, or C_ERR (leaving '*target' untouched) if the
 * string can't be parsed. An unknown encoding triggers a panic. */
int getLongDoubleFromObject(robj *o, long double *target) {
    long double parsed = 0;

    if (o != NULL) {
        serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            if (!string2ld(o->ptr, sdslen(o->ptr), &parsed)) return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            parsed = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }

    *target = parsed;
    return C_OK;
}
668 
/* Like getLongDoubleFromObject(), but on failure an error is sent to the
 * client 'c' before returning C_ERR: the error is 'msg' if not NULL,
 * otherwise a default message. '*target' is written only on success. */
int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg) {
    long double parsed;

    if (getLongDoubleFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    addReplyError(c, msg != NULL ? (char*)msg : "value is not a valid float");
    return C_ERR;
}
682 
/* Convert the string object 'o' into a long long.
 *
 * A NULL object is converted to 0. RAW and EMBSTR strings are parsed with
 * string2ll(), integer encoded strings are converted from the stored long.
 *
 * Returns C_OK on success, storing the value in '*target' if 'target' is
 * not NULL, or C_ERR if the string can't be parsed. An unknown encoding
 * triggers a panic. */
int getLongLongFromObject(robj *o, long long *target) {
    long long parsed = 0;

    if (o != NULL) {
        serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            if (!string2ll(o->ptr,sdslen(o->ptr),&parsed)) return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            parsed = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }

    if (target) *target = parsed;
    return C_OK;
}
701 
/* Like getLongLongFromObject(), but on failure an error is sent to the
 * client 'c' before returning C_ERR: the error is 'msg' if not NULL,
 * otherwise a default message. '*target' is written only on success. */
int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) {
    long long parsed;

    if (getLongLongFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    addReplyError(c, msg != NULL ? (char*)msg :
                     "value is not an integer or out of range");
    return C_ERR;
}
715 
/* Convert the string object 'o' into a long stored in '*target'.
 *
 * The conversion goes through getLongLongFromObjectOrReply(), which replies
 * to the client on its own when the object is not a valid integer. If the
 * value is valid but outside the range of a long, an error is sent to the
 * client 'c' ('msg' if not NULL, otherwise a default message).
 *
 * Returns C_OK on success, C_ERR otherwise. */
int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
    long long wide;

    if (getLongLongFromObjectOrReply(c, o, &wide, msg) != C_OK) return C_ERR;

    if (wide >= LONG_MIN && wide <= LONG_MAX) {
        *target = wide;
        return C_OK;
    }

    addReplyError(c, msg != NULL ? (char*)msg : "value is out of range");
    return C_ERR;
}
731 
strEncoding(int encoding)732 char *strEncoding(int encoding) {
733     switch(encoding) {
734     case OBJ_ENCODING_RAW: return "raw";
735     case OBJ_ENCODING_INT: return "int";
736     case OBJ_ENCODING_HT: return "hashtable";
737     case OBJ_ENCODING_QUICKLIST: return "quicklist";
738     case OBJ_ENCODING_ZIPLIST: return "ziplist";
739     case OBJ_ENCODING_INTSET: return "intset";
740     case OBJ_ENCODING_SKIPLIST: return "skiplist";
741     case OBJ_ENCODING_EMBSTR: return "embstr";
742     case OBJ_ENCODING_STREAM: return "stream";
743     default: return "unknown";
744     }
745 }
746 
747 /* =========================== Memory introspection ========================= */
748 
749 
750 /* This is an helper function with the goal of estimating the memory
751  * size of a radix tree that is used to store Stream IDs.
752  *
753  * Note: to guess the size of the radix tree is not trivial, so we
754  * approximate it considering 16 bytes of data overhead for each
755  * key (the ID), and then adding the number of bare nodes, plus some
756  * overhead due by the data and child pointers. This secret recipe
757  * was obtained by checking the average radix tree created by real
758  * workloads, and then adjusting the constants to get numbers that
759  * more or less match the real memory usage.
760  *
761  * Actually the number of nodes and keys may be different depending
762  * on the insertion speed and thus the ability of the radix tree
763  * to compress prefixes. */
streamRadixTreeMemoryUsage(rax * rax)764 size_t streamRadixTreeMemoryUsage(rax *rax) {
765     size_t size;
766     size = rax->numele * sizeof(streamID);
767     size += rax->numnodes * sizeof(raxNode);
768     /* Add a fixed overhead due to the aux data pointer, children, ... */
769     size += rax->numnodes * sizeof(long)*30;
770     return size;
771 }
772 
773 /* Returns the size in bytes consumed by the key's value in RAM.
774  * Note that the returned value is just an approximation, especially in the
775  * case of aggregated data types where only "sample_size" elements
776  * are checked and averaged to estimate the total size. */
#define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
size_t objectComputeSize(robj *o, size_t sample_size) {
    sds ele, ele2;
    dict *d;
    dictIterator *di;
    struct dictEntry *de;
    /* asize:   running total that is returned to the caller.
     * elesize: sum of the sizes of the elements sampled so far.
     * samples: number of elements actually sampled (never more than
     *          sample_size, except for the quicklist do/while below which
     *          always samples at least one node). */
    size_t asize = 0, elesize = 0, samples = 0;

    if (o->type == OBJ_STRING) {
        if(o->encoding == OBJ_ENCODING_INT) {
            /* Only the object header is counted for this encoding. */
            asize = sizeof(*o);
        } else if(o->encoding == OBJ_ENCODING_RAW) {
            /* Separate sds allocation plus the object header. */
            asize = sdsZmallocSize(o->ptr)+sizeof(*o);
        } else if(o->encoding == OBJ_ENCODING_EMBSTR) {
            /* NOTE(review): the "+2" presumably accounts for the sds header
             * byte and the null terminator -- confirm against sds.h. */
            asize = sdslen(o->ptr)+2+sizeof(*o);
        } else {
            serverPanic("Unknown string encoding");
        }
    } else if (o->type == OBJ_LIST) {
        if (o->encoding == OBJ_ENCODING_QUICKLIST) {
            quicklist *ql = o->ptr;
            quicklistNode *node = ql->head;
            asize = sizeof(*o)+sizeof(quicklist);
            /* The do/while dereferences the head node unconditionally, so
             * this branch relies on the list having at least one node. As a
             * side effect "samples" is >= 1 here and the division below
             * needs no guard. */
            do {
                elesize += sizeof(quicklistNode)+ziplistBlobLen(node->zl);
                samples++;
            } while ((node = node->next) && samples < sample_size);
            /* Average sampled node size scaled by ql->len. The double result
             * is truncated when added to the size_t total. */
            asize += (double)elesize/samples*ql->len;
        } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
            asize = sizeof(*o)+ziplistBlobLen(o->ptr);
        } else {
            serverPanic("Unknown list encoding");
        }
    } else if (o->type == OBJ_SET) {
        if (o->encoding == OBJ_ENCODING_HT) {
            d = o->ptr;
            di = dictGetIterator(d);
            /* Fixed part: object header, dict header and one entry pointer
             * per slot. */
            asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
            while((de = dictNext(di)) != NULL && samples < sample_size) {
                ele = dictGetKey(de);
                elesize += sizeof(struct dictEntry) + sdsZmallocSize(ele);
                samples++;
            }
            dictReleaseIterator(di);
            /* Guarded: samples is 0 when the dict is empty or when
             * sample_size is 0. */
            if (samples) asize += (double)elesize/samples*dictSize(d);
        } else if (o->encoding == OBJ_ENCODING_INTSET) {
            intset *is = o->ptr;
            /* is->encoding is used here as the per-element width. */
            asize = sizeof(*o)+sizeof(*is)+(size_t)is->encoding*is->length;
        } else {
            serverPanic("Unknown set encoding");
        }
    } else if (o->type == OBJ_ZSET) {
        if (o->encoding == OBJ_ENCODING_ZIPLIST) {
            asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
        } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
            d = ((zset*)o->ptr)->dict;
            zskiplist *zsl = ((zset*)o->ptr)->zsl;
            /* Sampling walks the level-0 forward chain starting right after
             * the skiplist header node. */
            zskiplistNode *znode = zsl->header->level[0].forward;
            asize = sizeof(*o)+sizeof(zset)+sizeof(zskiplist)+sizeof(dict)+
                    (sizeof(struct dictEntry*)*dictSlots(d))+
                    zmalloc_size(zsl->header);
            while(znode != NULL && samples < sample_size) {
                /* Per element: the member string, one dict entry and the
                 * skiplist node allocation. */
                elesize += sdsZmallocSize(znode->ele);
                elesize += sizeof(struct dictEntry) + zmalloc_size(znode);
                samples++;
                znode = znode->level[0].forward;
            }
            if (samples) asize += (double)elesize/samples*dictSize(d);
        } else {
            serverPanic("Unknown sorted set encoding");
        }
    } else if (o->type == OBJ_HASH) {
        if (o->encoding == OBJ_ENCODING_ZIPLIST) {
            asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
        } else if (o->encoding == OBJ_ENCODING_HT) {
            d = o->ptr;
            di = dictGetIterator(d);
            asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
            while((de = dictNext(di)) != NULL && samples < sample_size) {
                /* Both the field (dict key) and the value are sized with
                 * sdsZmallocSize(). */
                ele = dictGetKey(de);
                ele2 = dictGetVal(de);
                elesize += sdsZmallocSize(ele) + sdsZmallocSize(ele2);
                elesize += sizeof(struct dictEntry);
                samples++;
            }
            dictReleaseIterator(di);
            if (samples) asize += (double)elesize/samples*dictSize(d);
        } else {
            serverPanic("Unknown hash encoding");
        }
    } else if (o->type == OBJ_STREAM) {
        stream *s = o->ptr;
        asize = sizeof(*o);
        asize += streamRadixTreeMemoryUsage(s->rax);

        /* Now we have to add the listpacks. The last listpack is often non
         * complete, so we estimate the size of the first N listpacks, and
         * use the average to compute the size of the first N-1 listpacks, and
         * finally add the real size of the last node. */
        raxIterator ri;
        raxStart(&ri,s->rax);
        raxSeek(&ri,"^",NULL,0);
        /* Note: this "samples" shadows the function-level variable of the
         * same name for the rest of the stream branch. */
        size_t lpsize = 0, samples = 0;
        while(samples < sample_size && raxNext(&ri)) {
            unsigned char *lp = ri.data;
            lpsize += lpBytes(lp);
            samples++;
        }
        if (s->rax->numele <= samples) {
            /* Every node was visited: lpsize is already the exact sum. */
            asize += lpsize;
        } else {
            if (samples) lpsize /= samples; /* Compute the average. */
            /* Average applied to all nodes but the last one, whose real size
             * is added below. */
            asize += lpsize * (s->rax->numele-1);
            /* No need to check if seek succeeded, we enter this branch only
             * if there are a few elements in the radix tree. */
            raxSeek(&ri,"$",NULL,0);
            raxNext(&ri);
            asize += lpBytes(ri.data);
        }
        raxStop(&ri);

        /* Consumer groups also have a non trivial memory overhead if there
         * are many consumers and many groups, let's count at least the
         * overhead of the pending entries in the groups and consumers
         * PELs. */
        if (s->cgroups) {
            /* Unlike the listpacks above, groups and consumers are walked in
             * full: sample_size is not consulted here. */
            raxStart(&ri,s->cgroups);
            raxSeek(&ri,"^",NULL,0);
            while(raxNext(&ri)) {
                streamCG *cg = ri.data;
                asize += sizeof(*cg);
                asize += streamRadixTreeMemoryUsage(cg->pel);
                asize += sizeof(streamNACK)*raxSize(cg->pel);

                /* For each consumer we also need to add the basic data
                 * structures and the PEL memory usage. */
                raxIterator cri;
                raxStart(&cri,cg->consumers);
                raxSeek(&cri,"^",NULL,0);
                while(raxNext(&cri)) {
                    streamConsumer *consumer = cri.data;
                    asize += sizeof(*consumer);
                    asize += sdslen(consumer->name);
                    asize += streamRadixTreeMemoryUsage(consumer->pel);
                    /* Don't count NACKs again, they are shared with the
                     * consumer group PEL. */
                }
                raxStop(&cri);
            }
            raxStop(&ri);
        }
    } else if (o->type == OBJ_MODULE) {
        moduleValue *mv = o->ptr;
        moduleType *mt = mv->type;
        /* The size is whatever the module type's mem_usage callback reports
         * (0 when the callback is not provided). The object header is not
         * added in this branch. */
        if (mt->mem_usage != NULL) {
            asize = mt->mem_usage(mv->value);
        } else {
            asize = 0;
        }
    } else {
        serverPanic("Unknown object type");
    }
    return asize;
}
941 
942 /* Release data obtained with getMemoryOverheadData(). */
freeMemoryOverheadData(struct redisMemOverhead * mh)943 void freeMemoryOverheadData(struct redisMemOverhead *mh) {
944     zfree(mh->db);
945     zfree(mh);
946 }
947 
948 /* Return a struct redisMemOverhead filled with memory overhead
949  * information used for the MEMORY OVERHEAD and INFO command. The returned
950  * structure pointer should be freed calling freeMemoryOverheadData(). */
getMemoryOverheadData(void)951 struct redisMemOverhead *getMemoryOverheadData(void) {
952     int j;
953     size_t mem_total = 0;
954     size_t mem = 0;
955     size_t zmalloc_used = zmalloc_used_memory();
956     struct redisMemOverhead *mh = zcalloc(sizeof(*mh));
957 
958     mh->total_allocated = zmalloc_used;
959     mh->startup_allocated = server.initial_memory_usage;
960     mh->peak_allocated = server.stat_peak_memory;
961     mh->total_frag =
962         (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used;
963     mh->total_frag_bytes =
964         server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
965     mh->allocator_frag =
966         (float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated;
967     mh->allocator_frag_bytes =
968         server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated;
969     mh->allocator_rss =
970         (float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
971     mh->allocator_rss_bytes =
972         server.cron_malloc_stats.allocator_resident - server.cron_malloc_stats.allocator_active;
973     mh->rss_extra =
974         (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.allocator_resident;
975     mh->rss_extra_bytes =
976         server.cron_malloc_stats.process_rss - server.cron_malloc_stats.allocator_resident;
977 
978     mem_total += server.initial_memory_usage;
979 
980     mem = 0;
981     if (server.repl_backlog)
982         mem += zmalloc_size(server.repl_backlog);
983     mh->repl_backlog = mem;
984     mem_total += mem;
985 
986     /* Computing the memory used by the clients would be O(N) if done
987      * here online. We use our values computed incrementally by
988      * clientsCronTrackClientsMemUsage(). */
989     mh->clients_slaves = server.stat_clients_type_memory[CLIENT_TYPE_SLAVE];
990     mh->clients_normal = server.stat_clients_type_memory[CLIENT_TYPE_MASTER]+
991                          server.stat_clients_type_memory[CLIENT_TYPE_PUBSUB]+
992                          server.stat_clients_type_memory[CLIENT_TYPE_NORMAL];
993     mem_total += mh->clients_slaves;
994     mem_total += mh->clients_normal;
995 
996     mem = 0;
997     if (server.aof_state != AOF_OFF) {
998         mem += sdsZmallocSize(server.aof_buf);
999         mem += aofRewriteBufferSize();
1000     }
1001     mh->aof_buffer = mem;
1002     mem_total+=mem;
1003 
1004     mem = server.lua_scripts_mem;
1005     mem += dictSize(server.lua_scripts) * sizeof(dictEntry) +
1006         dictSlots(server.lua_scripts) * sizeof(dictEntry*);
1007     mem += dictSize(server.repl_scriptcache_dict) * sizeof(dictEntry) +
1008         dictSlots(server.repl_scriptcache_dict) * sizeof(dictEntry*);
1009     if (listLength(server.repl_scriptcache_fifo) > 0) {
1010         mem += listLength(server.repl_scriptcache_fifo) * (sizeof(listNode) +
1011             sdsZmallocSize(listNodeValue(listFirst(server.repl_scriptcache_fifo))));
1012     }
1013     mh->lua_caches = mem;
1014     mem_total+=mem;
1015 
1016     for (j = 0; j < server.dbnum; j++) {
1017         redisDb *db = server.db+j;
1018         long long keyscount = dictSize(db->dict);
1019         if (keyscount==0) continue;
1020 
1021         mh->total_keys += keyscount;
1022         mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1));
1023         mh->db[mh->num_dbs].dbid = j;
1024 
1025         mem = dictSize(db->dict) * sizeof(dictEntry) +
1026               dictSlots(db->dict) * sizeof(dictEntry*) +
1027               dictSize(db->dict) * sizeof(robj);
1028         mh->db[mh->num_dbs].overhead_ht_main = mem;
1029         mem_total+=mem;
1030 
1031         mem = dictSize(db->expires) * sizeof(dictEntry) +
1032               dictSlots(db->expires) * sizeof(dictEntry*);
1033         mh->db[mh->num_dbs].overhead_ht_expires = mem;
1034         mem_total+=mem;
1035 
1036         mh->num_dbs++;
1037     }
1038 
1039     mh->overhead_total = mem_total;
1040     mh->dataset = zmalloc_used - mem_total;
1041     mh->peak_perc = (float)zmalloc_used*100/mh->peak_allocated;
1042 
1043     /* Metrics computed after subtracting the startup memory from
1044      * the total memory. */
1045     size_t net_usage = 1;
1046     if (zmalloc_used > mh->startup_allocated)
1047         net_usage = zmalloc_used - mh->startup_allocated;
1048     mh->dataset_perc = (float)mh->dataset*100/net_usage;
1049     mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : 0;
1050 
1051     return mh;
1052 }
1053 
1054 /* Helper for "MEMORY allocator-stats", used as a callback for the jemalloc
1055  * stats output. */
inputCatSds(void * result,const char * str)1056 void inputCatSds(void *result, const char *str) {
1057     /* result is actually a (sds *), so re-cast it here */
1058     sds *info = (sds *)result;
1059     *info = sdscat(*info, str);
1060 }
1061 
1062 /* This implements MEMORY DOCTOR. An human readable analysis of the Redis
1063  * memory condition. */
getMemoryDoctorReport(void)1064 sds getMemoryDoctorReport(void) {
1065     int empty = 0;          /* Instance is empty or almost empty. */
1066     int big_peak = 0;       /* Memory peak is much larger than used mem. */
1067     int high_frag = 0;      /* High fragmentation. */
1068     int high_alloc_frag = 0;/* High allocator fragmentation. */
1069     int high_proc_rss = 0;  /* High process rss overhead. */
1070     int high_alloc_rss = 0; /* High rss overhead. */
1071     int big_slave_buf = 0;  /* Slave buffers are too big. */
1072     int big_client_buf = 0; /* Client buffers are too big. */
1073     int many_scripts = 0;   /* Script cache has too many scripts. */
1074     int num_reports = 0;
1075     struct redisMemOverhead *mh = getMemoryOverheadData();
1076 
1077     if (mh->total_allocated < (1024*1024*5)) {
1078         empty = 1;
1079         num_reports++;
1080     } else {
1081         /* Peak is > 150% of current used memory? */
1082         if (((float)mh->peak_allocated / mh->total_allocated) > 1.5) {
1083             big_peak = 1;
1084             num_reports++;
1085         }
1086 
1087         /* Fragmentation is higher than 1.4 and 10MB ?*/
1088         if (mh->total_frag > 1.4 && mh->total_frag_bytes > 10<<20) {
1089             high_frag = 1;
1090             num_reports++;
1091         }
1092 
1093         /* External fragmentation is higher than 1.1 and 10MB? */
1094         if (mh->allocator_frag > 1.1 && mh->allocator_frag_bytes > 10<<20) {
1095             high_alloc_frag = 1;
1096             num_reports++;
1097         }
1098 
1099         /* Allocator rss is higher than 1.1 and 10MB ? */
1100         if (mh->allocator_rss > 1.1 && mh->allocator_rss_bytes > 10<<20) {
1101             high_alloc_rss = 1;
1102             num_reports++;
1103         }
1104 
1105         /* Non-Allocator rss is higher than 1.1 and 10MB ? */
1106         if (mh->rss_extra > 1.1 && mh->rss_extra_bytes > 10<<20) {
1107             high_proc_rss = 1;
1108             num_reports++;
1109         }
1110 
1111         /* Clients using more than 200k each average? */
1112         long numslaves = listLength(server.slaves);
1113         long numclients = listLength(server.clients)-numslaves;
1114         if (mh->clients_normal / numclients > (1024*200)) {
1115             big_client_buf = 1;
1116             num_reports++;
1117         }
1118 
1119         /* Slaves using more than 10 MB each? */
1120         if (numslaves > 0 && mh->clients_slaves / numslaves > (1024*1024*10)) {
1121             big_slave_buf = 1;
1122             num_reports++;
1123         }
1124 
1125         /* Too many scripts are cached? */
1126         if (dictSize(server.lua_scripts) > 1000) {
1127             many_scripts = 1;
1128             num_reports++;
1129         }
1130     }
1131 
1132     sds s;
1133     if (num_reports == 0) {
1134         s = sdsnew(
1135         "Hi Sam, I can't find any memory issue in your instance. "
1136         "I can only account for what occurs on this base.\n");
1137     } else if (empty == 1) {
1138         s = sdsnew(
1139         "Hi Sam, this instance is empty or is using very little memory, "
1140         "my issues detector can't be used in these conditions. "
1141         "Please, leave for your mission on Earth and fill it with some data. "
1142         "The new Sam and I will be back to our programming as soon as I "
1143         "finished rebooting.\n");
1144     } else {
1145         s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n");
1146         if (big_peak) {
1147             s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n");
1148         }
1149         if (high_frag) {
1150             s = sdscatprintf(s," * High total RSS: This instance has a memory fragmentation and RSS overhead greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
1151         }
1152         if (high_alloc_frag) {
1153             s = sdscatprintf(s," * High allocator fragmentation: This instance has an allocator external fragmentation greater than 1.1. This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. You can try enabling 'activedefrag' config option.\n\n");
1154         }
1155         if (high_alloc_rss) {
1156             s = sdscatprintf(s," * High allocator RSS overhead: This instance has an RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the allocator is much larger than the sum what the allocator actually holds). This problem is usually due to a large peak memory (check if there is a peak memory entry above in the report), you can try the MEMORY PURGE command to reclaim it.\n\n");
1157         }
1158         if (high_proc_rss) {
1159             s = sdscatprintf(s," * High process RSS overhead: This instance has non-allocator RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the Redis process is much larger than the RSS the allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
1160         }
1161         if (big_slave_buf) {
1162             s = sdscat(s," * Big replica buffers: The replica output buffers in this instance are greater than 10MB for each replica (on average). This likely means that there is some replica instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what replica is not receiving data correctly and why. You can use the INFO output in order to check the replicas delays and the CLIENT LIST command to check the output buffers of each replica.\n\n");
1163         }
1164         if (big_client_buf) {
1165             s = sdscat(s," * Big client buffers: The clients output buffers in this instance are greater than 200K per client (on average). This may result from different causes, like Pub/Sub clients subscribed to channels bot not receiving data fast enough, so that data piles on the Redis instance output buffer, or clients sending commands with large replies or very large sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to investigate the issue if it causes problems in your instance, or to understand better why certain clients are using a big amount of memory.\n\n");
1166         }
1167         if (many_scripts) {
1168             s = sdscat(s," * Many scripts: There seem to be many cached scripts in this instance (more than 1000). This may be because scripts are generated and `EVAL`ed, instead of being parameterized (with KEYS and ARGV), `SCRIPT LOAD`ed and `EVALSHA`ed. Unless `SCRIPT FLUSH` is called periodically, the scripts' caches may end up consuming most of your memory.\n\n");
1169         }
1170         s = sdscat(s,"I'm here to keep you safe, Sam. I want to help you.\n");
1171     }
1172     freeMemoryOverheadData(mh);
1173     return s;
1174 }
1175 
1176 /* Set the object LRU/LFU depending on server.maxmemory_policy.
1177  * The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
1178  * The lru_idle and lru_clock args are only relevant if policy
1179  * is MAXMEMORY_FLAG_LRU.
1180  * Either or both of them may be <0, in that case, nothing is set. */
objectSetLRUOrLFU(robj * val,long long lfu_freq,long long lru_idle,long long lru_clock,int lru_multiplier)1181 int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
1182                        long long lru_clock, int lru_multiplier) {
1183     if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1184         if (lfu_freq >= 0) {
1185             serverAssert(lfu_freq <= 255);
1186             val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
1187             return 1;
1188         }
1189     } else if (lru_idle >= 0) {
1190         /* Provided LRU idle time is in seconds. Scale
1191          * according to the LRU clock resolution this Redis
1192          * instance was compiled with (normally 1000 ms, so the
1193          * below statement will expand to lru_idle*1000/1000. */
1194         lru_idle = lru_idle*lru_multiplier/LRU_CLOCK_RESOLUTION;
1195         long lru_abs = lru_clock - lru_idle; /* Absolute access time. */
1196         /* If the LRU field underflows (since LRU it is a wrapping
1197          * clock), the best we can do is to provide a large enough LRU
1198          * that is half-way in the circlular LRU clock we use: this way
1199          * the computed idle time for this object will stay high for quite
1200          * some time. */
1201         if (lru_abs < 0)
1202             lru_abs = (lru_clock+(LRU_CLOCK_MAX/2)) % LRU_CLOCK_MAX;
1203         val->lru = lru_abs;
1204         return 1;
1205     }
1206     return 0;
1207 }
1208 
1209 /* ======================= The OBJECT and MEMORY commands =================== */
1210 
1211 /* This is a helper function for the OBJECT command. We need to lookup keys
1212  * without any modification of LRU or other parameters. */
objectCommandLookup(client * c,robj * key)1213 robj *objectCommandLookup(client *c, robj *key) {
1214     return lookupKeyReadWithFlags(c->db,key,LOOKUP_NOTOUCH|LOOKUP_NONOTIFY);
1215 }
1216 
/* Same as objectCommandLookup(), but when the lookup yields nothing the
 * given 'reply' object is sent to the client. The lookup result (possibly
 * NULL) is returned in both cases. */
robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) {
    robj *found = objectCommandLookup(c,key);

    if (found == NULL) {
        addReply(c, reply);
    }
    return found;
}
1223 
1224 /* Object command allows to inspect the internals of a Redis Object.
1225  * Usage: OBJECT <refcount|encoding|idletime|freq> <key> */
objectCommand(client * c)1226 void objectCommand(client *c) {
1227     robj *o;
1228 
1229     if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
1230         const char *help[] = {
1231 "ENCODING <key> -- Return the kind of internal representation used in order to store the value associated with a key.",
1232 "FREQ <key> -- Return the access frequency index of the key. The returned integer is proportional to the logarithm of the recent access frequency of the key.",
1233 "IDLETIME <key> -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key.",
1234 "REFCOUNT <key> -- Return the number of references of the value associated with the specified key.",
1235 NULL
1236         };
1237         addReplyHelp(c, help);
1238     } else if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) {
1239         if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
1240                 == NULL) return;
1241         addReplyLongLong(c,o->refcount);
1242     } else if (!strcasecmp(c->argv[1]->ptr,"encoding") && c->argc == 3) {
1243         if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
1244                 == NULL) return;
1245         addReplyBulkCString(c,strEncoding(o->encoding));
1246     } else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) {
1247         if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
1248                 == NULL) return;
1249         if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1250             addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1251             return;
1252         }
1253         addReplyLongLong(c,estimateObjectIdleTime(o)/1000);
1254     } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) {
1255         if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
1256                 == NULL) return;
1257         if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) {
1258             addReplyError(c,"An LFU maxmemory policy is not selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1259             return;
1260         }
1261         /* LFUDecrAndReturn should be called
1262          * in case of the key has not been accessed for a long time,
1263          * because we update the access time only
1264          * when the key is read or overwritten. */
1265         addReplyLongLong(c,LFUDecrAndReturn(o));
1266     } else {
1267         addReplySubcommandSyntaxError(c);
1268     }
1269 }
1270 
1271 /* The memory command will eventually be a complete interface for the
1272  * memory introspection capabilities of Redis.
1273  *
1274  * Usage: MEMORY usage <key> */
memoryCommand(client * c)1275 void memoryCommand(client *c) {
1276     if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
1277         const char *help[] = {
1278 "DOCTOR - Return memory problems reports.",
1279 "MALLOC-STATS -- Return internal statistics report from the memory allocator.",
1280 "PURGE -- Attempt to purge dirty pages for reclamation by the allocator.",
1281 "STATS -- Return information about the memory usage of the server.",
1282 "USAGE <key> [SAMPLES <count>] -- Return memory in bytes used by <key> and its value. Nested values are sampled up to <count> times (default: 5).",
1283 NULL
1284         };
1285         addReplyHelp(c, help);
1286     } else if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc >= 3) {
1287         dictEntry *de;
1288         long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES;
1289         for (int j = 3; j < c->argc; j++) {
1290             if (!strcasecmp(c->argv[j]->ptr,"samples") &&
1291                 j+1 < c->argc)
1292             {
1293                 if (getLongLongFromObjectOrReply(c,c->argv[j+1],&samples,NULL)
1294                      == C_ERR) return;
1295                 if (samples < 0) {
1296                     addReply(c,shared.syntaxerr);
1297                     return;
1298                 }
1299                 if (samples == 0) samples = LLONG_MAX;;
1300                 j++; /* skip option argument. */
1301             } else {
1302                 addReply(c,shared.syntaxerr);
1303                 return;
1304             }
1305         }
1306         if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) {
1307             addReplyNull(c);
1308             return;
1309         }
1310         size_t usage = objectComputeSize(dictGetVal(de),samples);
1311         usage += sdsZmallocSize(dictGetKey(de));
1312         usage += sizeof(dictEntry);
1313         addReplyLongLong(c,usage);
1314     } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
1315         struct redisMemOverhead *mh = getMemoryOverheadData();
1316 
1317         addReplyMapLen(c,25+mh->num_dbs);
1318 
1319         addReplyBulkCString(c,"peak.allocated");
1320         addReplyLongLong(c,mh->peak_allocated);
1321 
1322         addReplyBulkCString(c,"total.allocated");
1323         addReplyLongLong(c,mh->total_allocated);
1324 
1325         addReplyBulkCString(c,"startup.allocated");
1326         addReplyLongLong(c,mh->startup_allocated);
1327 
1328         addReplyBulkCString(c,"replication.backlog");
1329         addReplyLongLong(c,mh->repl_backlog);
1330 
1331         addReplyBulkCString(c,"clients.slaves");
1332         addReplyLongLong(c,mh->clients_slaves);
1333 
1334         addReplyBulkCString(c,"clients.normal");
1335         addReplyLongLong(c,mh->clients_normal);
1336 
1337         addReplyBulkCString(c,"aof.buffer");
1338         addReplyLongLong(c,mh->aof_buffer);
1339 
1340         addReplyBulkCString(c,"lua.caches");
1341         addReplyLongLong(c,mh->lua_caches);
1342 
1343         for (size_t j = 0; j < mh->num_dbs; j++) {
1344             char dbname[32];
1345             snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);
1346             addReplyBulkCString(c,dbname);
1347             addReplyMapLen(c,2);
1348 
1349             addReplyBulkCString(c,"overhead.hashtable.main");
1350             addReplyLongLong(c,mh->db[j].overhead_ht_main);
1351 
1352             addReplyBulkCString(c,"overhead.hashtable.expires");
1353             addReplyLongLong(c,mh->db[j].overhead_ht_expires);
1354         }
1355 
1356         addReplyBulkCString(c,"overhead.total");
1357         addReplyLongLong(c,mh->overhead_total);
1358 
1359         addReplyBulkCString(c,"keys.count");
1360         addReplyLongLong(c,mh->total_keys);
1361 
1362         addReplyBulkCString(c,"keys.bytes-per-key");
1363         addReplyLongLong(c,mh->bytes_per_key);
1364 
1365         addReplyBulkCString(c,"dataset.bytes");
1366         addReplyLongLong(c,mh->dataset);
1367 
1368         addReplyBulkCString(c,"dataset.percentage");
1369         addReplyDouble(c,mh->dataset_perc);
1370 
1371         addReplyBulkCString(c,"peak.percentage");
1372         addReplyDouble(c,mh->peak_perc);
1373 
1374         addReplyBulkCString(c,"allocator.allocated");
1375         addReplyLongLong(c,server.cron_malloc_stats.allocator_allocated);
1376 
1377         addReplyBulkCString(c,"allocator.active");
1378         addReplyLongLong(c,server.cron_malloc_stats.allocator_active);
1379 
1380         addReplyBulkCString(c,"allocator.resident");
1381         addReplyLongLong(c,server.cron_malloc_stats.allocator_resident);
1382 
1383         addReplyBulkCString(c,"allocator-fragmentation.ratio");
1384         addReplyDouble(c,mh->allocator_frag);
1385 
1386         addReplyBulkCString(c,"allocator-fragmentation.bytes");
1387         addReplyLongLong(c,mh->allocator_frag_bytes);
1388 
1389         addReplyBulkCString(c,"allocator-rss.ratio");
1390         addReplyDouble(c,mh->allocator_rss);
1391 
1392         addReplyBulkCString(c,"allocator-rss.bytes");
1393         addReplyLongLong(c,mh->allocator_rss_bytes);
1394 
1395         addReplyBulkCString(c,"rss-overhead.ratio");
1396         addReplyDouble(c,mh->rss_extra);
1397 
1398         addReplyBulkCString(c,"rss-overhead.bytes");
1399         addReplyLongLong(c,mh->rss_extra_bytes);
1400 
1401         addReplyBulkCString(c,"fragmentation"); /* this is the total RSS overhead, including fragmentation */
1402         addReplyDouble(c,mh->total_frag); /* it is kept here for backwards compatibility */
1403 
1404         addReplyBulkCString(c,"fragmentation.bytes");
1405         addReplyLongLong(c,mh->total_frag_bytes);
1406 
1407         freeMemoryOverheadData(mh);
1408     } else if (!strcasecmp(c->argv[1]->ptr,"malloc-stats") && c->argc == 2) {
1409 #if defined(USE_JEMALLOC)
1410         sds info = sdsempty();
1411         je_malloc_stats_print(inputCatSds, &info, NULL);
1412         addReplyVerbatim(c,info,sdslen(info),"txt");
1413         sdsfree(info);
1414 #else
1415         addReplyBulkCString(c,"Stats not supported for the current allocator");
1416 #endif
1417     } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
1418         sds report = getMemoryDoctorReport();
1419         addReplyVerbatim(c,report,sdslen(report),"txt");
1420         sdsfree(report);
1421     } else if (!strcasecmp(c->argv[1]->ptr,"purge") && c->argc == 2) {
1422         if (jemalloc_purge() == 0)
1423             addReply(c, shared.ok);
1424         else
1425             addReplyError(c, "Error purging dirty pages");
1426     } else {
1427         addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try MEMORY HELP", (char*)c->argv[1]->ptr);
1428     }
1429 }
1430